背景
公司的中台产品需要对外部API接口返回的JSON数据进行采集入湖。有时外部API接口返回的JSON数据层级嵌套比较深,举个例子(完整的JSON示例见下文测试代码中的jsonStr):
上述的JSON数据中,最外层为请求返回对象,data里面包含返回的业务数据,业务数据按照学校 / 班级 / 学生进行嵌套
在数据入湖时,需要按照最内层的学生视角将数据拆分为行列数据,最终的拆分结果如下:
由于对接的外部API接口返回的JSON数据结构不是统一的、固定的,所以需要通过一种算法对每一层对象、数组进行遍历和钻取,实现JSON数据的扁平化
网上找了一些JSON扁平化的中间件(例如 Json2Flat),但它们的扁平化处理不够完善,不支持跨层级的数组嵌套结构
所以决定自己实现扁平化处理
关键代码如下:
/**
 * One object level of a nested JSON document, linked to the level that contains it.
 *
 * <p>Walking {@link #getParent()} from a node up to {@code null} visits every enclosing
 * object from the innermost level to the outermost one.
 */
public class LinkedNode {

    /** Node of the enclosing object, or {@code null} for an outermost object. */
    private final LinkedNode parent;

    /** Key under which {@link #data} is stored in the enclosing object, or {@code null} at the outermost level. */
    private final String parentName;

    /** The fields of this object level, exactly as supplied by the caller (not copied). */
    private final Map<String, Object> data;

    /**
     * Creates a node for one object level.
     *
     * @param parent     node of the enclosing object, or {@code null} if there is none
     * @param parentName key under which {@code data} is stored in the enclosing object,
     *                   or {@code null} if there is none
     * @param data       fields of this object level
     */
    public LinkedNode(LinkedNode parent, String parentName, Map<String, Object> data) {
        this.parent = parent;
        this.parentName = parentName;
        this.data = data;
    }

    /**
     * Returns the node of the enclosing object.
     *
     * @return the parent node, or {@code null} for an outermost object
     */
    public LinkedNode getParent() {
        return parent;
    }

    /**
     * Returns the key under which this node's data is stored in the enclosing object.
     *
     * @return the key, or {@code null} for an outermost object
     */
    public String getParentName() {
        return parentName;
    }

    /**
     * Returns the fields of this object level.
     *
     * @return the same map instance that was passed to the constructor
     */
    public Map<String, Object> getData() {
        return data;
    }
}
/**
 * Flattens a nested JSON-like structure (maps, lists and scalar values) into rows and columns.
 *
 * <p>{@code process} walks the structure and records one {@link LinkedNode} per innermost
 * object, i.e. an object that contains no nested object and no array of objects. Each such
 * node becomes one output row; the row is built by concatenating the scalar fields of every
 * enclosing object, outermost first, followed by the node's own fields.
 *
 * <p>Limitations visible in this implementation:
 * <ul>
 *   <li>Column names are derived from the first recorded node only, so rows coming from
 *       differently shaped branches may not line up with {@link #getColumn()}.</li>
 *   <li>A list counts as an "array of objects" only when it is non-empty and its first
 *       element is a map; any other list is treated as a plain value.</li>
 * </ul>
 *
 * <p>Instances keep the result of the last {@code process} call in fields, so one instance
 * should not be shared between concurrent callers.
 */
public class JSONFlatProcessor {

    /** Innermost nodes found by the last {@code process} call; empty before the first call. */
    private LinkedList<LinkedNode> nodes = new LinkedList<>();

    /** Column names assembled by the last {@link #getColumn()} call. */
    private LinkedList<String> column;

    /** Rows assembled by the last {@link #getData()} call. */
    private List<Object[]> data;

    /**
     * Visits one object level and recurses into its nested objects and arrays of objects.
     * An object without nested levels is recorded as an innermost node.
     *
     * @param parent     node of the enclosing object, or {@code null} at the outermost level
     * @param parentName key under which {@code data} is stored in the enclosing object,
     *                   or {@code null} at the outermost level
     * @param data       fields of the object to visit; {@code null} is ignored
     */
    public void find(LinkedNode parent, String parentName, Map<String, Object> data) {
        if (data == null) {
            return;
        }
        LinkedNode node = new LinkedNode(parent, parentName, data);
        if (!hasObjectOrArray(data)) {
            nodes.add(node);
            return;
        }
        for (Map.Entry<String, Object> entry : data.entrySet()) {
            Object value = entry.getValue();
            String key = String.valueOf(entry.getKey());
            if (value instanceof Map) {
                // Nested values come from a JSON parser, so keys are expected to be strings.
                @SuppressWarnings("unchecked")
                Map<String, Object> child = (Map<String, Object>) value;
                find(node, key, child);
            } else if (isObjectArray(value)) {
                // Only the first element is known to be a map; find(List) filters the rest.
                @SuppressWarnings("unchecked")
                List<Map<String, Object>> children = (List<Map<String, Object>>) value;
                find(node, key, children);
            }
        }
    }

    /**
     * Visits every object in an array of objects, all sharing the same parent and key.
     * Elements that are not maps (including {@code null}) are skipped instead of failing.
     *
     * @param parent     node of the enclosing object, or {@code null} at the outermost level
     * @param parentName key under which the array is stored in the enclosing object,
     *                   or {@code null} at the outermost level
     * @param data       the array elements; {@code null} is ignored
     */
    public void find(LinkedNode parent, String parentName, List<Map<String, Object>> data) {
        if (data == null) {
            return;
        }
        // Iterate as Object: the list may hold non-map elements despite its declared type.
        for (Object item : data) {
            if (item instanceof Map) {
                @SuppressWarnings("unchecked")
                Map<String, Object> child = (Map<String, Object>) item;
                find(parent, parentName, child);
            }
        }
    }

    /**
     * Tells whether an object contains at least one nested object or array of objects.
     *
     * @param item the object's fields; {@code null} yields {@code FALSE}
     * @return {@code TRUE} if any value is a map or satisfies {@link #isObjectArray(Object)}
     */
    protected Boolean hasObjectOrArray(Map<String, Object> item) {
        if (item == null) {
            return Boolean.FALSE;
        }
        for (Object field : item.values()) {
            if (isNested(field)) {
                return Boolean.TRUE;
            }
        }
        return Boolean.FALSE;
    }

    /**
     * Tells whether a value is treated as an array of objects.
     *
     * @param object the value to inspect
     * @return {@code true} if it is a non-empty list whose first element is a map
     */
    protected Boolean isObjectArray(Object object) {
        if (!(object instanceof List)) {
            return Boolean.FALSE;
        }
        List<?> list = (List<?>) object;
        return !CollectionUtils.isEmpty(list) && list.get(0) instanceof Map;
    }

    /**
     * Flattens a top-level array of objects, replacing any previous result.
     *
     * @param data the top-level array; {@code null} produces an empty result
     * @return this processor, for chaining
     */
    public JSONFlatProcessor process(List<Map<String, Object>> data) {
        nodes = new LinkedList<>();
        find(null, null, data);
        return this;
    }

    /**
     * Flattens a top-level object, replacing any previous result.
     *
     * @param data the top-level object; {@code null} produces an empty result
     * @return this processor, for chaining
     */
    public JSONFlatProcessor process(Map<String, Object> data) {
        nodes = new LinkedList<>();
        find(null, null, data);
        return this;
    }

    /**
     * Returns the innermost nodes found by the last {@code process} call.
     *
     * @return the internal list of nodes, one per output row; empty if nothing was processed
     */
    public LinkedList<LinkedNode> getNodes() {
        return nodes;
    }

    /**
     * Builds the column names from the first innermost node and its chain of parents.
     * Fields of the outermost object keep their plain name; fields of nested levels are
     * named {@code <key>.<field>}, where {@code <key>} is the key the level is stored under.
     *
     * @return column names ordered outermost level first, or an empty list if there are no nodes
     */
    public List<String> getColumn() {
        if (CollectionUtils.isEmpty(nodes)) {
            return Collections.emptyList();
        }
        column = new LinkedList<>();
        collectColumn(nodes.getFirst());
        return column;
    }

    /**
     * Prepends the scalar field names of {@code node} to the column list, then does the same
     * for each parent, so that outer levels end up before inner ones.
     *
     * @param node the node whose scalar field names are collected
     */
    protected void collectColumn(LinkedNode node) {
        Map<String, Object> fields = node.getData();
        String prefix = node.getParentName();
        List<String> innerColumn = new ArrayList<>(fields.size());
        for (Map.Entry<String, Object> entry : fields.entrySet()) {
            if (isNested(entry.getValue())) {
                continue;
            }
            innerColumn.add(null == prefix
                    ? String.valueOf(entry.getKey())
                    : String.format("%s.%s", prefix, entry.getKey()));
        }
        column.addAll(0, innerColumn);
        if (null != node.getParent()) {
            collectColumn(node.getParent());
        }
    }

    /**
     * Builds one row per innermost node, in the order the nodes were found.
     *
     * @return the rows, each holding the scalar values of the node and all its parents
     *         (outermost first), or an empty list if there are no nodes
     */
    public List<Object[]> getData() {
        if (CollectionUtils.isEmpty(nodes)) {
            return Collections.emptyList();
        }
        data = new ArrayList<>(nodes.size());
        for (LinkedNode node : nodes) {
            LinkedList<Object> container = new LinkedList<>();
            collectData(node, container);
            data.add(container.toArray());
        }
        return data;
    }

    /**
     * Prepends the scalar values of {@code node} to {@code container}, then does the same
     * for each parent, so that outer levels end up before inner ones.
     *
     * @param node      the node whose scalar values are collected
     * @param container the row being assembled; modified in place
     */
    protected void collectData(LinkedNode node, LinkedList<Object> container) {
        Map<String, Object> fields = node.getData();
        List<Object> innerData = new ArrayList<>(fields.size());
        for (Object value : fields.values()) {
            if (!isNested(value)) {
                innerData.add(value);
            }
        }
        container.addAll(0, innerData);
        if (null != node.getParent()) {
            collectData(node.getParent(), container);
        }
    }

    /** Tells whether a field value is a nested level (object or array of objects) rather than a plain value. */
    private boolean isNested(Object value) {
        return value instanceof Map || isObjectArray(value);
    }

    /** Minimal collection helper so that the processor needs no external utility library. */
    protected static class CollectionUtils {

        /**
         * Tells whether a collection has no elements.
         *
         * @param collection the collection to test, may be {@code null}
         * @return {@code true} if the collection is {@code null} or empty
         */
        public static boolean isEmpty(Collection<?> collection) {
            return (collection == null || collection.isEmpty());
        }
    }
}
/**
 * Small demo that flattens a sample school / class / student JSON document and prints
 * the number of rows, the column names and the first row.
 */
public class MainTests {

    /**
     * Parses the sample JSON, runs the flattening processor on it and prints the result.
     *
     * @param args unused
     * @throws Exception if the sample JSON cannot be parsed or the first row cannot be serialized
     */
    public static void main(String[] args) throws Exception {
        String jsonStr = "{\"code\":200,\"requestId\":\"1680177848458\",\"data\":[{\"school\":\"xxx市第一实验小学\",\"no\":\"1001\",\"class\":[{\"name\":\"一(1)班\",\"teacher\":\"吴老师\",\"student\":[{\"name\":\"张同学\",\"age\":6},{\"name\":\"王同学\",\"age\":7}]}]},{\"school\":\"xxx市第二实验小学\",\"no\":\"1002\",\"class\":[{\"name\":\"一(2)班\",\"teacher\":\"陈老师\",\"student\":[{\"name\":\"欧阳同学\",\"age\":6}]}]}]}";

        // The sample document has an object at the top level. For a document whose top level
        // is an array, read it with List.class and pass the list to process(...) instead.
        ObjectMapper parser = new ObjectMapper();
        Map<String, Object> document = parser.readValue(jsonStr, Map.class);

        JSONFlatProcessor flattener = new JSONFlatProcessor();
        flattener.process(document);

        int rowCount = flattener.getNodes().size();
        System.out.println("数据条数: " + rowCount);

        System.out.println("字段名: " + flattener.getColumn());

        Object[] firstRow = flattener.getData().get(0);
        ObjectMapper serializer = new ObjectMapper();
        System.out.println("首行数据: " + serializer.writeValueAsString(firstRow));
    }
}
数据条数: 3
字段名: [code, requestId, data.school, data.no, class.name, class.teacher, student.name, student.age]
首行数据: [200,"1680177848458","xxx市第一实验小学","1001","一(1)班","吴老师","张同学",6]