二百四十五、海豚调治器——用DolphinScheduler调治实行复杂的HiveSQL(HQL包罗多种海豚无法精确辨认的符号)
一、目标在Hive中完成复杂JSON,既有对象尚有数组而且数组中包罗数组的分析后,本来以为没啥题目了,效果在DolphinScheduler中调治又出现了大题目,搞了一天、试了很多种方法、死了无数脑细胞,才办理了这个题目!
二、HiveSQL
insertoverwritetablehurys_dc_dwd.dwd_json_statistics partition(day)
select
t1.device_no,
source_device_type,
sn,
model,
create_time,
cycle,
get_json_object(coil_list,'$.laneNo')lane_no,
get_json_object(coil_list,'$.laneType') lane_type,
section_no,
get_json_object(coil_list,'$.coilNo') coil_no,
get_json_object(coil_list,'$.volumeSum') volume_sum,
get_json_object(coil_list,'$.volumePerson') volume_person,
get_json_object(coil_list,'$.volumeCarNon') volume_car_non,
get_json_object(coil_list,'$.volumeCarSmall') volume_car_small,
get_json_object(coil_list,'$.volumeCarMiddle') volume_car_middle,
get_json_object(coil_list,'$.volumeCarBig') volume_car_big,
get_json_object(coil_list,'$.speedAvg') speed_avg,
get_json_object(coil_list,'$.speed85') speed_85,
get_json_object(coil_list,'$.timeOccupancy') time_occupancy,
get_json_object(coil_list,'$.averageHeadway') average_headway,
get_json_object(coil_list,'$.averageGap') average_gap,
substr(create_time,1,10) day
from (select
get_json_object(statistics_json,'$.deviceNo') device_no,
get_json_object(statistics_json,'$.sourceDeviceType')source_device_type,
get_json_object(statistics_json,'$.sn') sn,
get_json_object(statistics_json,'$.model') model,
get_json_object(statistics_json,'$.createTime') create_time ,
get_json_object(statistics_json,'$.data.cycle') cycle,
get_json_object(replace(replace(section_list,':{',':[{'),'}}','}]}'),'$.sectionNo') section_no,
section_list
from hurys_dc_ods.ods_statistics
lateral view explode(split(replace(replace(replace(get_json_object(statistics_json,'$.data.sectionList'),
'[',''),']',''),'},{"sectionNo"','}|{"sectionNo"'),"\\|")) tf as section_list
where day='2024-07-18' --date_sub(current_date(), 1) -- '2024-07-18' --
) as t1
lateral view
页:
[1]