insert overwrite table clear_data_2023 select station as station, `date` as `date`, split(`name`, ',')[1] as area, split(`date`, '-')[0] as `year`, split(`date`, '-')[1] as `month`, split(`date`, '-')[2] as `day`, c2f(temp) as temp, c2f(`max`) as temp_max, c2f(`min`) as temp_min, if(abs(999.9-wdsp) > 0.1, wdsp, null) as wind_speed, if(abs(99.9-prcp) > 0.1, prcp, null) as rainfall from data_2023
create table day_country_temp comment '国家每天统计表' row format delimited fields terminated by '\t' as
select area, `date`, avg(temp) as temp, max(temp_max) as temp_max, min(temp_min) as temp_min from clear_data_2023 group by area, `date`;
-- 每个站点每月的平均气温,最高,最低温度,平均风速,最大,最小风速
create table day_station_monthly_data comment '每个站点每个月的数据' row format delimited fields terminated by '\t' as
select station, area , `month`, avg(temp) as temp, max(temp_max) as temp_max, min(temp_min) as temp_min, avg(wind_speed) as avg_wind_speed, min(wind_speed) as min_wind_speed, max(wind_speed) as max_wind_speed, avg(rainfall) as avg_rainfall, max(rainfall) as max_rainfall, min(rainfall) as min_rainfall from clear_data_2023 group by station, area, `month`;
-- 每个国家每月的平均降水量
create table monthly_area_rainfall row format delimited fields terminated by '\t' as
select area, `month`, avg(avg_rainfall) as avg_rainfall from day_station_monthly_data group by area, `month`;
-- 每个国家最大降水量的月份及降水量
create table area_max_rainfall_data row format delimited fields terminated by '\t' as
select t.area, a.month, t.max_rainfall from monthly_area_rainfall a join
(select area, max(avg_rainfall) as max_rainfall from monthly_area_rainfall group by area) t on a.area = t.area and a.avg_rainfall = t.max_rainfall;
复制代码
4、数据传输
我这是将数据从hive导入到mysql
4.1 mysql初始化
先创建好mysql的库 my_test ;
-- 先创建好数据库my_test
use my_test;
create table if not exists `day_country_temp` ( area varchar(20), `date` varchar(20), temp decimal(10,2), temp_max decimal(10,2), temp_min decimal(10,2)) comment '国家每天统计表';
create table if not exists `day_station_monthly_data` ( station varchar(20), area varchar(20), `month` int(4), temp decimal(10,2), temp_max decimal(10,2), temp_min decimal(10,2), avg_wind_speed decimal(10,2), min_wind_speed decimal(10,2), max_wind_speed decimal(10,2), avg_rainfall decimal(10,2), max_rainfall decimal(10,2), min_rainfall decimal(10,2)) comment '每个站点每个月的数据';
create table if not exists `monthly_area_rainfall` ( area varchar(20), `month` int(4), avg_rainfall decimal(10,2)) comment '每个国家每月的平均降水量';
create table if not exists `area_max_rainfall_data` ( area varchar(20), `month` int(4), max_rainfall decimal(10,2)) comment '每个国家最大降水量的月份及降水量';