其他部分答案在B站工房 https://gf.bilibili.com/item/detail/1105242061
Hive综合应用案例——用户学历查询
1 查询每一个用户从出生到现在的总天数
- ---------- 禁止修改 ----------
- drop database if exists mydb cascade;
- ---------- 禁止修改 ----------
- ---------- begin ----------
- ---创建mydb数据库
- create database if not exists mydb;
- ---使用mydb数据库
- use mydb;
- ---创建表user
- create table usertab(
- id string,
- sex string,
- time string,
- education string,
- occupation string,
- income string,
- area string,
- desired_area string,
- city_countryside string
- )
- row format delimited fields terminated by ',';
- ---导入数据:/root/data.txt
- load data local inpath '/root/data.txt' into table usertab;
- --查询每一个用户从出生到2019-06-10的总天数
- select id, datediff('2019-06-10',regexp_replace(time, '/', '-')) from usertab;
复制代码 2 同一个地区雷同的教育程度的最高收入
- ---------- 禁止修改 ----------
- drop database if exists mydb cascade;
- ---------- 禁止修改 ----------
- ---------- begin ----------
- --创建mydb数据库
- create database if not exists mydb;
- ---使用mydb数据库
- use mydb;
- ---创建表user
- create table usertab1(
- id int,
- sex string,
- time string,
- education string,
- occupation string,
- income string,
- area string,
- desired_area string,
- city_countryside string
- )
- row format delimited fields terminated by ',';
- ---导入数据:/root/data.txt
- load data local inpath '/root/data1.txt' into table usertab1;
- --同一个地区相同的教育程度的最高收入
- select area,education,income
- from(
- select area,education,income,
- row_number() over(
- partition by area, education order by income desc
- ) as t1
- from usertab1
- ) as t2
- where t2.t1 = 1;
- ---------- end ----------
复制代码 3 统计各级学历所占总人数百分比
- ---------- 禁止修改 ----------
- drop database if exists mydb cascade;
- set hive.mapred.mode=nonstrict;
- ---------- 禁止修改 ----------
- ---------- begin ----------
- --创建mydb数据库
- create database if not exists mydb;
- ---使用mydb数据库
- use mydb;
- ---创建表user
- create table usertab2(
- id int,
- sex string,
- time string,
- education string,
- occupation string,
- income string,
- area string,
- desired_area string,
- city_countryside string
- )
- row format delimited fields terminated by ',';
- ---导入数据:/root/data.txt
- load data local inpath '/root/data.txt' into table usertab2;
- --统计各级学历所占总人数百分比(对结果保留两位小数)
- select concat(round(t1.cnted * 100 / t2.cnt, 2),'%'), t1.education
- from
- (
- select count(*) as cnted,education
- from usertab2
- group by education
- ) as t1,
- (
- select count(*) as cnt from usertab2
- ) as t2
- order by t1.education;
- ---------- end ----------
复制代码 免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |