ToB企服应用市场:ToB评测及商务社交产业平台
标题:
HiveSQL经典面试题(建议点赞收藏)
[打印本页]
作者:
慢吞云雾缓吐愁
时间:
2024-6-19 05:01
标题:
HiveSQL经典面试题(建议点赞收藏)
目录
经典面试系列
每科成绩都大于80分的学生信息
连续登录问题
行列转换问题
留存问题:看当天登录后第N天是否登录
TopN问题
累计计算问题
HSQL进阶版
直播间在线最大人数
SQL循环
计算中位数
产生连续数值
经典面试系列
每科成绩都大于80分的学生信息
建表+初始化
--创建表scdn_student_score_test 并且初始化三个学生成绩
create table hdw_tmp_dev.scdn_student_score_test as
select '张三' as name,'数学' as subject ,'80' as score
union all
select '张三' as name,'语文' as subject ,'90' as score
union all
select '张三' as name,'英语' as subject ,'90' as score
union all
select '李四' as name,'数学' as subject ,'90' as score
union all
select '李四' as name,'语文' as subject ,'90' as score
union all
select '李四' as name,'英语' as subject ,'70' as score
union all
select '王五' as name,'数学' as subject ,'90' as score
union all
select '王五' as name,'语文' as subject ,'90' as score
union all
select '王五' as name,'英语' as subject ,'50' as score
--查询结果显示
| scdn_student_score_test.name | scdn_student_score_test.subject | scdn_student_score_test.score |
+-------------------------------+----------------------------------+--------------------------------+
| 张三 | 数学 | 80 |
| 张三 | 语文 | 90 |
| 张三 | 英语 | 90 |
| 李四 | 数学 | 90 |
| 李四 | 语文 | 90 |
| 李四 | 英语 | 70 |
| 王五 | 数学 | 90 |
| 王五 | 语文 | 90 |
| 王五 | 英语 | 50 |
+-------------------------------+----------------------------------+---------------------
--求所有学科成绩都大于等于80分的学生姓名
复制代码
思路一(头脑转换):所有问题找最小(最大)
select
t1.name
,t1.min_score
from
(
select
name
,min(score) as min_score
from hdw_tmp_dev.scdn_student_score_test
group by name
) as t1 --求出最小的成绩
where t1.min_score >=80 --最小成绩大于等于80,则这个学生的所有成绩都会大于等于80
复制代码
执行效果
思路二(巧用左关联举行筛选):
select
t1.name
from
(
select
name
from hdw_tmp_dev.scdn_student_score_test
group by name
) as t1
left join
(
select
name
from hdw_tmp_dev.scdn_student_score_test
where score <80
group by name
) as t2 on t1.name = t2.name
where t2.name is null
复制代码
连续登录问题
建表+初始化
create table hdw_tmp_dev.csdn_user_login_test as
select 'xiaoming' as user_name,'2024-01-01' as login_date
union all
select 'xiaoming' as user_name,'2024-01-02' as login_date
union all
select 'xiaoming' as user_name,'2024-01-03' as login_date
union all
select 'xiaoming' as user_name,'2024-01-04' as login_date
union all
select 'xiaoming' as user_name,'2024-01-05' as login_date
union all
select 'dahuang' as user_name,'2024-01-02' as login_date
union all
select 'dahuang' as user_name,'2024-01-03' as login_date
union all
select 'dahuang' as user_name,'2024-01-04' as login_date
union all
select 'dahuang' as user_name,'2024-01-05' as login_date
union all
select 'lucky_dog' as user_name,'2024-01-01' as login_date
union all
select 'lucky_dog' as user_name,'2024-01-03' as login_date
union all
select 'lucky_dog' as user_name,'2024-01-04' as login_date
union all
select 'lucky_dog' as user_name,'2024-01-05' as login_date
复制代码
Sql参考
select
t2.user_name
,t2.date_begin_flag
,count(1) as max_login_days
from
(
select
t1.user_name
,t1.login_date
,date_sub(date(login_date),t1.rn) as date_begin_flag
from
(
select
user_name
,login_date
,row_number()over(partition by user_name order by login_date) as rn
from hdw_tmp_dev.csdn_user_login_test
) as t1
) as t2
group by t2.user_name
,t2.date_begin_flag
复制代码
执行过程+效果
行列转换问题
建表+初始化:参考问题一的表
| t1.name | t1.subject | t1.score |
+-------------------------------+----------------------------------+-----------------+
| 张三 | 数学 | 80 |
| 张三 | 语文 | 90 |
| 张三 | 英语 | 90 |
| 李四 | 数学 | 90 |
| 李四 | 语文 | 90 |
| 李四 | 英语 | 70 |
| 王五 | 数学 | 90 |
| 王五 | 语文 | 90 |
| 王五 | 英语 | 50 |
+-------------------------------+----------------------------------+------------------
复制代码
行专列Sql参考
select
name
,max(case when subject = '数学' then score end) as math_score
,max(case when subject = '语文' then score end) as china_score
,max(case when subject = '英语' then score end) as english_score
from hdw_tmp_dev.scdn_student_score_test
group by name
复制代码
行专列执行效果
列转行:可以参考建表初始化语句(union all)
select
name
,collect_set(subject) as subject_set
from hdw_tmp_dev.scdn_student_score_test
group by name
复制代码
每个学生选课效果(多行变一行):
将上面的效果睁开(一行变多行)数据准备:
create table hdw_tmp_dev.scdn_student_score_test_collect as
select 'zhangsan' as name ,'"数学","语文","英语"' as subject_list
union all
select 'lisi' as name ,'"美术","生物","物理"' as subject_list
union all
select 'wangwu' as name ,'"计算机","日语","韩语"' as subject_list
复制代码
炸开代码
select
name
,subject_list
,subject_name
from hdw_tmp_dev.scdn_student_score_test_collect
lateral view explode(split(subject_list,',')) extend_sub as subject_name
复制代码
效果
留存问题:看当天登录后第N天是否登录
建表+初始化:参照连续登录表
Sql参考
--方案一:利用lead(日期,N)是否等于 当天登录实践+N天
select
t1.user_name
,t1.logon_date
,case when lead1_logon_date = date_add(logon_date,1) then '1天留存' end as 1day_remain
,case when lead3_logon_date = date_add(logon_date,3) then '3天留存' end as 3day_remain
from
(
select
user_name
,logon_date
,lead(user_name,1)over(partition by user_name order by logon_date) as lead1_user_name
,lead(logon_date,1)over(partition by user_name order by logon_date) as lead1_logon_date
,lead(user_name,3)over(partition by user_name order by logon_date) as lead3_user_name
,lead(logon_date,3)over(partition by user_name order by logon_date) as lead3_logon_date
from hdw_tmp_dev.csdn_user_logon_test
) as t1
--方案二:
select
t2.first_log_date as first_log_date
,count(t2.user_id) as new_user_cnt --新用户数
,count(t3.user_id) as next_user_id --次日回访用户数
,count(t4.user_id) as 30_user_id --30天回访用户数
,count(t3.user_id)/count(t2.user_id) as next_back_rate --次日回访率
,count(t4.user_id)/count(t2.user_id) as 30_back_rate --30天回访率
from
(
select
first_log_date
,user_id
,date_add(first_log_date,1) as next_log_date
,date_add(first_log_date,29) as 30_log_date
from
(
select
user_id
,log_time
,first_value(date(log_time))over(partition by user_id) as first_log_date
from user_log
) as t1
group by first_log_date
,user_id
) as t2
left join
(
select
user_id
,date(log_date) as log_date
from user_log
group by user_id
,date(log_date) as log_date
) as t3 on t2.user_id = t2.user_id and t2.next_log_date = t3.log_date
left join
(
select
user_id
,date(log_date) as log_date
from user_log
group by user_id
,date(log_date) as log_date
) as t4 on t2.user_id = t4.user_id and t2.30_log_date = t4.log_date
group by t2.first_log_date
复制代码
方案一执行效果
TopN问题
建表+初始化
create table hdw_tmp_dev.scdn_student_score_test1 as
select '张三' as name,'数学' as subject ,'80' as score
union all
select '张三' as name,'语文' as subject ,'90' as score
union all
select '张三' as name,'英语' as subject ,'90' as score
union all
select '李四' as name,'数学' as subject ,'90' as score
union all
select '李四' as name,'语文' as subject ,'90' as score
union all
select '李四' as name,'英语' as subject ,'70' as score
union all
select '王五' as name,'数学' as subject ,'90' as score
union all
select '王五' as name,'语文' as subject ,'90' as score
union all
select '王五' as name,'英语' as subject ,'50' as score
union all
select '小明' as name,'数学' as subject ,'88' as score
union all
select '小明' as name,'语文' as subject ,'99' as score
union all
select '小明' as name,'英语' as subject ,'77' as score
union all
select '小文' as name,'数学' as subject ,'66' as score
union all
select '小文' as name,'语文' as subject ,'89' as score
union all
select '小文' as name,'英语' as subject ,'90' as score
复制代码
Sql参考:求每科前三名对应的人员的成绩单
select
*
from
(
select
name
,subject
,score
,row_number()over(partition by subject order by score desc) as rn
from hdw_tmp_dev.scdn_student_score_test1
) as t1
where rn<=3
复制代码
执行效果
累计计算问题
建表+初始化
create table hdw_tmp_dev.user_sale_date as
select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
union all
select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
union all
select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
union all
select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
union all
select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
union all
select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
复制代码
SQL逻辑
select
user_id
,sale_date
,amount
,sum(amount)over(partition by user_id order by sale_date) as accuma_amount --按日期逐渐累加
,sum(amount)over(partition by user_id) as total_amount --按人汇总
,avg(amount)over(partition by user_id) as avg_amount --安人平均每天
,max(amount)over(partition by user_id) as max_amount --单日最大销售
,min(amount)over(partition by user_id) as min_amount --单日最小销售
from hdw_tmp_dev.user_sale_date
复制代码
效果展示
HSQL进阶版
直播间在线最大人数
建表+初始化
create table hdw_tmp_dev.csdn_user_login_time_detail as
select '001' as user_id,'2024-02-01 10:00:00' as begin_time,'2024-02-01 12:00:00' as end_time
union all
select '002' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 13:00:00' as end_time
union all
select '003' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 14:00:00' as end_time
union all
select '004' as user_id,'2024-02-01 15:00:00' as begin_time,'2024-02-01 16:00:00' as end_time
复制代码
SQL逻辑
select
t1.user_id
,t1.time1
,t1.flag
,sum(flag)over(order by t1.time1) as user_cnt
from
(
select
user_id
,begin_time as time1
,1 as flag
from hdw_tmp_dev.csdn_user_login_time_detail
union all
select
user_id
,end_time
,-1 as flag
from hdw_tmp_dev.csdn_user_login_time_detail
) as t1
复制代码
效果展示
SQL循环
建表+初始化
create table hdw_tmp_dev.cycle_1 as
select '1011' as a
union all
select '0101' as a
复制代码
SQL逻辑
select
a,
concat_ws(",",collect_list(cast(index as string))) as res
from (
select
a,
index+1 as index,
chr
from (
select
a,
concat_ws(",",substr(a,1,1),substr(a,2,1),substr(a,3,1),substr(a,-1)) str
from hdw_tmp_dev.cycle_1 as t8
) tmp1
lateral view posexplode(split(str,",")) t as index,chr
where chr = "1"
) tmp2
group by a;
复制代码
效果展示
计算中位数
建表+初始化:
create table hdw_tmp_dev.user_sale_date as
select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
union all
select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
union all
select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
union all
select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
union all
select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
union all
select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
复制代码
SQL逻辑
select
t1.user_id
,t1.sale_date
,t1.amount
,avg(t1.amount)over(partition by t1.user_id) as zhongwenshu
from
(
select
user_id
,sale_date
,amount
,row_number()over(partition by user_id order by amount) as rn
from hdw_tmp_dev.user_sale_date
) as t1
left join
(
select
user_id
,count(1) as cnt
from hdw_tmp_dev.user_sale_date
group by user_id
) as t2 on t1.user_id = t2.user_id
where t1.rn in (cnt/2,(cnt+1)/2,cnt/2+1)
--总个数为奇数命中(cnt+1)/2;总个数为偶数命中:cnt/2,cnt/2+1,两数相加求平均值
复制代码
效果展示
产生连续数值
SQL逻辑
--产生1到10的连续数据
select
start_id + pos as id
,pos
,val
from(
select
1 as start_id,
10 as end_id
) m lateral view posexplode(split(space(end_id - start_id),'')) t as pos, val
--方案二
select
row_number() over() as id
from
(select split(space(99), '') as x) t
lateral view
explode(x) ex;
复制代码
效果展示
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。
欢迎光临 ToB企服应用市场:ToB评测及商务社交产业平台 (https://dis.qidao123.com/)
Powered by Discuz! X3.4