目录
经典面试系列
每科成绩都大于80分的学生信息
连续登录问题
行列转换问题
留存问题:看当天登录后第N天是否登录
TopN问题
累计计算问题
HSQL进阶版
直播间在线最大人数
SQL循环
计算中位数
产生连续数值
经典面试系列
- 每科成绩都大于80分的学生信息
- 建表+初始化
- --创建表scdn_student_score_test 并且初始化三个学生成绩
- create table hdw_tmp_dev.scdn_student_score_test as
- select '张三' as name,'数学' as subject ,'80' as score
- union all
- select '张三' as name,'语文' as subject ,'90' as score
- union all
- select '张三' as name,'英语' as subject ,'90' as score
- union all
- select '李四' as name,'数学' as subject ,'90' as score
- union all
- select '李四' as name,'语文' as subject ,'90' as score
- union all
- select '李四' as name,'英语' as subject ,'70' as score
- union all
- select '王五' as name,'数学' as subject ,'90' as score
- union all
- select '王五' as name,'语文' as subject ,'90' as score
- union all
- select '王五' as name,'英语' as subject ,'50' as score
- --查询结果显示
- | scdn_student_score_test.name | scdn_student_score_test.subject | scdn_student_score_test.score |
- +-------------------------------+----------------------------------+--------------------------------+
- | 张三 | 数学 | 80 |
- | 张三 | 语文 | 90 |
- | 张三 | 英语 | 90 |
- | 李四 | 数学 | 90 |
- | 李四 | 语文 | 90 |
- | 李四 | 英语 | 70 |
- | 王五 | 数学 | 90 |
- | 王五 | 语文 | 90 |
- | 王五 | 英语 | 50 |
- +-------------------------------+----------------------------------+---------------------
- --求所有学科成绩都大于等于80分的学生姓名
复制代码 - 思路一(头脑转换):所有问题找最小(最大)
- select
- t1.name
- ,t1.min_score
- from
- (
- select
- name
- ,min(score) as min_score
- from hdw_tmp_dev.scdn_student_score_test
- group by name
- ) as t1 --求出最小的成绩
- where t1.min_score >=80 --最小成绩大于等于80,则这个学生的所有成绩都会大于等于80
复制代码 - 执行效果
- 思路二(巧用左关联举行筛选):
- select
- t1.name
- from
- (
- select
- name
- from hdw_tmp_dev.scdn_student_score_test
- group by name
- ) as t1
- left join
- (
- select
- name
- from hdw_tmp_dev.scdn_student_score_test
- where score <80
- group by name
- ) as t2 on t1.name = t2.name
- where t2.name is null
复制代码
- 连续登录问题
- 建表+初始化
- create table hdw_tmp_dev.csdn_user_login_test as
- select 'xiaoming' as user_name,'2024-01-01' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-02' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-03' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-04' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-05' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-02' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-03' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-04' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-05' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-01' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-03' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-04' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-05' as login_date
复制代码 - Sql参考
- select
- t2.user_name
- ,t2.date_begin_flag
- ,count(1) as max_login_days
- from
- (
- select
- t1.user_name
- ,t1.login_date
- ,date_sub(date(login_date),t1.rn) as date_begin_flag
- from
- (
- select
- user_name
- ,login_date
- ,row_number()over(partition by user_name order by login_date) as rn
- from hdw_tmp_dev.csdn_user_login_test
- ) as t1
- ) as t2
- group by t2.user_name
- ,t2.date_begin_flag
复制代码 - 执行过程+效果
- 行列转换问题
- 建表+初始化:参考问题一的表
- | t1.name | t1.subject | t1.score |
- +-------------------------------+----------------------------------+-----------------+
- | 张三 | 数学 | 80 |
- | 张三 | 语文 | 90 |
- | 张三 | 英语 | 90 |
- | 李四 | 数学 | 90 |
- | 李四 | 语文 | 90 |
- | 李四 | 英语 | 70 |
- | 王五 | 数学 | 90 |
- | 王五 | 语文 | 90 |
- | 王五 | 英语 | 50 |
- +-------------------------------+----------------------------------+------------------
复制代码 - 行专列Sql参考
- select
- name
- ,max(case when subject = '数学' then score end) as math_score
- ,max(case when subject = '语文' then score end) as china_score
- ,max(case when subject = '英语' then score end) as english_score
- from hdw_tmp_dev.scdn_student_score_test
- group by name
复制代码 - 行专列执行效果
- 列转行:可以参考建表初始化语句(union all)
- select
- name
- ,collect_set(subject) as subject_set
- from hdw_tmp_dev.scdn_student_score_test
- group by name
复制代码 - 每个学生选课效果(多行变一行):
- 将上面的效果睁开(一行变多行)数据准备:
- create table hdw_tmp_dev.scdn_student_score_test_collect as
- select 'zhangsan' as name ,'"数学","语文","英语"' as subject_list
- union all
- select 'lisi' as name ,'"美术","生物","物理"' as subject_list
- union all
- select 'wangwu' as name ,'"计算机","日语","韩语"' as subject_list
复制代码 - 炸开代码
- select
- name
- ,subject_list
- ,subject_name
- from hdw_tmp_dev.scdn_student_score_test_collect
- lateral view explode(split(subject_list,',')) extend_sub as subject_name
复制代码 - 效果
- 留存问题:看当天登录后第N天是否登录
- 建表+初始化:参照连续登录表
- Sql参考
- --方案一:利用lead(日期,N)是否等于 当天登录实践+N天
- select
- t1.user_name
- ,t1.logon_date
- ,case when lead1_logon_date = date_add(logon_date,1) then '1天留存' end as 1day_remain
- ,case when lead3_logon_date = date_add(logon_date,3) then '3天留存' end as 3day_remain
- from
- (
- select
- user_name
- ,logon_date
- ,lead(user_name,1)over(partition by user_name order by logon_date) as lead1_user_name
- ,lead(logon_date,1)over(partition by user_name order by logon_date) as lead1_logon_date
- ,lead(user_name,3)over(partition by user_name order by logon_date) as lead3_user_name
- ,lead(logon_date,3)over(partition by user_name order by logon_date) as lead3_logon_date
- from hdw_tmp_dev.csdn_user_logon_test
- ) as t1
- --方案二:
- select
- t2.first_log_date as first_log_date
- ,count(t2.user_id) as new_user_cnt --新用户数
- ,count(t3.user_id) as next_user_id --次日回访用户数
- ,count(t4.user_id) as 30_user_id --30天回访用户数
- ,count(t3.user_id)/count(t2.user_id) as next_back_rate --次日回访率
- ,count(t4.user_id)/count(t2.user_id) as 30_back_rate --30天回访率
- from
- (
- select
- first_log_date
- ,user_id
- ,date_add(first_log_date,1) as next_log_date
- ,date_add(first_log_date,29) as 30_log_date
- from
- (
- select
- user_id
- ,log_time
- ,first_value(date(log_time))over(partition by user_id) as first_log_date
- from user_log
- ) as t1
- group by first_log_date
- ,user_id
- ) as t2
- left join
- (
- select
- user_id
- ,date(log_date) as log_date
- from user_log
- group by user_id
- ,date(log_date) as log_date
- ) as t3 on t2.user_id = t2.user_id and t2.next_log_date = t3.log_date
- left join
- (
- select
- user_id
- ,date(log_date) as log_date
- from user_log
- group by user_id
- ,date(log_date) as log_date
- ) as t4 on t2.user_id = t4.user_id and t2.30_log_date = t4.log_date
- group by t2.first_log_date
复制代码 - 方案一执行效果
- TopN问题
- 建表+初始化
- create table hdw_tmp_dev.scdn_student_score_test1 as
- select '张三' as name,'数学' as subject ,'80' as score
- union all
- select '张三' as name,'语文' as subject ,'90' as score
- union all
- select '张三' as name,'英语' as subject ,'90' as score
- union all
- select '李四' as name,'数学' as subject ,'90' as score
- union all
- select '李四' as name,'语文' as subject ,'90' as score
- union all
- select '李四' as name,'英语' as subject ,'70' as score
- union all
- select '王五' as name,'数学' as subject ,'90' as score
- union all
- select '王五' as name,'语文' as subject ,'90' as score
- union all
- select '王五' as name,'英语' as subject ,'50' as score
- union all
- select '小明' as name,'数学' as subject ,'88' as score
- union all
- select '小明' as name,'语文' as subject ,'99' as score
- union all
- select '小明' as name,'英语' as subject ,'77' as score
- union all
- select '小文' as name,'数学' as subject ,'66' as score
- union all
- select '小文' as name,'语文' as subject ,'89' as score
- union all
- select '小文' as name,'英语' as subject ,'90' as score
复制代码 - Sql参考:求每科前三名对应的人员的成绩单
- select
- *
- from
- (
- select
- name
- ,subject
- ,score
- ,row_number()over(partition by subject order by score desc) as rn
- from hdw_tmp_dev.scdn_student_score_test1
- ) as t1
- where rn<=3
复制代码 - 执行效果
- 累计计算问题
- 建表+初始化
- create table hdw_tmp_dev.user_sale_date as
- select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
- union all
- select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
- union all
- select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
- union all
- select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
- union all
- select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
- union all
- select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
复制代码 - SQL逻辑
- select
- user_id
- ,sale_date
- ,amount
- ,sum(amount)over(partition by user_id order by sale_date) as accuma_amount --按日期逐渐累加
- ,sum(amount)over(partition by user_id) as total_amount --按人汇总
- ,avg(amount)over(partition by user_id) as avg_amount --安人平均每天
- ,max(amount)over(partition by user_id) as max_amount --单日最大销售
- ,min(amount)over(partition by user_id) as min_amount --单日最小销售
- from hdw_tmp_dev.user_sale_date
复制代码 - 效果展示
HSQL进阶版
- 直播间在线最大人数
- 建表+初始化
- create table hdw_tmp_dev.csdn_user_login_time_detail as
- select '001' as user_id,'2024-02-01 10:00:00' as begin_time,'2024-02-01 12:00:00' as end_time
- union all
- select '002' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 13:00:00' as end_time
- union all
- select '003' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 14:00:00' as end_time
- union all
- select '004' as user_id,'2024-02-01 15:00:00' as begin_time,'2024-02-01 16:00:00' as end_time
复制代码 - SQL逻辑
- select
- t1.user_id
- ,t1.time1
- ,t1.flag
- ,sum(flag)over(order by t1.time1) as user_cnt
- from
- (
- select
- user_id
- ,begin_time as time1
- ,1 as flag
- from hdw_tmp_dev.csdn_user_login_time_detail
- union all
- select
- user_id
- ,end_time
- ,-1 as flag
- from hdw_tmp_dev.csdn_user_login_time_detail
- ) as t1
复制代码 - 效果展示
- SQL循环
- 建表+初始化
- create table hdw_tmp_dev.cycle_1 as
- select '1011' as a
- union all
- select '0101' as a
复制代码 - SQL逻辑
- select
- a,
- concat_ws(",",collect_list(cast(index as string))) as res
- from (
- select
- a,
- index+1 as index,
- chr
- from (
- select
- a,
- concat_ws(",",substr(a,1,1),substr(a,2,1),substr(a,3,1),substr(a,-1)) str
- from hdw_tmp_dev.cycle_1 as t8
- ) tmp1
- lateral view posexplode(split(str,",")) t as index,chr
- where chr = "1"
- ) tmp2
- group by a;
复制代码 - 效果展示
- 计算中位数
- 建表+初始化:
- create table hdw_tmp_dev.user_sale_date as
- select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
- union all
- select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
- union all
- select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
- union all
- select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
- union all
- select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
- union all
- select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
复制代码 - SQL逻辑
- select
- t1.user_id
- ,t1.sale_date
- ,t1.amount
- ,avg(t1.amount)over(partition by t1.user_id) as zhongwenshu
- from
- (
- select
- user_id
- ,sale_date
- ,amount
- ,row_number()over(partition by user_id order by amount) as rn
- from hdw_tmp_dev.user_sale_date
- ) as t1
- left join
- (
- select
- user_id
- ,count(1) as cnt
- from hdw_tmp_dev.user_sale_date
- group by user_id
- ) as t2 on t1.user_id = t2.user_id
- where t1.rn in (cnt/2,(cnt+1)/2,cnt/2+1)
- --总个数为奇数命中(cnt+1)/2;总个数为偶数命中:cnt/2,cnt/2+1,两数相加求平均值
复制代码 - 效果展示
- 产生连续数值
- SQL逻辑
- --产生1到10的连续数据
- select
- start_id + pos as id
- ,pos
- ,val
- from(
- select
- 1 as start_id,
- 10 as end_id
- ) m lateral view posexplode(split(space(end_id - start_id),'')) t as pos, val
- --方案二
- select
- row_number() over() as id
- from
- (select split(space(99), '') as x) t
- lateral view
- explode(x) ex;
复制代码 - 效果展示
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |