查询语法和内置函数
查询语法整体结构
- SELECT
- [ALL | DISTINCT | DISTINCTROW ] -- 对查询字段的结果是否需要去重,还是全部保留等参数
- select_expr [, select_expr ...] -- select的查询字段
- [FROM table_references
- [PARTITION partition_list] -- from 哪个库里面的那张表甚至哪一个(几个)分区
- [WHERE where_condition] -- WHERE 查询
- [GROUP BY {col_name | expr | position} -- group by 聚合
- [ASC | DESC], ... [WITH ROLLUP]]
- [HAVING where_condition] -- having 针对聚合函数的再一次过滤
- [ORDER BY {col_name | expr | position} -- 对结果数据按照字段进行排序
- [ASC | DESC], ...] -- 排序规则
- [LIMIT {[offset,] row_count | row_count OFFSET offset}] -- 限制输出多少行内容
- [INTO OUTFILE 'file_name'] -- 将查询的结果导出到文件中
复制代码 内置函数
条件函数
if
- if(boolean condition, type valueTrue, type valueFalseOrNull)
- --如果表达式 condition 成立,返回结果 valueTrue;否则,返回结果 valueFalseOrNull
- --返回值类型:valueTrue 表达式结果的类型
- 示例:
- SQL
- mysql> select user_id, if(user_id = 1, "true", "false") as test_if from test;
- +---------+---------+
- | user_id | test_if |
- +---------+---------+
- | 1 | true |
- | 2 | false |
- +---------+---------+
复制代码 ifnull,nvl,coalesce,nullif
- ifnull(expr1, expr2)
- --如果 expr1 的值不为 NULL 则返回 expr1,否则返回 expr2
- nvl(expr1, expr2)
- --如果 expr1 的值不为 NULL 则返回 expr1,否则返回 expr2
- coalesce(expr1, expr2, ...., expr_n))
- --返回参数中的第一个非空表达式(从左向右)
- nullif(expr1, expr2)
- -- 如果两个参数相等,则返回NULL。否则返回第一个参数的值
- mysql> select ifnull(1,0);
- +--------------+
- | ifnull(1, 0) |
- +--------------+
- | 1 |
- +--------------+
- mysql> select nvl(null,10);
- +------------------+
- | nvl(null,10) |
- +------------------+
- | 10 |
- +------------------+
- mysql> select coalesce(NULL, '1111', '0000');
- +--------------------------------+
- | coalesce(NULL, '1111', '0000') |
- +--------------------------------+
- | 1111 |
- +--------------------------------+
- mysql> select coalesce(NULL, NULL,NULL,'0000', NULL);
- +----------------------------------------+
- | coalesce(NULL, NULL,NULL,'0000', NULL) |
- +----------------------------------------+
- | 0000 |
- +----------------------------------------+
- mysql> select nullif(1,1);
- +--------------+
- | nullif(1, 1) |
- +--------------+
- | NULL |
- +--------------+
- mysql> select nullif(1,0);
- +--------------+
- | nullif(1, 0) |
- +--------------+
- | 1 |
- +--------------+
复制代码 case
- -- 方式一
- CASE expression
- WHEN condition1 THEN result1
- [WHEN condition2 THEN result2]
- ...
- [WHEN conditionN THEN resultN]
- [ELSE result]
- END
- -- 方式二
- CASE WHEN condition1 THEN result1
- [WHEN condition2 THEN result2]
- ...
- [WHEN conditionN THEN resultN]
- [ELSE result]
- END
- -- 将表达式和多个可能的值进行比较,当匹配时返回相应的结果
- mysql> select user_id,
- case user_id
- when 1 then 'user_id = 1'
- when 2 then 'user_id = 2'
- else 'user_id not exist'
- end as test_case
- from test;
- +---------+-------------+
- | user_id | test_case |
- +---------+-------------+
- | 1 | user_id = 1 |
- | 2 | user_id = 2 |
- | 3 | 'user_id not exist' |
- +---------+-------------+
-
- mysql> select user_id,
- case
- when user_id = 1 then 'user_id = 1'
- when user_id = 2 then 'user_id = 2'
- else 'user_id not exist'
- end as test_case
- from test;
- +---------+-------------+
- | user_id | test_case |
- +---------+-------------+
- | 1 | user_id = 1 |
- | 2 | user_id = 2 |
- +---------+-------------+
复制代码 聚合函数
min,max,sum,avg,count和mysql用法一致
min_by和max_by
- MAX_BY(expr1, expr2)
- 返回expr2最大值所在行的 expr1 (求分组top1的简介函数)
- MySQL > select * from tbl;
- +------+------+------+------+
- | k1 | k2 | k3 | k4 |
- +------+------+------+------+
- | 0 | 3 | 2 | 100 |
- | 1 | 2 | 3 | 4 |
- | 4 | 3 | 2 | 2 |
- | 3 | 4 | 2 | 1 |
- +------+------+------+------+
- MySQL > select max_by(k1, k4) from tbl;
- select max_by(k1, k4) from tbl;
- --取k4这个列中的最大值对应的k1这个列的值
- +--------------------+
- | max_by(`k1`, `k4`) |
- +--------------------+
- | 0 |
- +--------------------+
- -- 练习
- name subject score
- zss,chinese,99
- zss,math,89
- zss,English,79
- lss,chinese,88
- lss,math,88
- lss,English,22
- www,chinese,99
- www,math,45
- zll,chinese,23
- zll,math,88
- zll,English,80
- www,English,94
- -- 建表语句
- create table score
- (
- name varchar(50),
- subject varchar(50),
- score double
- )
- DUPLICATE KEY(name)
- DISTRIBUTED BY HASH(name) BUCKETS 1;
- -- 通过本地文件的方式导入数据
- curl \
- -u root: \
- -H "label:salary" \
- -H "column_separator:," \
- -T /root/data/salary.txt \
- http://doitedu01:8040/api/test/salary/_stream_load
- -- 求每门课程成绩最高分的那个人
- select
- subject,max_by(name,score) as name
- from score
- group by subject
- +---------+------+
- | subject | name |
- +---------+------+
- | English | www |
- | math | lss |
- | chinese | www |
- +---------+------+
复制代码 group_concat
- VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR 列名[, VARCHAR sep]
- 该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串
- -- group_concat对于收集的字段只能是string,varchar,char类型
- --当不指定分隔符的时候,默认使用 ','
- VARCHAR :代表GROUP_CONCAT函数返回值类型
- [DISTINCT]:可选参数,针对需要拼接的列的值进行去重
- [, VARCHAR sep]:拼接成字符串的分隔符,默认是 ','
- --建表
- create table example(
- id int,
- name varchar(50),
- age int,
- gender string,
- is_marry boolean,
- marry_date date,
- marry_datetime datetime
- )engine = olap
- distributed by hash(id) buckets 3;
- --插入数据
- insert into example values \
- (1,'zss',18,'male',0,null,null),\
- (2,'lss',28,'female',1,'2022-01-01','2022-01-01 11:11:11'),\
- (3,'ww',38,'male',1,'2022-02-01','2022-02-01 11:11:11'),\
- (4,'zl',48,'female',0,null,null),\
- (5,'tq',58,'male',1,'2022-03-01','2022-03-01 11:11:11'),\
- (6,'mly',18,'male',1,'2022-04-01','2022-04-01 11:11:11'),\
- (7,null,18,'male',1,'2022-05-01','2022-05-01 11:11:11');
- --当收集的那一列,有值为null时,他会自动将null的值过滤掉
- select
- gender,
- group_concat(name,',') as gc_name
- from example
- group by gender;
- +--------+---------------+
- | gender | gc_name |
- +--------+---------------+
- | female | zl,lss |
- | male | zss,ww,tq,mly |
- +--------+---------------+
- select
- gender,
- group_concat(DISTINCT cast(age as string)) as gc_age
- from example
- group by gender;
- +--------+------------+
- | gender | gc_age |
- +--------+------------+
- | female | 48, 28 |
- | male | 58, 38, 18 |
- +--------+------------+
- -- 求:每一个人有考试成绩的所有科目
- select
- name,
- group_concat(subject,',') as all_subject
- from score
- group by name
复制代码 collect_list,collect_set (1.2版本上线)
- ARRAY<T> collect_list(expr)
- --返回一个包含 expr 中所有元素(不包括NULL)的数组,数组中元素顺序是不确定的。
- ARRAY<T> collect_set(expr)
- --返回一个包含 expr 中所有去重后元素(不包括NULL)的数组,数组中元素顺序是不确定的。
复制代码 日期函数
获取当前时间
- -- curdate,current_date,now,curtime,current_time,current_timestamp
- select current_date();
- +----------------+
- | current_date() |
- +----------------+
- | 2022-11-25 |
- +----------------+
- select curdate();
- +------------+
- | curdate() |
- +------------+
- | 2022-11-25 |
- +------------+
- select now();
- +---------------------+
- | now() |
- +---------------------+
- | 2022-11-25 00:55:15 |
- +---------------------+
- select curtime();
- +-----------+
- | curtime() |
- +-----------+
- | 00:42:13 |
- +-----------+
- select current_timestamp();
- +---------------------+
- | current_timestamp() |
- +---------------------+
- | 2022-11-25 00:42:30 |
- +---------------------+
复制代码 last_day(1.2版本上线)
- DATE last_day(DATETIME date)
- -- 返回输入日期中月份的最后一天;
- --'28'(非闰年的二月份),
- --'29'(闰年的二月份),
- --'30'(四月,六月,九月,十一月),
- --'31'(一月,三月,五月,七月,八月,十月,十二月)
- select last_day('2000-03-03 01:00:00'); -- 给我返回这个月份中的最后一天的日期 年月日
- ERROR 1105 (HY000): errCode = 2, detailMessage = No matching function with signature: last_day(varchar(-1)).
复制代码 from_unixtime
- DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format])
- -- 将 unix 时间戳转化为对应的 time 格式,返回的格式由 string_format 指定
- --支持date_format中的format格式,默认为 %Y-%m-%d %H:%i:%s
- -- 正常使用的三种格式
- yyyyMMdd
- yyyy-MM-dd
- yyyy-MM-dd HH:mm:ss
- 示例:
- SQL
- mysql> select from_unixtime(1196440219); -- 时区
- +---------------------------+
- | from_unixtime(1196440219) |
- +---------------------------+
- | 2007-12-01 00:30:19 |
- +---------------------------+
- mysql> select from_unixtime(1196440219, 'yyyy-MM-dd HH:mm:ss');
- +--------------------------------------------------+
- | from_unixtime(1196440219, 'yyyy-MM-dd HH:mm:ss') |
- +--------------------------------------------------+
- | 2007-12-01 00:30:19 |
- +--------------------------------------------------+
- mysql> select from_unixtime(1196440219, '%Y-%m-%d');
- +-----------------------------------------+
- | from_unixtime(1196440219, '%Y-%m-%d') |
- +-----------------------------------------+
- | 2007-12-01 |
- +-----------------------------------------+
复制代码 unix_timestamp
- UNIX_TIMESTAMP(),
- UNIX_TIMESTAMP(DATETIME date),
- UNIX_TIMESTAMP(DATETIME date, STRING fmt) -- 给一个日期,指定这个日期的格式
- -- 将日期转换成时间戳,返回值是一个int类型
- -- 获取当前日期的时间戳
- select unix_timestamp();
- +------------------+
- | unix_timestamp() |
- +------------------+
- | 1669309722 |
- +------------------+
- -- 获取指定日期的时间戳
- select unix_timestamp('2022-11-26 01:09:01');
- +---------------------------------------+
- | unix_timestamp('2022-11-26 01:09:01') |
- +---------------------------------------+
- | 1669396141 |
- +---------------------------------------+
- -- 给定一个特殊日期格式的时间戳,指定格式
- select unix_timestamp('2022-11-26 01:09-01', '%Y-%m-%d %H:%i-%s');
- +------------------------------------------------------------+
- | unix_timestamp('2022-11-26 01:09-01', '%Y-%m-%d %H:%i-%s') |
- +------------------------------------------------------------+
- | 1669396141 |
- +------------------------------------------------------------+
复制代码 to_date
- DATE TO_DATE(DATETIME)
- --返回 DATETIME 类型中的日期部分。
- 示例:
- SQL
- select to_date("2022-11-20 00:00:00");
- +--------------------------------+
- | to_date('2022-11-20 00:00:00') |
- +--------------------------------+
- | 2022-11-20 |
- +--------------------------------+
复制代码 extract
- extract(unit FROM DATETIME) --抽取
- -- 提取DATETIME某个指定单位的值。
- --unit单位可以为year, month, day, hour, minute或者second
- 示例:
- SQL
- select
- extract(year from '2022-09-22 17:01:30') as year,
- extract(month from '2022-09-22 17:01:30') as month,
- extract(day from '2022-09-22 17:01:30') as day,
- extract(hour from '2022-09-22 17:01:30') as hour,
- extract(minute from '2022-09-22 17:01:30') as minute,
- extract(second from '2022-09-22 17:01:30') as second;
- +------+-------+------+------+--------+--------+
- | year | month | day | hour | minute | second |
- +------+-------+------+------+--------+--------+
- | 2022 | 9 | 22 | 17 | 1 | 30 |
- +------+-------+------+------+--------+--------+
复制代码 date_add,date_sub,datediff
- DATE_ADD(DATETIME date,INTERVAL expr type)
- DATE_SUB(DATETIME date,INTERVAL expr type)
- DATEDIFF(DATETIME expr1,DATETIME expr2)
- -- 计算两个日期相差多少天,结果精确到天。
- -- 向日期添加指定的时间间隔。
- -- date 参数是合法的日期表达式。
- -- expr 参数是您希望添加的时间间隔。
- -- type 参数可以是下列值:YEAR, MONTH, DAY, HOUR, MINUTE, SECOND
- select date_add('2010-11-30 23:59:59', INTERVAL 2 DAY);
- +-------------------------------------------------+
- | date_add('2010-11-30 23:59:59', INTERVAL 2 DAY) |
- +-------------------------------------------------+
- | 2010-12-02 23:59:59 |
- +-------------------------------------------------+
- --传一个负数进去也就等同于date_sub
- select date_add('2010-11-30 23:59:59', INTERVAL -2 DAY);
- +--------------------------------------------------+
- | date_add('2010-11-30 23:59:59', INTERVAL -2 DAY) |
- +--------------------------------------------------+
- | 2010-11-28 23:59:59 |
- +--------------------------------------------------+
- mysql> select datediff('2022-11-27 22:51:56','2022-11-24 22:50:56');
- +--------------------------------------------------------+
- | datediff('2022-11-27 22:51:56', '2022-11-24 22:50:56') |
- +--------------------------------------------------------+
- | 3 |
- +--------------------------------------------------------+
复制代码 date_format
- VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)
- --将日期类型按照format的类型转化为字符串
- 示例:
- SQL
- select date_format('2007-10-04 22:23:00', '%H:%i:%s');
- +------------------------------------------------+
- | date_format('2007-10-04 22:23:00', '%H:%i:%s') |
- +------------------------------------------------+
- | 22:23:00 |
- +------------------------------------------------+
- select date_format('2007-10-04 22:23:00', 'yyyy-MM-dd');
- +------------------------------------------------+
- | date_format('2007-10-04 22:23:00', '%Y-%m-%d') |
- +------------------------------------------------+
- | 2007-10-04 |
- +------------------------------------------------+
复制代码 字符串函数
length,lower,upper,reverse
获取到字符串的长度,对字符串转大小写和字符串的反转
lpad,rpad
- VARCHAR rpad(VARCHAR str, INT len, VARCHAR pad)
- VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)
- -- 返回 str 中长度为 len(从首字母开始算起)的字符串。
- --如果 len 大于 str 的长度,则在 str 的后面不断补充 pad 字符,
- --直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度,
- --该函数相当于截断 str 字符串,只返回长度为 len 的字符串。
- --len 指的是字符长度而不是字节长度。
- 示例:
- SQL
- -- 向左边补齐
- SELECT lpad("1", 5, "hellohello");
- +---------------------+
- | lpad("1", 5, "0") |
- +---------------------+
- | 00001 |
- +---------------------+
- -- 向右边补齐
- SELECT rpad('11', 5, '0');
- +---------------------+
- | rpad('11', 5, '0') |
- +---------------------+
- | 11000 |
- +---------------------+
复制代码 concat,concat_ws
- select concat("a", "b");
- +------------------+
- | concat('a', 'b') |
- +------------------+
- | ab |
- +------------------+
- select concat("a", "b", "c");
- +-----------------------+
- | concat('a', 'b', 'c') |
- +-----------------------+
- | abc |
- +-----------------------+
- -- concat中,如果有一个值为null,那么得到的结果就是null
- mysql> select concat("a", null, "c");
- +------------------------+
- | concat('a', NULL, 'c') |
- +------------------------+
- | NULL |
- +------------------------+
- --使用第一个参数 sep 作为连接符
- --将第二个参数以及后续所有参数(或ARRAY中的所有字符串)拼接成一个字符串。
- -- 如果分隔符是 NULL,返回 NULL。 concat_ws函数不会跳过空字符串,会跳过 NULL 值。
- mysql> select concat_ws("_", "a", "b");
- +----------------------------+
- | concat_ws("_", "a", "b") |
- +----------------------------+
- | a_b |
- +----------------------------+
- mysql> select concat_ws(NULL, "d", "is");
- +----------------------------+
- | concat_ws(NULL, 'd', 'is') |
- +----------------------------+
- | NULL |
- +----------------------------+
复制代码 substr
- --求子字符串,返回第一个参数描述的字符串中从start开始长度为len的部分字符串。
- --首字母的下标为1。
- mysql> select substr("Hello doris", 3, 5);
- +-----------------------------+
- | substr('Hello doris', 2, 1) |
- +-----------------------------+
- | e |
- +-----------------------------+
- mysql> select substr("Hello doris", 1, 2);
- +-----------------------------+
- | substr('Hello doris', 1, 2) |
- +-----------------------------+
- | He |
- +-----------------------------+
复制代码 ends_with,starts_with
- BOOLEAN ENDS_WITH (VARCHAR str, VARCHAR suffix)
- --如果字符串以指定后缀结尾,返回true。否则,返回false。
- --任意参数为NULL,返回NULL。
- BOOLEAN STARTS_WITH (VARCHAR str, VARCHAR prefix)
- --如果字符串以指定前缀开头,返回true。否则,返回false。
- --任意参数为NULL,返回NULL。
- 示例:
- SQL
- select ends_with("Hello doris", "doris");
- +-----------------------------------+
- | ends_with('Hello doris', 'doris') |
- +-----------------------------------+
- | 1 |
- +-----------------------------------+
- select ends_with("Hello doris", "Hello");
- +-----------------------------------+
- | ends_with('Hello doris', 'Hello') |
- +-----------------------------------+
- | 0 |
- +-----------------------------------+
- select starts_with("hello world","hello");
- +-------------------------------------+
- | starts_with('hello world', 'hello') |
- +-------------------------------------+
- | 1 |
- +-------------------------------------+
- select starts_with("hello world","world");
- +-------------------------------------+
- | starts_with('hello world', 'world') |
- +-------------------------------------+
- | 0 |
- +-------------------------------------+
复制代码 trim,ltrim,rtrim
- VARCHAR trim(VARCHAR str)
- -- 将参数 str 中左侧和右侧开始部分连续出现的空格去掉
- mysql> SELECT trim(' ab d ') str;
- +------+
- | str |
- +------+
- | ab d |
- +------+
- VARCHAR ltrim(VARCHAR str)
- -- 将参数 str 中从左侧部分开始部分连续出现的空格去掉
- mysql> SELECT ltrim(' ab d') str;
- +------+
- | str |
- +------+
- | ab d |
- +------+
- VARCHAR rtrim(VARCHAR str)
- --将参数 str 中从右侧部分开始部分连续出现的空格去掉
- mysql> SELECT rtrim('ab d ') str;
- +------+
- | str |
- +------+
- | ab d |
- +------+
复制代码 null_or_empty,not_null_or_empty
- BOOLEAN NULL_OR_EMPTY (VARCHAR str)
- -- 如果字符串为空字符串或者NULL,返回true。否则,返回false。
- select null_or_empty(null);
- +---------------------+
- | null_or_empty(NULL) |
- +---------------------+
- | 1 |
- +---------------------+
- select null_or_empty("");
- +-------------------+
- | null_or_empty('') |
- +-------------------+
- | 1 |
- +-------------------+
- select null_or_empty("a");
- +--------------------+
- | null_or_empty('a') |
- +--------------------+
- | 0 |
- +--------------------+
- BOOLEAN NOT_NULL_OR_EMPTY (VARCHAR str)
- 如果字符串为空字符串或者NULL,返回false。否则,返回true。
- select not_null_or_empty(null);
- +-------------------------+
- | not_null_or_empty(NULL) |
- +-------------------------+
- | 0 |
- +-------------------------+
- select not_null_or_empty("");
- +-----------------------+
- | not_null_or_empty('') |
- +-----------------------+
- | 0 |
- +-----------------------+
- select not_null_or_empty("a");
- +------------------------+
- | not_null_or_empty('a') |
- +------------------------+
- | 1 |
- +------------------------+
复制代码 replace
- VARCHAR REPLACE (VARCHAR str, VARCHAR old, VARCHAR new)
- -- 将str字符串中的old子串全部替换为new串
- mysql> select replace("http://www.baidu.com:9090", "9090", "");
- +------------------------------------------------------+
- | replace('http://www.baidu.com:9090', '9090', '') |
- +------------------------------------------------------+
- | http://www.baidu.com: |
- +------------------------------------------------------+
复制代码 split_part
- VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)
- -- 根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。
- mysql> select split_part("hello world", " ", 1);
- +----------------------------------+
- | split_part('hello world', ' ', 1) |
- +----------------------------------+
- | hello |
- +----------------------------------+
- mysql> select split_part("hello world", " ", 2);
- +----------------------------------+
- | split_part('hello world', ' ', 2) |
- +----------------------------------+
- | world |
- +----------------------------------+
- mysql> select split_part("2019年7月8号", "月", 1);
- +-----------------------------------------+
- | split_part('2019年7月8号', '月', 1) |
- +-----------------------------------------+
- | 2019年7 |
- +-----------------------------------------+
- mysql> select split_part("abca", "a", 1);
- +----------------------------+
- | split_part('abca', 'a', 1) |
- +----------------------------+
- | |
- +----------------------------+
复制代码 money_format
- VARCHAR money_format(Number)
- -- 将数字按照货币格式输出,整数部分每隔3位用逗号分隔,小数部分保留2位
- mysql> select money_format(17014116);
- +------------------------+
- | money_format(17014116) |
- +------------------------+
- | 17,014,116.00 |
- +------------------------+
- mysql> select money_format(1123.456);
- +------------------------+
- | money_format(1123.456) |
- +------------------------+
- | 1,123.46 |
- +------------------------+
- mysql> select money_format(1123.4);
- +----------------------+
- | money_format(1123.4) |
- +----------------------+
- | 1,123.40 |
- +----------------------+
复制代码 数学函数
ceil和floor
- BIGINT ceil(DOUBLE x)
- -- 返回大于或等于x的最小整数值.
- mysql> select ceil(1);
- +-----------+
- | ceil(1.0) |
- +-----------+
- | 1 |
- +-----------+
- mysql> select ceil(2.4);
- +-----------+
- | ceil(2.4) |
- +-----------+
- | 3 |
- +-----------+
- mysql> select ceil(-10.3);
- +-------------+
- | ceil(-10.3) |
- +-------------+
- | -10 |
- +-------------+
- BIGINT floor(DOUBLE x)
- -- 返回小于或等于x的最大整数值.
- mysql> select floor(1);
- +------------+
- | floor(1.0) |
- +------------+
- | 1 |
- +------------+
- mysql> select floor(2.4);
- +------------+
- | floor(2.4) |
- +------------+
- | 2 |
- +------------+
- mysql> select floor(-10.3);
- +--------------+
- | floor(-10.3) |
- +--------------+
- | -11 |
- +--------------+
复制代码 round
- round(x), round(x, d)
- -- 将x四舍五入后保留d位小数,d默认为0。
- -- 如果d为负数,则小数点左边d位为0。如果x或d为null,返回null。
- mysql> select round(2.4);
- +------------+
- | round(2.4) |
- +------------+
- | 2 |
- +------------+
- mysql> select round(2.5);
- +------------+
- | round(2.5) |
- +------------+
- | 3 |
- +------------+
- mysql> select round(-3.4);
- +-------------+
- | round(-3.4) |
- +-------------+
- | -3 |
- +-------------+
- mysql> select round(-3.5);
- +-------------+
- | round(-3.5) |
- +-------------+
- | -4 |
- +-------------+
- mysql> select round(1667.2725, 2);
- +---------------------+
- | round(1667.2725, 2) |
- +---------------------+
- | 1667.27 |
- +---------------------+
- mysql> select round(1667.2725, -2);
- +----------------------+
- | round(1667.2725, -2) |
- +----------------------+
- | 1700 |
- +----------------------+
复制代码 truncate
- DOUBLE truncate(DOUBLE x, INT d)
- -- 按照保留小数的位数d对x进行数值截取。
- -- 规则如下:
- -- 当d > 0时:保留x的d位小数
- -- 当d = 0时:将x的小数部分去除,只保留整数部分
- -- 当d < 0时:将x的小数部分去除,整数部分按照 d所指定的位数,采用数字0进行替换
- mysql> select truncate(124.3867, 2);
- +-----------------------+
- | truncate(124.3867, 2) |
- +-----------------------+
- | 124.38 |
- +-----------------------+
- mysql> select truncate(124.3867, 0);
- +-----------------------+
- | truncate(124.3867, 0) |
- +-----------------------+
- | 124 |
- +-----------------------+
- mysql> select truncate(-124.3867, -2);
- +-------------------------+
- | truncate(-124.3867, -2) |
- +-------------------------+
- | -100 |
- +-------------------------+
复制代码 abs
- 数值类型 abs(数值类型 x)
- -- 返回x的绝对值.
- mysql> select abs(-2);
- +---------+
- | abs(-2) |
- +---------+
- | 2 |
- +---------+
- mysql> select abs(3.254655654);
- +------------------+
- | abs(3.254655654) |
- +------------------+
- | 3.254655654 |
- +------------------+
- mysql> select abs(-3254654236547654354654767);
- +---------------------------------+
- | abs(-3254654236547654354654767) |
- +---------------------------------+
- | 3254654236547654354654767 |
- +---------------------------------+
复制代码 pow
- DOUBLE pow(DOUBLE a, DOUBLE b)
- -- 求幂次:返回a的b次方.
- mysql> select pow(2,0);
- +---------------+
- | pow(2.0, 0.0) |
- +---------------+
- | 1 |
- +---------------+
- mysql> select pow(2,3);
- +---------------+
- | pow(2.0, 3.0) |
- +---------------+
- | 8 |
- +---------------+
- mysql> select round(pow(3,2.4),2);
- +--------------------+
- | pow(3.0, 2.4) |
- +--------------------+
- | 13.966610165238235 |
- +--------------------+
复制代码 greatest和 least
- greatest(col_a, col_b, …, col_n)
- -- 返回一行中 n个column的最大值.若column中有NULL,则返回NULL.
- least(col_a, col_b, …, col_n)
- -- 返回一行中 n个column的最小值.若column中有NULL,则返回NULL.
- mysql> select greatest(-1, 0, 5, 8);
- +-----------------------+
- | greatest(-1, 0, 5, 8) |
- +-----------------------+
- | 8 |
- +-----------------------+
- mysql> select greatest(-1, 0, 5, NULL);
- +--------------------------+
- | greatest(-1, 0, 5, NULL) |
- +--------------------------+
- | NULL |
- +--------------------------+
- mysql> select greatest(6.3, 4.29, 7.6876);
- +-----------------------------+
- | greatest(6.3, 4.29, 7.6876) |
- +-----------------------------+
- | 7.6876 |
- +-----------------------------+
- mysql> select greatest("2022-02-26 20:02:11","2020-01-23 20:02:11","2020-06-22 20:02:11");
- +-------------------------------------------------------------------------------+
- | greatest('2022-02-26 20:02:11', '2020-01-23 20:02:11', '2020-06-22 20:02:11') |
- +-------------------------------------------------------------------------------+
- | 2022-02-26 20:02:11 |
- +-------------------------------------------------------------------------------+
复制代码 JSON函数
- CREATE TABLE test_json (
- id INT,
- json_string String
- )
- DUPLICATE KEY(id)
- DISTRIBUTED BY HASH(id) BUCKETS 3
- PROPERTIES("replication_num" = "1");
- --测试数据
- {"k1":"v31", "k2": 300, "a1": [{"k1":"v41", "k2": 400}, 1, "a", 3.14]}
- {"k1":"v32", "k2": 400, "a1": [{"k1":"v41", "k2": 400}, 2, "a", 4.14],"a2":{"k3":"v33", "k4": 200,"a2": [{"k1":"v41", "k2": 400}, 2, "a", 4.14]}}
- {"k1":"v33", "k2": 500, "a1": [{"k1":"v41", "k2": 400}, 3, "a", 5.14],"a2":{"k3":"v33", "k4": 200,"a2": [{"k5":"v42", "k6": 600}]}}
- {"k1":"v31"}
- {"k1":"v31", "k2": 300}
- {"k1":"v31", "k2": 200 "a1": []}
- --json是一种里面存着一对对key,value类型的结构
- --针对值类型的不同:
- -- 1.简单值:"k1":"v31"
- -- 2.数组:[{"k1":"v41", "k2": 400}, 1, "a", 3.14]
- -- 3.对象:"a2":{"k3":"v33", "k4": 200,"a2": [{"k5":"v42", "k6": 600}]}
复制代码 取值的时候,指定的'$.k1'==>这样的东西我们称之为json path ,json的路劲
-- 通过本地文件的方式导入- curl \
- -u root: \
- -H "label:load_local_file1" \
- -H "column_separator:_" \
- -T /root/data/json.txt \
- http://doitedu01:8040/api/test/test_json/_stream_load
复制代码- -- 用insert into 的方式导入一条
- INSERT INTO test_json VALUES(7, '{"k1":"v1", "k2": 200}');
- 5.1.7.1get_json_double,get_json_int,get_json_string
- DOUBLE get_json_int(VARCHAR json_str, VARCHAR json_path)
- INT get_json_int(VARCHAR json_str, VARCHAR json_path)
- VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path)
- -- 解析并获取 json 字符串内指定路径的double,int,string 类型的内容。
- -- 其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。
- -- 如果路径中包含 . ,则可以使用双引号包围。
- -- 使用 [ ] 表示数组下标,从 0 开始。
- -- path 的内容不能包含 ", [ 和 ]。
- -- 如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。
- --1.获取到k1对应的value的值
- mysql> select id, get_json_string(json_string,'$.k1') as k1 from test_json;
- +------+------+
- | id | k1 |
- +------+------+
- | 2 | v32 |
- | 4 | v31 |
- | 5 | v31 |
- | 6 | v31 |
- | 1 | v31 |
- | 3 | v33 |
- +------+------+
- --2.获取到key 为a1 里面的数组
- mysql> select id, get_json_string(json_string,'$.a1') as arr from test_json;
- +------+------------------------------------+
- | id | arr |
- +------+------------------------------------+
- | 1 | [{"k1":"v41","k2":400},1,"a",3.14] |
- | 3 | [{"k1":"v41","k2":400},3,"a",5.14] |
- | 2 | [{"k1":"v41","k2":400},2,"a",4.14] |
- | 4 | NULL |
- | 5 | NULL |
- | 6 | [] |
- +------+------------------------------------+
- --3.获取到key 为a1 里面的数组中第一个元素的值
- mysql> select id, get_json_string(json_string,'$.a1[0]') as arr from test_json;
- +------+-----------------------+
- | id | arr |
- +------+-----------------------+
- | 2 | {"k1":"v41","k2":400} |
- | 1 | {"k1":"v41","k2":400} |
- | 3 | {"k1":"v41","k2":400} |
- | 4 | NULL |
- | 5 | NULL |
- | 6 | NULL |
- +------+-----------------------+
- --4.获取到key 为a1 里面的数组中第一个元素的值(这个值是一个json串,再次获取到这个字符串中)
- select id, get_json_string(get_json_string(json_string,'$.a1[0]'),'$.k1') as arr from test_json;
- +------+------+
- | id | arr |
- +------+------+
- | 2 | v41 |
- | 1 | v41 |
- | 3 | v41 |
- | 4 | NULL |
- | 5 | NULL |
- | 6 | NULL |
- +------+------+
- 6 rows in set (0.02 sec)
复制代码 json_object- VARCHAR json_object(VARCHAR,...)
- -- 生成一个包含指定Key-Value对的json object,
- -- 传入的参数是key,value对,且key不能为null
- MySQL> select json_object('time',curtime());
- +--------------------------------+
- | json_object('time', curtime()) |
- +--------------------------------+
- | {"time": "10:49:18"} |
- +--------------------------------+
- MySQL> SELECT json_object('id', 87, 'name', 'carrot');
- +-----------------------------------------+
- | json_object('id', 87, 'name', 'carrot') |
- +-----------------------------------------+
- | {"id": 87, "name": "carrot"} |
- +-----------------------------------------+
- json_object('id', 87, 'name', 'carrot');
- MySQL> select json_object('username',null);
- +---------------------------------+
- | json_object('username', 'NULL') |
- +---------------------------------+
- | {"username": NULL} |
- +---------------------------------+
复制代码 窗口函数
ROW_NUMBER(),DENSE_RANK(),RANK()
- -- 测试rank打行号,名次相同会并列排名,比如两个第一名,就是1 1 然后第二名会显示3
- select x, y, rank() over(partition by x order by y) as rank from int_t;
- | x | y | rank |
- |----|------|----------|
- | 1 | 1 | 1 |
- | 1 | 2 | 2 |
- | 1 | 2 | 2 |
- | 2 | 1 | 1 |
- | 2 | 2 | 2 |
- | 2 | 3 | 3 |
- | 3 | 1 | 1 |
- | 3 | 1 | 1 |
- | 3 | 2 | 3 |
- -- 测试dense_rank(),名词相同会并列排名,比如两个第一名,就是1 1 然后第二名会显示2
- select x, y, dense_rank() over(partition by x order by y) as rank from int_t;
- | x | y | rank |
- |----|------|----------|
- | 1 | 1 | 1 |
- | 1 | 2 | 2 |
- | 1 | 2 | 2 |
- | 2 | 1 | 1 |
- | 2 | 2 | 2 |
- | 2 | 3 | 3 |
- | 3 | 1 | 1 |
- | 3 | 1 | 1 |
- | 3 | 2 | 2 |
-
- -- 测试ROW_NUMBER() 按照分组排序要求,返回的编号依次底层,1 2 3 4 5 ,
- -- 不会有重复值,也不会有空缺值,就是连续递增的整数,从1 开始
- select x, y, row_number() over(partition by x order by y) as rank from int_t;
- | x | y | rank |
- |---|------|----------|
- | 1 | 1 | 1 |
- | 1 | 2 | 2 |
- | 1 | 2 | 3 |
- | 2 | 1 | 1 |
- | 2 | 2 | 2 |
- | 2 | 3 | 3 |
- | 3 | 1 | 1 |
- | 3 | 1 | 2 |
- | 3 | 2 | 3 |
- -- 数据
- 孙悟空,语文,87
- 孙悟空,数学,95
- 娜娜,英语,84
- 宋宋,语文,64
- 孙悟空,英语,68
- 宋宋,英语,84
- 婷婷,语文,65
- 娜娜,语文,94
- 宋宋,数学,86
- 婷婷,数学,85
- 娜娜,数学,56
- 婷婷,英语,78
- -- 建表语句
- create table stu
- (
- name varchar(50),
- subject varchar(50),
- score double
- )
- DUPLICATE KEY(name)
- DISTRIBUTED BY HASH(name) BUCKETS 1;
- -- 通过本地文件的方式导入数据
- curl \
- -u root: \
- -H "label:num_test" \
- -H "column_separator:," \
- -T /root/data/stu.txt \
- http://doitedu01:8040/api/test/stu/_stream_load
复制代码 需求:
【相同分数并列(假设第一名有两个,排名就是并列第一,然后第三名从2开始)】
1.按照分数降序排序,求每个学科中每个人的名次
2.按照每个人的总分进行升序排列,得到每个人总分名次的名次
【相同分数并列(假设第一名有两个,排名就是并列第一,然后第三名从3开始)】
3.按照学科进行升序排列,得到每个人的每个学科的名次
4.按照每个人的总分进行升序排列,得到每个人总分名次的名次
【相同分数并列
(假设第一名有两个,排名就是并列第一,
就再单独比语文的成绩,然后数学,最后英语,
分数全部一样,按照学生名字的字典顺序,在前的为第一)】
5.按照每个人的总分进行升序排列,得到每个人总分名次的名次- -- 1.按照学科进行升序排列,得到每个人的每个学科的名次
- select
- name,subject,score,
- dense_rank() over(partition by subject order by score desc) as rank
- from stu
- +-----------+---------+-------+------+
- | name | subject | score | rank |
- +-----------+---------+-------+------+
- | 孙悟空 | 数学 | 95 | 1 |
- | 宋宋 | 数学 | 86 | 2 |
- | 婷婷 | 数学 | 85 | 3 |
- | 娜娜 | 数学 | 56 | 4 |
- | 娜娜 | 英语 | 84 | 1 |
- | 宋宋 | 英语 | 84 | 1 |
- | 婷婷 | 英语 | 78 | 2 |
- | 孙悟空 | 英语 | 68 | 3 |
- | 娜娜 | 语文 | 94 | 1 |
- | 孙悟空 | 语文 | 87 | 2 |
- | 婷婷 | 语文 | 65 | 3 |
- | 宋宋 | 语文 | 64 | 4 |
- +-----------+---------+-------+------+
- -- 2.按照每个人的总分进行升序排列,得到每个人总分名次的名次
- select
- name,sum_score,
- -- 因为是整体按照学生的总分进行求名次,所有学生为1组,就不需要分组了
- dense_rank() over(order by sum_score desc) as rank
- from
- (
- select
- name,sum(score) as sum_score
- from stu
- group by name
- ) as t ;
- +-----------+-----------+------+
- | name | sum_score | rank |
- +-----------+-----------+------+
- | 孙悟空 | 250 | 1 |
- | 宋宋 | 234 | 2 |
- | 娜娜 | 234 | 2 |
- | 婷婷 | 228 | 3 |
- +-----------+-----------+------+
- 【相同分数并列(假设第一名有两个,排名就是并列第一,然后第三名从3开始)】
- -- 3.按照学科进行升序排列,得到每个人的每个学科的名次
- select
- name,subject,score,
- rank() over(partition by subject order by score desc) as rank
- from stu
- +-----------+---------+-------+------+
- | name | subject | score | rank |
- +-----------+---------+-------+------+
- | 孙悟空 | 数学 | 95 | 1 |
- | 宋宋 | 数学 | 86 | 2 |
- | 婷婷 | 数学 | 85 | 3 |
- | 娜娜 | 数学 | 56 | 4 |
- | 娜娜 | 英语 | 84 | 1 |
- | 宋宋 | 英语 | 84 | 1 |
- | 婷婷 | 英语 | 78 | 3 |
- | 孙悟空 | 英语 | 68 | 4 |
- | 娜娜 | 语文 | 94 | 1 |
- | 孙悟空 | 语文 | 87 | 2 |
- | 婷婷 | 语文 | 65 | 3 |
- | 宋宋 | 语文 | 64 | 4 |
- +-----------+---------+-------+------+
- -- 4.按照每个人的总分进行升序排列,得到每个人总分名次的名次
- select
- name,sum_score,
- -- 因为是整体按照学生的总分进行求名次,所有学生为1组,就不需要分组了
- rank() over(order by sum_score desc) as rank
- from
- (
- select
- name,sum(score) as sum_score
- from stu
- group by name
- ) as t ;
- +-----------+-----------+------+
- | name | sum_score | rank |
- +-----------+-----------+------+
- | 孙悟空 | 250 | 1 |
- | 宋宋 | 234 | 2 |
- | 娜娜 | 234 | 2 |
- | 婷婷 | 228 | 4 |
- +-----------+-----------+------+
- 【相同分数并列
- (假设第一名有两个,排名就是并列第一,
- 就再单独比语文的成绩,然后数学,最后英语,
- 分数全部一样,按照学生名字的字典顺序,在前的为第一)】
- -- 5.按照每个人的总分进行升序排列,得到每个人总分名次的名次
- --方案1:利用窗口函数来列转行
- select
- name,subject,score as math_score,english_score,chinese_score,sum_score,
- row_number()over(order by sum_score desc ,chinese_score desc ,score desc ,english_score desc,name asc) as num
- from
- (
- select
- name,subject,score,
- lead(score,1,0)over(partition by name order by subject) as english_score,
- lead(score,2,0)over(partition by name order by subject) as chinese_score,
- sum(score)over(partition by name) as sum_score,
- row_number()over(partition by name) as num
- from stu
- ) as tmp
- where num = 1
-
- -- 方案2:利用if判断来列转行
- select
- name,chinese_score,match_score,english_score,sum_score,
- row_number()over(order by sum_score desc ,chinese_score desc ,match_score desc ,english_score desc,name asc) as num
- from
- (
- select
- name,
- sum(chinese_score) as chinese_score,
- sum(match_score) as match_score,
- sum(english_score) as english_score,
- sum(chinese_score) + sum(match_score) + sum(english_score) as sum_score
- from
- (
- select name,subject,
- if(subject = '语文',score,0) as chinese_score,
- if(subject = '数学',score,0) as match_score,
- if(subject = '英语',score,0) as english_score
- from stu
- )as t
- group by name
- ) as t1
- +-----------+---------+------------+---------------+---------------+-----------+------+
- | name | subject | math_score | english_score | chinese_score | sum_score | num |
- +-----------+---------+------------+---------------+---------------+-----------+------+
- | 孙悟空 | 数学 | 95 | 68 | 87 | 250 | 1 |
- | 娜娜 | 数学 | 56 | 84 | 94 | 234 | 2 |
- | 宋宋 | 数学 | 86 | 84 | 64 | 234 | 3 |
- | 婷婷 | 数学 | 85 | 78 | 65 | 228 | 4 |
- +-----------+---------+------------+---------------+---------------+-----------+------+
复制代码 min,max,sum,avg,count
- min(x)over() -- 取窗口中x列的最小值
- max(x)over() -- 取窗口中x列的最大值
- sum(x)over() -- 取窗口中x列的数据总和
- avg(x)over() -- 取窗口中x列的数据平均值
- count(x)over() -- 取窗口中x列有多少行
- unbounded preceding
- current row
- 1 following
- 1 PRECEDING
- rows between unbounded preceding and current row --指在当前窗口中第一行到当前行的范围
- rows between unbounded preceding and 1 following --指在当前窗口中第一行到当前行下一行的范围
- rows between unbounded preceding and 1 PRECEDING --指在当前窗口中第一行到当前行前一行的范围
复制代码 LEAD() ,LAG()
- -- LAG() 方法用来计算当前行向前数若干行的值。
- LAG(expr, offset, default) OVER (partition_by_clause order_by_clause)
- -- LEAD() 方法用来计算当前行向后数若干行的值。
- LEAD(expr, offset, default]) OVER (partition_by_clause order_by_clause)
复制代码 漏斗模型分析函数window_funnel
业务目标、到达路径,路径步骤、步骤人数,步骤之间的相对转换率和绝对转换率
每一种业务都有他的核心任务和流程,而流程的每一个步骤,都可能有用户流失。
所以如果把每一个步骤及其对应的数据(如UV)拼接起来,就会形成一个上大下小的漏斗形态,这就是漏斗模型。- -- 准备数据
- uid event_id event_action event_time
- u001,e1,view_detail_page,2022-11-01 01:10:21
- u001,e2,add_bag_page,2022-11-01 01:11:13
- u001,e3,collect_goods_page,2022-11-01 02:07:11
- u002,e3,collect_goods_page,2022-11-01 01:10:21
- u002,e4,order_detail_page,2022-11-01 01:11:13
- u002,e5,pay_detail_page,2022-11-01 02:07:11
- u002,e6,click_adver_page,2022-11-01 13:07:23
- u002,e7,home_page,2022-11-01 08:18:12
- u002,e8,list_detail_page,2022-11-01 23:34:29
- u002,e1,view_detail_page,2022-11-01 11:25:32
- u002,e2,add_bag_page,2022-11-01 12:41:21
- u002,e3,collect_goods_page,2022-11-01 16:21:15
- u002,e4,order_detail_page,2022-11-01 21:41:12
- u003,e5,pay_detail_page,2022-11-01 01:10:21
- u003,e6,click_adver_page,2022-11-01 01:11:13
- u003,e7,home_page,2022-11-01 02:07:11
- u001,e4,order_detail_page,2022-11-01 13:07:23
- u001,e5,pay_detail_page,2022-11-01 08:18:12
- u001,e6,click_adver_page,2022-11-01 23:34:29
- u001,e7,home_page,2022-11-01 11:25:32
- u001,e8,list_detail_page,2022-11-01 12:41:21
- u001,e1,view_detail_page,2022-11-01 16:21:15
- u001,e2,add_bag_page,2022-11-01 21:41:12
- u003,e8,list_detail_page,2022-11-01 13:07:23
- u003,e1,view_detail_page,2022-11-01 08:18:12
- u003,e2,add_bag_page,2022-11-01 23:34:29
- u003,e3,collect_goods_page,2022-11-01 11:25:32
- u003,e4,order_detail_page,2022-11-01 12:41:21
- u003,e5,pay_detail_page,2022-11-01 16:21:15
- u003,e6,click_adver_page,2022-11-01 21:41:12
- u004,e7,home_page,2022-11-01 01:10:21
- u004,e8,list_detail_page,2022-11-01 01:11:13
- u004,e1,view_detail_page,2022-11-01 02:07:11
- u004,e2,add_bag_page,2022-11-01 13:07:23
- u004,e3,collect_goods_page,2022-11-01 08:18:12
- u004,e4,order_detail_page,2022-11-01 23:34:29
- u004,e5,pay_detail_page,2022-11-01 11:25:32
- u004,e6,click_adver_page,2022-11-01 12:41:21
- u004,e7,home_page,2022-11-01 16:21:15
- u004,e8,list_detail_page,2022-11-01 21:41:12
- u005,e1,view_detail_page,2022-11-01 01:10:21
- u005,e2,add_bag_page,2022-11-01 01:11:13
- u005,e3,collect_goods_page,2022-11-01 02:07:11
- u005,e4,order_detail_page,2022-11-01 13:07:23
- u005,e5,pay_detail_page,2022-11-01 08:18:12
- u005,e6,click_adver_page,2022-11-01 23:34:29
- u005,e7,home_page,2022-11-01 11:25:32
- u005,e8,list_detail_page,2022-11-01 12:41:21
- u005,e1,view_detail_page,2022-11-01 16:21:15
- u005,e2,add_bag_page,2022-11-01 21:41:12
- u005,e3,collect_goods_page,2022-11-01 01:10:21
- u006,e4,order_detail_page,2022-11-01 01:11:13
- u006,e5,pay_detail_page,2022-11-01 02:07:11
- u006,e6,click_adver_page,2022-11-01 13:07:23
- u006,e7,home_page,2022-11-01 08:18:12
- u006,e8,list_detail_page,2022-11-01 23:34:29
- u006,e1,view_detail_page,2022-11-01 11:25:32
- u006,e2,add_bag_page,2022-11-01 12:41:21
- u006,e3,collect_goods_page,2022-11-01 16:21:15
- u006,e4,order_detail_page,2022-11-01 21:41:12
- u006,e5,pay_detail_page,2022-11-01 23:10:21
- u006,e6,click_adver_page,2022-11-01 01:11:13
- u007,e7,home_page,2022-11-01 02:07:11
- u007,e8,list_detail_page,2022-11-01 13:07:23
- u007,e1,view_detail_page,2022-11-01 08:18:12
- u007,e2,add_bag_page,2022-11-01 23:34:29
- u007,e3,collect_goods_page,2022-11-01 11:25:32
- u007,e4,order_detail_page,2022-11-01 12:41:21
- u007,e5,pay_detail_page,2022-11-01 16:21:15
- u007,e6,click_adver_page,2022-11-01 21:41:12
- u007,e7,home_page,2022-11-01 01:10:21
- u008,e8,list_detail_page,2022-11-01 01:11:13
- u008,e1,view_detail_page,2022-11-01 02:07:11
- u008,e2,add_bag_page,2022-11-01 13:07:23
- u008,e3,collect_goods_page,2022-11-01 08:18:12
- u008,e4,order_detail_page,2022-11-01 23:34:29
- u008,e5,pay_detail_page,2022-11-01 11:25:32
- u008,e6,click_adver_page,2022-11-01 12:41:21
- u008,e7,home_page,2022-11-01 16:21:15
- u008,e8,list_detail_page,2022-11-01 21:41:12
- u008,e1,view_detail_page,2022-11-01 01:10:21
- u009,e2,add_bag_page,2022-11-01 01:11:13
- u009,e3,collect_goods_page,2022-11-01 02:07:11
- u009,e4,order_detail_page,2022-11-01 13:07:23
- u009,e5,pay_detail_page,2022-11-01 08:18:12
- u009,e6,click_adver_page,2022-11-01 23:34:29
- u009,e7,home_page,2022-11-01 11:25:32
- u009,e8,list_detail_page,2022-11-01 12:41:21
- u009,e1,view_detail_page,2022-11-01 16:21:15
- u009,e2,add_bag_page,2022-11-01 21:41:12
- u009,e3,collect_goods_page,2022-11-01 01:10:21
- u010,e4,order_detail_page,2022-11-01 01:11:13
- u010,e5,pay_detail_page,2022-11-01 02:07:11
- u010,e6,click_adver_page,2022-11-01 13:07:23
- u010,e7,home_page,2022-11-01 08:18:12
- u010,e8,list_detail_page,2022-11-01 23:34:29
- u010,e5,pay_detail_page,2022-11-01 11:25:32
- u010,e6,click_adver_page,2022-11-01 12:41:21
- u010,e7,home_page,2022-11-01 16:21:15
- u010,e8,list_detail_page,2022-11-01 21:41:12
- -- 创建表
- drop table if exists event_info_log;
- create table event_info_log
- (
- uid varchar(20),
- event_id varchar(20),
- event_action varchar(20),
- event_time datetime
- )
- DUPLICATE KEY(uid)
- DISTRIBUTED BY HASH(uid) BUCKETS 1;
- -- 通过本地文件的方式导入数据
- curl \
- -u root: \
- -H "label:event_info_log" \
- -H "column_separator:," \
- -T /root/data/event_log.txt \
- http://linux01:8040/api/test/event_info_log/_stream_load
复制代码 封装、要素(时间范围,事件的排序时间依据,漏斗模型的事件链)- window_funnel(window, mode, timestamp_column, event1, event2, ... , eventN)
- -- 漏斗分析函数搜索滑动时间窗口内最大的发生的最大事件序列长度。
- -- window :滑动时间窗口大小,单位为秒。
- -- mode :保留,目前只支持default。-- 相邻两个事件之间没有时间间隔要求,并且相邻两个事件中可以做其他的事件
- -- timestamp_column :指定时间列,类型为DATETIME, 滑动窗口沿着此列工作。
- -- eventN :表示事件的布尔表达式。
- select
- user_id,
- window_funnel(3600*24, 'default', event_time, event_id='e1', event_id='e2' , event_id='e4', event_id='e5') as step
- from event_info_log
- group by user_id
- +---------+------+
- | user_id | step |
- +---------+------+
- | u006 | 4 |
- | u007 | 2 |
- | u005 | 3 |
- | u004 | 3 |
- | u010 | 0 |
- | u001 | 3 |
- | u003 | 2 |
- | u002 | 3 |
- | u008 | 3 |
- | u009 | 2 |
- +---------+------+
- -- 算每一层级的转换率
- select
- '购买转化漏斗' as funnel_name,
- sum(if(step >= 1 ,1,0)) as step1,
- sum(if(step >= 2 ,1,0)) as step2,
- sum(if(step >= 3 ,1,0)) as step3,
- sum(if(step >= 4 ,1,0)) as step4,
- round(sum(if(step >= 2 ,1,0))/sum(if(step >= 1 ,1,0)),2) as 'step1->step2_radio',
- round(sum(if(step >= 3 ,1,0))/sum(if(step >= 2 ,1,0)),2) as 'step2->step3_radio',
- round(sum(if(step >= 4 ,1,0))/sum(if(step >= 3 ,1,0)),2) as 'step3->step4_radio'
- from
- (
- select
- user_id,
- window_funnel(3600*24, 'default', report_date, event_id='e1', event_id='e2' , event_id='e4', event_id='e5') as step
- from event_info_log
- where to_date(report_date) = '2022-11-01'
- and event_id in('e1','e4','e5','e2')
- group by user_id
- ) as t1
- -- res
- +--------------------+-------+-------+-------+-------+--------------------+--------------------+--------------------+
- | funnel_name | step1 | step2 | step3 | step4 | step1->step2_radio | step2->step3_radio | step3->step4_radio |
- +--------------------+-------+-------+-------+-------+--------------------+--------------------+--------------------+
- | 购买转化漏斗 | 9 | 9 | 6 | 1 | 1 | 0.67 | 0.17 |
- +--------------------+-------+-------+-------+-------+--------------------+--------------------+--------------------+
复制代码 免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作! |