【GreatSQL优化器-14】直方图应用 - ToB企服应用市场:ToB评测及商务社交产业平台

greatsql> CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT,date1 DATETIME);
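greatsql> INSERT INTO t1 VALUES (1,10,'2021-03-25 16:44:00.123456'),(2,1,'2022-03-26 16:44:00.123456'),(3,4,'2023-03-27 16:44:00.123456'),(5,5,'2024-03-25 16:44:00.123456'),(7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456');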
greatsql> CREATE TABLE t2 (cc1 INT PRIMARY KEY, cc2 INT);
greatsql> INSERT INTO t2 VALUES (1,3),(2,1),(3,2),(4,3),(5,15);
greatsql> CREATE TABLE t3 (ccc1 INT, ccc2 varchar(100));
greatsql> INSERT INTO t3 VALUES (1,'aa1'),(2,'bb1'),(3,'cc1'),(4,'dd1'),(null,'ee');
greatsql> CREATE INDEX idx1 ON t1(c2);
greatsql> CREATE INDEX idx2 ON t1(c2,date1);
greatsql> CREATE INDEX idx2_1 ON t2(cc2);
greatsql> CREATE INDEX idx3_1 ON t3(ccc1);
对t1.c2创建直方图，指定3个桶，由系统自动划分各桶的边界（equi-height等高直方图）：
greatsql> ANALYZE TABLE t1 UPDATE HISTOGRAM ON c2 WITH 3 BUCKETS;
greatsql> SELECT json_pretty(histogram)result FROM information_schema.column_statistics WHERE table_name = 't1';
| {
"buckets": [
[
1,
5,
0.42857142857142855,
3
],
[
10,
10,
0.7142857142857143,
1
],
[
16,
16,
0.8571428571428571,
1
]
],
"data-type": "int",
"null-values": 0.14285714285714285,
"collation-id": 8,
"last-updated": "2024-10-22 08:38:48.858099",
"sampling-rate": 1.0,
"histogram-type": "equi-height",
"number-of-buckets-specified": 3
}
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
"plan_prefix": [
],
"table": "`t1`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 7,
"filtering_effect": [
{
"condition": "(`t1`.`c2` < 5)", 对t1.c2的过滤系数估计用到了直方图
"histogram_selectivity": 0.342857 这里过滤系数算出来为0.342857：5落在第一个桶[1,5]内，先按桶内线性插值得到小于5的比例 (5-1)/(5-1+1)=0.8，再乘以该桶的累计频率0.428571，即 0.8×0.428571≈0.342857
}
],
"final_filtering_effect": 1,
"access_type": "scan",
"resulting_rows": 7,
"cost": 0.95,
"chosen": true
}
]
},

复制代码