接下来看几个例子来说明上面的代码。
首先创建等高直方图,看看效果。
[code]greatsql> ANALYZE TABLE t1 UPDATE HISTOGRAM ON c2 WITH 3 BUCKETS;greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2 ANALYZE TABLE t1 UPDATE HISTOGRAM ON c2 WITH 5 BUCKETS;greatsql> select json_pretty(histogram)result from information_schema.column_statistics where table_name = 't1';| { "buckets": [ [ 1, 0.14285714285714285 ], [ 4, 0.2857142857142857 ], [ 5, 0.42857142857142855 ], [ 10, 0.7142857142857143 ], [ 16, 0.8571428571428571 ] ], "data-type": "int", "null-values": 0.14285714285714285, "collation-id": 8, "last-updated": "2024-10-25 02:18:36.107382", "sampling-rate": 1.0, "histogram-type": "singleton", "number-of-buckets-specified": 5} greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2CREATE TABLE t3 (ccc1 INT, ccc2 int,ccc3 datetime(6));greatsql>INSERT INTO t3 VALUES (1,2,'2021-03-25 16:44:00.123456'),(2,10,'2021-03-25 16:44:00.123456'),(3,4,'2022-03-25 16:44:00.123456'),(4,6,'2023-03-25 16:44:00.123456'),(null,7,'2024-03-25 16:44:00.123456'),(4,3,'2024-04-25 16:44:00.123456'),(null,8,'2025-03-25 16:44:00.123456'),(3,4,'2022-06-25 16:44:00.123456'),(5,4,'2021-11-25 16:44:00.123456');greatsql>CREATE TABLE t4 (d1 INT, d2 int, d3 varchar(100));greatsql>INSERT INTO t4 VALUES (1,2,'aa1'),(2,1,'bb1'),(2,3,'cc1'),(3,3,'cc1'),(4,2,'ff1'),(4,4,'ert'),(4,2,'f5fg'),(null,2,'ee'),(5,30,'cc1'),(5,4,'fcc1'),(4,10,'cc1'),(6,4,'ccd1'),(null,1,'fee'),(1,2,'aa1'),(2,1,'bb1'),(2,3,'cc1'),(3,3,'cc1'),(4,2,'ff1'),(4,4,'ert'),(4,2,'f5fg'),(null,2,'ee'),(5,30,'cc1'),(5,4,'fcc1'),(4,10,'cc1'),(6,4,'ccd1'),(null,1,'fee'),(1,2,'aa1'),(2,1,'bb1'),(2,3,'cc1'),(3,3,'cc1'),(4,2,'ff1'),(4,4,'ert'),(4,2,'f5fg'),(null,2,'ee'),(5,30,'cc1'),(5,4,'fcc1'),(4,10,'cc1'),(6,4,'ccd1'),(null,1,'fee');greatsql>CREATE INDEX idx3_2 ON t3(ccc2);greatsql>CREATE INDEX idx3_3 ON t3(ccc3);greatsql>CREATE INDEX idx4_2 ON t4(d2);首先看一下没有建立直方图之前的效果,这里t4用了全表扫描。greatsql> EXPLAIN SELECT * FROM t4 join t3 ON t4.d1=t3.ccc1 and t4.d2=t3.ccc2 where t4.d1 ANALYZE TABLE t4 UPDATE HISTOGRAM ON d1 WITH 5 BUCKETS;-- 可以看到效果已经变为更优的t3作为驱动表了,这里看出直方图对于估计效果更为精确。greatsql> EXPLAIN SELECT * FROM t4 join t3 ON t4.d1=t3.ccc1 and t4.d2=t3.ccc2 where t4.d1