【GreatSQL优化器-18】GROUP_INDEX_SKIP_SCAN - IT评测·应用市场-qidao123.com技术社区

CREATE TABLE t1(c1 INT, c2 INT, c3 INT, c4 INT);
CREATE UNIQUE INDEX i1_t1 ON t1(c1,c2,c3);
INSERT INTO t1 VALUES (1,1,1,1), (1,1,2,2), (1,3,3,3), (1,4,4,4), (1,5,5,5),
(2,1,1,1), (2,2,2,2), (2,3,3,3), (2,4,4,4), (2,5,5,5);
INSERT INTO t1 SELECT c1, c2, c3+5, c4+10 FROM t1;
INSERT INTO t1 SELECT c1, c2, c3+10, c4+20 FROM t1;
INSERT INTO t1 SELECT c1, c2, c3+20, c4+40 FROM t1;
INSERT INTO t1 SELECT c1, c2, c3+40, c4+80 FROM t1;
ANALYZE TABLE t1;
-- 下面的结果用到了group index skip scan，扫描方式是RANGE SCAN。
greatsql> explain SELECT c1, MIN(c2) FROM t1 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | range | i1_t1 | i1_t1 | 10 | NULL | 3 | 100.00 | Using index for group-by |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
/* 运行过程：
1、先找到第一列的唯一值，得到结果为1和2
SELECT distinct c1 FROM t1;
2、根据第一列的分组结果在分组内对c2执行范围扫描
SELECT c1, MIN(c2) FROM t1 WHERE c1 = 1;
SELECT c1, MIN(c2) FROM t1 WHERE c1 = 2;*/
-- 如果没有联合索引，那么这条sql内部要创建临时表用于数据的临时处理，扫描方式是全表扫描。可见用GROUP_INDEX_SKIP_SCAN方式执行groupby可以节省空间和开销，提升执行效率。
greatsql> DROP INDEX i1_t1 ON t1;
greatsql> explain SELECT c1, MIN(c2) FROM t1 GROUP BY c1;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+
| 1 | SIMPLE | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 160 | 100.00 | Using temporary |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+

复制代码

int test_quick_select() {
// 先判断GROUP_INDEX_SKIP_SCAN是否满足条件，不满足的话执行索引合并。
AccessPath *group_path = get_best_group_min_max();
// 不满足GROUP_INDEX_SKIP_SCAN执行索引合并，相关知识看之前知识介绍
}
AccessPath *get_best_group_min_max() {
// 遍历表所有索引，找到联合索引
for (uint cur_param_idx = 0; cur_param_idx < param->keys; ++cur_param_idx) {
// 处理group by语句
if (!join->group_list.empty()) {
// 找到联合索引内的列字段相关mm tree
if (tree) cur_tree = get_index_range_tree(cur_index, tree, param);
// 该条件是否是等号条件或者IS NULL条件
is_eq_range_pred = !(range->min_flag & is_open_range) &&
!(range->max_flag & is_open_range) &&
((range->maybe_null() && range->min_value[0] &&
range->max_value[0]) ||
memcmp(range->min_value, range->max_value,
cur_part->store_length) == 0);
}
// 处理distinct语句
if ((join->group_list.empty() && join->select_distinct) ||
is_agg_distinct) {
检查distinct后面的列字段，必须包含联合索引前缀列或者所有列
}
// 获取第一个不在GROUP BY的索引字段
first_non_group_part =
(cur_group_key_parts < actual_key_parts(cur_index_info))
? cur_index_info->key_part + cur_group_key_parts
: nullptr;
// 获取聚合函数的索引字段
first_non_infix_part = cur_min_max_arg_part
? (cur_min_max_arg_part < last_part)
? cur_min_max_arg_part
: nullptr
: nullptr;
// 接下来是对sql语句的各种检查，确认能否用group index skip scan，代码略，具体见表一
// 获取聚合函数列与第一个非GROUP BY列之间的差值
key_infix_parts = cur_key_infix_len
? (uint)(first_non_infix_part - first_non_group_part)
: 0;
// 获取GROUP BY列与聚合函数列之间的列个数
cur_used_key_parts = cur_group_key_parts + key_infix_parts;
// 计算range范围内行数
if (tree) {
cur_quick_prefix_records = check_quick_select();
}
// 计算rows和cost
cost_group_min_max();
}
// 最后生成AccessPath
AccessPath *path = new (param->return_mem_root) AccessPath;
path->type = AccessPath::GROUP_INDEX_SKIP_SCAN;
}
// 计算rows和cost，这里主要展示的是rows的计算公式
static void cost_group_min_max() {
table_records = table->file->stats.records;
keys_per_block = (table->file->stats.block_size / 2 /
(index_info->key_length + table->file->ref_length) +
1);
num_blocks = (uint)(table_records / keys_per_block) + 1;
if (index_info->has_records_per_key(group_key_parts - 1))
// Use index statistics
keys_per_group = index_info->records_per_key(group_key_parts - 1);
else
/* If there is no statistics try to guess */
keys_per_group = guess_rec_per_key(table, index_info, group_key_parts);
if (single_group)
num_groups = 1;
else {
num_groups = (uint)(table_records / keys_per_group) + 1;
if (range_tree && (quick_prefix_records != HA_POS_ERROR)) {
quick_prefix_selectivity =
(double)quick_prefix_records / (double)table_records;
num_groups = (uint)rint(num_groups * quick_prefix_selectivity);
num_groups = std::max(num_groups, 1U);
}
}
}
最后具体实现过程见代码GroupIndexSkipScanIterator::Read()，这里不再展开赘述。

复制代码

greatsql> explain SELECT c1,c2,MIN(c3) FROM t1 where c1>1 and c2=1 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+---------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+---------------------------------------+
| 1 | SIMPLE | t1 | NULL | range | i1_t1 | i1_t1 | 15 | NULL | 2 | 100.00 | Using where; Using index for group-by |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+---------------------------------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"index_dives_for_eq_ranges": true,
"ranges": [
"1 < c1"
],
"rows": 2, 用group index skip scan扫描满足条件c1>1 and c2=1的一共2条数据，计算见下面
"cost": 0.55
}
]
},
"best_group_range_summary": {
"type": "index_group",
"index": "i1_t1",
"group_attribute": "c3",
"min_aggregate": true,
"max_aggregate": false,
"distinct_aggregate": false,
"rows": 2,
"cost": 0.55,
"key_parts_used_for_access": [
"c1",
"c2",
"c3"
],
"ranges": [
"1 < c1"
],
"chosen": true
},
"skip_scan_range": {
"chosen": false,
"cause": "has_group_by"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "i1_t1",
"ranges": [
"1 < c1"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 80, 单独用索引i1_t1满足1 < c1条件的有80条
"cost": 8.31051,
"chosen": false,
"cause": "cost"
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"chosen_range_access_summary": {
"range_access_plan": {
"type": "index_group",
"index": "i1_t1",
"group_attribute": "c3",
"min_aggregate": true,
"max_aggregate": false,
"distinct_aggregate": false,
"rows": 2,
"cost": 0.55,
"key_parts_used_for_access": [
"c1",
"c2",
"c3"
],
"ranges": [
"1 < c1"
]
},
"rows_for_plan": 2,
"cost_for_plan": 0.55,
"chosen": true
}
}
/* 计算方式：
table_records = 160
keys_per_block = 391
num_blocks = (table_records / keys_per_block) + 1 = 160 / 391 + 1 = 1
keys_per_group = 80 -- group by字段分组后每组的行数
num_groups = (table_records / keys_per_group) + 1 = 160 / 80 + 1 = 3
quick_prefix_selectivity = quick_prefix_records / table_records = 80 / 160 = 0.5 -- 计算前缀选择系数
num_groups = num_groups * quick_prefix_selectivity = 3 * 0.5 = 2 */

复制代码

greatsql> explain SELECT distinct c1,c2 from t1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | range | i1_t1 | i1_t1 | 10 | NULL | 10 | 100.00 | Using index for group-by |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
"group_index_range": {
"distinct_query": true, -- 这里说明用到了distinct关键字
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"rows": 10,
"cost": 1.75
}
]
},
"best_group_range_summary": {
"type": "index_group",
"index": "i1_t1",
"group_attribute": null,
"min_aggregate": false,
"max_aggregate": false,
"distinct_aggregate": false,
"rows": 10,
"cost": 1.75,
"key_parts_used_for_access": [
"c1",
"c2"
],
"ranges": [
],
"chosen": true
},
-- 当然也支持聚合函数+distinct+group by的组合。
greatsql> explain SELECT c1,sum(distinct c2) from t1 group by c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | range | i1_t1 | i1_t1 | 10 | NULL | 10 | 100.00 | Using index for group-by |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+

复制代码

-- 查询条件既包含MIN/MAX列又包含非MIN/MAX列
greatsql> explain SELECT c1, MIN(c2) FROM t1 where c3>1 or c2<6 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 55.55 | Using where; Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
"best_covering_index_scan": {
"index": "i1_t1",
"cost": 17.4066,
"chosen": true
},
"setup_range_conditions": [
],
"range_scan_possible": false,
"cause": "condition_always_true",
"group_index_range": {
"chosen": false,
"cause": "minmax_keypart_in_disjunctive_query"
},
"skip_scan_range": {
"chosen": false,
"cause": "has_group_by"
}
-- group by字段是联合索引的第一列
greatsql> explain SELECT c2, MIN(c1) FROM t1 GROUP BY c2 ;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+------------------------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 100.00 | Using index; Using temporary |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+------------------------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "group_attribute_not_prefix_in_index"
}
]
}
-- 带有聚合函数的group by语句且没有条件，聚合函数列大于group by的列
greatsql> explain SELECT c1, MIN(c3) FROM t1 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 100.00 | Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "no_nongroup_keypart_predicate"
}
]
},
-- 带有聚合函数的group by语句的sql涉及聚合函数之后的列
greatsql> explain SELECT c1,MIN(c2) FROM t1 where c3=2 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 10.00 | Using where; Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "keypart_after_infix_in_query"
}
]
},
-- 带有聚合函数的group by语句的条件不是聚合函数列之前一个列的等号条件
greatsql> explain SELECT c1,c2,MIN(c3) FROM t1 where c3=2 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 10.00 | Using where; Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "non_equality_gap_attribute"
}
]
},
-- 聚合函数列小于等于group by字段
greatsql> explain SELECT c1,c2,MIN(c2) FROM t1 where c1=2 and c2=2 GROUP BY c1,c2;
+----+-------------+-------+------------+------+---------------+-------+---------+-------------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+-------+---------+-------------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | ref | i1_t1 | i1_t1 | 10 | const,const | 16 | 100.00 | Using index |
+----+-------------+-------+------------+------+---------------+-------+---------+-------------+------+----------+-------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "aggregate_column_not_suffix_in_idx"
}
]
},
-- 带有聚合函数的group by语句且没有条件，聚合函数列大于group by的列
greatsql> explain SELECT c1,c2,MIN(c3) FROM t1 GROUP BY c1;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 100.00 | Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+-------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "no_nongroup_keypart_predicate"
}
]
},
-- 没有聚合函数但是有group by语句的条件带有非select和group by的条件列
greatsql> explain SELECT c1,c2 FROM t1 where c1=1 or c3=1 GROUP BY c1,c2;
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | t1 | NULL | index | i1_t1 | i1_t1 | 15 | NULL | 160 | 55.00 | Using where; Using index |
+----+-------------+-------+------------+-------+---------------+-------+---------+------+------+----------+--------------------------+
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "i1_t1",
"covering": true,
"usable": false,
"cause": "keypart_reference_from_where_clause"
}
]
},

复制代码