【GreatSQL优化器-17】DYNAMIC RANGE
【GreatSQL优化器-17】DYNAMIC RANGE一、DYNAMIC RANGE介绍
GreatSQL 的优化器有一种扫描方式是动态范围扫描方式,类似于“已读乱回”模式,这种模式是在表有多个索引的情况下,对驱动表连接的时候部门选择索引的情况。优化器没有找到好的索引可以使用,但发现在知道前面表的列值后,大概会使用某些索引。对于前面表中的每个行组合,优化器查抄是否可以使用 range 或 index merge 访问方法来检索行。虽然这不是很快,但比执行完全没有索引的连接要快。
下面用一个简朴的例子来说明直方图怎么应用在优化器。
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT,date1 DATETIME);
INSERT INTO t1 VALUES (1,10,'2021-03-25 16:44:00.123456'),(2,1,'2022-03-26 16:44:00.123456'),(3,4,'2023-03-27 16:44:00.123456'),(5,5,'2024-03-25 16:44:00.123456'),(7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456');
CREATE TABLE t2 (cc1 INT PRIMARY KEY, cc2 INT);
INSERT INTO t2 VALUES (1,3),(2,1),(3,2),(4,3),(5,15);
CREATE TABLE t3 (ccc1 INT, ccc2 varchar(100));
INSERT INTO t3 VALUES (1,'aa1'),(2,'bb1'),(3,'cc1'),(4,'dd1'),(null,'ee');
CREATE INDEX idx1 ON t1(c2);
CREATE INDEX idx2 ON t1(c2,date1);
CREATE INDEX idx2_1 ON t2(cc2);
CREATE INDEX idx3_1 ON t3(ccc1);
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key| key_len | ref| rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
|1 | SIMPLE | t3 | NULL | ALL| idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
|1 | SIMPLE | t1 | NULL | ALL| PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) |
-- 这里的结果出现了Range checked,说明进行了DYNAMIC_RANGE
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
greatsql> SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+------+---------------------+------+------+
| c1 | c2 | date1 | ccc1 | ccc2 |
+----+------+---------------------+------+------+
|1 | 10 | 2021-03-25 16:44:00 | 1 | aa1|
|2 | 1 | 2022-03-26 16:44:00 | 1 | aa1|
|3 | 4 | 2023-03-27 16:44:00 | 1 | aa1|
|2 | 1 | 2022-03-26 16:44:00 | 2 | bb1|
|3 | 4 | 2023-03-27 16:44:00 | 2 | bb1|
|2 | 1 | 2022-03-26 16:44:00 | 3 | cc1|
|3 | 4 | 2023-03-27 16:44:00 | 3 | cc1|
|2 | 1 | 2022-03-26 16:44:00 | 4 | dd1|
|3 | 4 | 2023-03-27 16:44:00 | 4 | dd1|
|2 | 1 | 2022-03-26 16:44:00 | NULL | ee |
|3 | 4 | 2023-03-27 16:44:00 | NULL | ee |
+----+------+---------------------+------+------+
greatsql> SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
"attaching_conditions_to_tables": {
"original_condition": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))",
"attached_conditions_computation": [
{
"table": "`t1`",
"rechecking_index_usage": { -- 这里对t1进行了recheck
"recheck_reason": "not_first_table",
"range_analysis": {
"table_scan": {
"rows": 7,
"cost": 3.05
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"c1"
]
},
{
"index": "idx1",
"usable": true,
"key_parts": [
"c2",
"c1"
]
},
{
"index": "idx2",
"usable": true,
"key_parts": [
"c2",
"date1",
"c1"
]
}
],
"best_covering_index_scan": {
"index": "idx2",
"cost": 0.952742,
"chosen": true
},
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_single_table"
},
"skip_scan_range": {
"chosen": false,
"cause": "not_single_table"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"analyzing_index_merge_union": [
{
"indexes_to_merge": [
{
"range_scan_alternatives": [
{
"index": "PRIMARY",
"chosen": false,
"cause": "depends_on_unread_values"
}
],
"chosen": false,
"cause": "cost"
},
{
"range_scan_alternatives": [
{
"index": "idx1",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460274,
"chosen": true
},
{
"index": "idx2",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460457,
"chosen": false,
"cause": "cost"
}
],
"chosen": false,
"cause": "cost"
}
],
"cost_of_reading_ranges": 0,
"chosen": false,
"cause": "cost"
}
]
}
}
}
],
"attached_conditions_summary": [
{
"table": "`t3`",
"attached": null
},
{
"table": "`t1`",
"attached": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))"
}
]
}
"join_execution": {
"select#": 1,
"steps": [
{
"rows_estimation_per_outer_row": { -- 这里只截取t3读第一行时候t1的索引选择
"table": "`t1`",
"range_analysis": {
"table_scan": {
"rows": 7,
"cost": 3.05
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"c1"
]
},
{
"index": "idx1",
"usable": true,
"key_parts": [
"c2",
"c1"
]
},
{
"index": "idx2",
"usable": true,
"key_parts": [
"c2",
"date1",
"c1"
]
}
],
"best_covering_index_scan": {
"index": "idx2",
"cost": 0.952742,
"chosen": true
},
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_single_table"
},
"skip_scan_range": {
"chosen": false,
"cause": "not_single_table"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"analyzing_index_merge_union": [
{
"indexes_to_merge": [
{
"range_scan_alternatives": [
{
"index": "PRIMARY",
"ranges": [
"c1 = 1"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 1,
"cost": 0.36,
"chosen": true
}
],
"index_to_merge": "PRIMARY",
"cumulated_cost": 0.36
},
{
"range_scan_alternatives": [
{
"index": "idx1",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460274,
"chosen": true
},
{
"index": "idx2",
"ranges": [
"NULL < c2 < 5"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": true,
"in_memory": 1,
"rows": 2,
"cost": 0.460457,
"chosen": false,
"cause": "cost"
}
],
"index_to_merge": "idx1",
"cumulated_cost": 0.820274
}
],
"cost_of_reading_ranges": 0.820274,
"cost_of_mapping_rowid_in_non_clustered_pk_scan": 0.1,
"cost_sort_rowid_and_read_disk": 0.4375,
"use_roworder_index_merge": true, -- 根据上面结果选择了主键合并idx1的结果
"cause": "cost"
}
]
}
}
},
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5;
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key| key_len | ref| rows | filtered | Extra |
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
|1 | SIMPLE | t3 | NULL | ALL| idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL |
|1 | SIMPLE | t1 | NULL | ALL| PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 42.85 | Range checked for each record (index map: 0x7) | -- 这里0x7指的是t1表的3条索引
+----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+
-- 驱动表t3一共5条数据,每行记录都进行了一次test_quick_select()来查找t1是否有可用的更优索引表一:必要举行 recheck 的原因
场景条件非join表连接的第一张表存在针对这张表的条件连接的第一张表没有第一张表的列与常量值的比较,limit值小于预估的满足条件的行数表二:quick_type 范例
范例说明QS_NONE不使用快速扫描QS_RANGE使用范围扫描QS_DYNAMIC_RANGE使用动态范围表三:表用 QS_DYNAMIC_RANGE 的场合
条件(以下必须全部满足)说明条件列存在索引有机会天生mm treekeyuse_array数组没有值有值的话执行REF扫描,一般有OR条件时候导致该数组没有值表没有对应的mm tree无法天生mm tree的条件这张表不是join连接的第一张表对表执行recheckrecheck的原因见表一recheck以后天生AccessPath但是找到的范围老手数大于等于100行三、实际例子说明
接下来看几个例子来说明上面的代码:
static bool make_join_query_block(JOIN *join, Item *cond) {
// 满足以下条件的需要进行RECHECK,要么是非第一张表要么是有limit语句
if ((tab->type() == JT_ALL || tab->type() == JT_RANGE ||
tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) &&
tab->use_quick != QS_RANGE) {
if (cond && // 1a
(tab->keys() != tab->const_keys) &&// 1b
(i > 0 || // 1c
(join->query_block->master_query_expression()->item &&
cond->is_outer_reference())))
recheck_reason = NOT_FIRST_TABLE;
else if (!tab->const_keys.is_clear_all() &&// 2a
i == join->const_tables && // 2b
(join->query_expression()->select_limit_cnt <
(tab->position()->rows_fetched *
tab->position()->filter_effect)) &&// 2c
!join->calc_found_rows) // 2d
recheck_reason = LOW_LIMIT;
// 满足这个if条件的执行QS_DYNAMIC_RANGE,详情见下面几张表
if (!tab->table()->quick_keys.is_subset(tab->checked_keys) ||
!tab->needed_reg.is_subset(tab->checked_keys)) {
tab->keys().merge(tab->table()->quick_keys);
tab->keys().merge(tab->needed_reg);
if (!tab->needed_reg.is_clear_all() &&
(tab->table()->quick_keys.is_clear_all() ||
(tab->range_scan() &&
(tab->range_scan()->num_output_rows() >= 100.0)))) {
tab->use_quick = QS_DYNAMIC_RANGE;
tab->set_type(JT_ALL);
} else
tab->use_quick = QS_RANGE;
}
}
}
// 实际使用代码
bool DynamicRangeIterator::Init() {
// 每次迭代器开始的时候先获取最佳索引组合和AccessPath
AccessPath *range_scan;
int rc = test_quick_select(&range_scan);
}四、总结
从上面关于 DYNAMIC RANGE 的解释我们知道这个状态必要经过一系列判断,并且只在特定条件才有大概出现,但是每行举行一次索引判断照旧很消耗资源的,最好照旧直接走索引,以是要只管避免这种情况,适当的时候可以举行 SQL 下令干预。
Enjoy GreatSQL
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。
页:
[1]