diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c760b19db5..b0f9b2da3d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -242,6 +242,9 @@ static char *convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure); static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure); +static Node *strip_all_phvs_deep(PlannerInfo *root, Node *node); +static bool contain_placeholder_walker(Node *node, void *context); +static Node *strip_all_phvs_mutator(Node *node, void *context); static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, @@ -5593,8 +5596,8 @@ ReleaseDummy(HeapTuple tuple) * varRelid: see specs for restriction selectivity functions * * Outputs: *vardata is filled as follows: - * var: the input expression (with any binary relabeling stripped, if - * it is or contains a variable; but otherwise the type is preserved) + * var: the input expression (with any phvs or binary relabeling stripped, + * if it is or contains a variable; but otherwise unchanged) * rel: RelOptInfo for relation containing variable; NULL if expression * contains no Vars (NOTE this could point to a RelOptInfo of a * subquery, not one in the current query). @@ -5632,22 +5635,31 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, /* Save the exposed type of the expression */ vardata->vartype = exprType(node); - /* Look inside any binary-compatible relabeling */ + /* + * PlaceHolderVars are transparent for the purpose of statistics lookup; + * they do not alter the value distribution of the underlying expression. + * However, they can obscure the structure, preventing us from recognizing + * matches to base columns, index expressions, or extended statistics. So + * strip them out first. + */ + basenode = strip_all_phvs_deep(root, node); - if (IsA(node, RelabelType)) - basenode = (Node *) ((RelabelType *) node)->arg; - else - basenode = node; + /* + * Look inside any binary-compatible relabeling. We need to handle nested + * RelabelType nodes here, because the prior stripping of PlaceHolderVars + * may have brought separate RelabelTypes into adjacency. + */ + while (IsA(basenode, RelabelType)) + basenode = (Node *) ((RelabelType *) basenode)->arg; /* Fast path for a simple Var */ - if (IsA(basenode, Var) && (varRelid == 0 || varRelid == ((Var *) basenode)->varno)) { Var *var = (Var *) basenode; /* Set up result fields other than the stats tuple */ - vardata->var = basenode; /* return Var without relabeling */ + vardata->var = basenode; /* return Var without phvs or relabeling */ vardata->rel = find_base_rel(root, var->varno); vardata->atttype = var->vartype; vardata->atttypmod = var->vartypmod; @@ -5684,7 +5696,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, { onerel = find_base_rel(root, relid); vardata->rel = onerel; - node = basenode; /* strip any relabeling */ + node = basenode; /* strip any phvs or relabeling */ } /* else treat it as a constant */ } @@ -5695,13 +5707,13 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, { /* treat it as a variable of a join relation */ vardata->rel = find_join_rel(root, varnos); - node = basenode; /* strip any relabeling */ + node = basenode; /* strip any phvs or relabeling */ } else if (bms_is_member(varRelid, varnos)) { /* ignore the vars belonging to other relations */ vardata->rel = find_base_rel(root, varRelid); - node = basenode; /* strip any relabeling */ + node = basenode; /* strip any phvs or relabeling */ /* note: no point in expressional-index search here */ } /* else treat it as a constant */ @@ -5934,6 +5946,63 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, bms_free(varnos); } +/* + * strip_all_phvs_deep + * Deeply strip all PlaceHolderVars in an expression. + + * As a performance optimization, we first use a lightweight walker to check + * for the presence of any PlaceHolderVars. The expensive mutator is invoked + * only if a PlaceHolderVar is found, avoiding unnecessary memory allocation + * and tree copying in the common case where no PlaceHolderVars are present. + */ +static Node * +strip_all_phvs_deep(PlannerInfo *root, Node *node) +{ + /* If there are no PHVs anywhere, we needn't work hard */ + if (root->glob->lastPHId == 0) + return node; + + if (!contain_placeholder_walker(node, NULL)) + return node; + return strip_all_phvs_mutator(node, NULL); +} + +/* + * contain_placeholder_walker + * Lightweight walker to check if an expression contains any + * PlaceHolderVars + */ +static bool +contain_placeholder_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, PlaceHolderVar)) + return true; + + return expression_tree_walker(node, contain_placeholder_walker, context); +} + +/* + * strip_all_phvs_mutator + * Mutator to deeply strip all PlaceHolderVars + */ +static Node * +strip_all_phvs_mutator(Node *node, void *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, PlaceHolderVar)) + { + /* Strip it and recurse into its contained expression */ + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + return strip_all_phvs_mutator((Node *) phv->phexpr, context); + } + + return expression_tree_mutator(node, strip_all_phvs_mutator, context); +} + /* * examine_simple_variable * Handle a simple Var for examine_variable diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index edde9e9989..1416f2943b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6877,10 +6877,10 @@ where ss.a = ss.phv and f1 = 0; QUERY PLAN ------------------------------------ Nested Loop - -> Seq Scan on int4_tbl - Filter: (f1 = 0) -> Seq Scan on parttbl1 parttbl Filter: (a = 12) + -> Seq Scan on int4_tbl + Filter: (f1 = 0) (5 rows) select * from @@ -9879,6 +9879,29 @@ GROUP BY s.c1, s.c2; (7 rows) DROP TABLE group_tbl; +-- Test that we ignore PlaceHolderVars when looking up statistics +EXPLAIN (COSTS OFF) +SELECT t1.unique1 FROM tenk1 t1 LEFT JOIN + (SELECT *, 42 AS phv FROM tenk1 t2) ss ON t1.unique2 = ss.unique2 +WHERE ss.unique1 = ss.phv AND t1.unique1 < 100; + QUERY PLAN +-------------------------------------------------- + Nested Loop + -> Seq Scan on tenk1 t2 + Filter: (unique1 = 42) + -> Index Scan using tenk1_unique2 on tenk1 t1 + Index Cond: (unique2 = t2.unique2) + Filter: (unique1 < 100) +(6 rows) + +SELECT t1.unique1 FROM tenk1 t1 LEFT JOIN + (SELECT *, 42 AS phv FROM tenk1 t2) ss ON t1.unique2 = ss.unique2 +WHERE ss.unique1 = ss.phv AND t1.unique1 < 100; + unique1 +--------- + 42 +(1 row) + -- -- Test for a nested loop join involving index scan, transforming OR-clauses -- to SAOP. diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 7ec84f3b14..b91fb7574d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -3764,6 +3764,16 @@ GROUP BY s.c1, s.c2; DROP TABLE group_tbl; +-- Test that we ignore PlaceHolderVars when looking up statistics +EXPLAIN (COSTS OFF) +SELECT t1.unique1 FROM tenk1 t1 LEFT JOIN + (SELECT *, 42 AS phv FROM tenk1 t2) ss ON t1.unique2 = ss.unique2 +WHERE ss.unique1 = ss.phv AND t1.unique1 < 100; + +SELECT t1.unique1 FROM tenk1 t1 LEFT JOIN + (SELECT *, 42 AS phv FROM tenk1 t2) ss ON t1.unique2 = ss.unique2 +WHERE ss.unique1 = ss.phv AND t1.unique1 < 100; + -- -- Test for a nested loop join involving index scan, transforming OR-clauses -- to SAOP.