ClickHouse: Support scalar expressions in WITH clause#2333
ClickHouse: Support scalar expressions in WITH clause#2333alrevuelta wants to merge 1 commit intoapache:mainfrom
Conversation
| #[test] | ||
| fn parse_with_clause_named_expression_unsupported_in_other_dialects() { | ||
| // The named-expression form is only enabled for ClickHouse; other | ||
| // dialects should still reject `WITH 42 AS answer …`. | ||
| let res = sqlparser::parser::Parser::parse_sql( | ||
| &GenericDialect {}, | ||
| "WITH 42 AS answer SELECT answer FROM t", | ||
| ); | ||
| assert!(res.is_err(), "expected parse error, got {res:?}"); | ||
| } |
There was a problem hiding this comment.
I think we can skip this, no need to test what the parser does for an invalid query for unsupported dialects
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression_ast() { |
There was a problem hiding this comment.
we can merge this with the test above since they cover the same feature
| #[test] | ||
| fn parse_with_clause_named_expression() { | ||
| // Plain literal scalar. | ||
| clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t"); |
There was a problem hiding this comment.
for the tests, lets use all_dialects_where instead of hardcoding it to clickhouse
| // CTE form must start with an identifier. If the leading token | ||
| // can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this | ||
| // is unambiguously the named-expression form. | ||
| if matches!(self.peek_token().token, Token::Word(_)) { | ||
| if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? { | ||
| return Ok(WithItem::Cte(cte)); | ||
| } | ||
| } |
There was a problem hiding this comment.
Can this be simplified to use peek_subquery_or_cte_start? e.g.
that we do if self.dialect.supports() and !self.peek_subquery_or_cte_start { parse expr } else { parse cte }
| /// Parse a single item in a `WITH` clause. | ||
| /// | ||
| /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`). | ||
| /// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`] | ||
| /// — currently only ClickHouse — also accept the reversed form | ||
| /// `<expression> AS <identifier>`, which can be freely interleaved with | ||
| /// CTEs in the same comma-separated list. |
There was a problem hiding this comment.
| /// Parse a single item in a `WITH` clause. | |
| /// | |
| /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`). | |
| /// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`] | |
| /// — currently only ClickHouse — also accept the reversed form | |
| /// `<expression> AS <identifier>`, which can be freely interleaved with | |
| /// CTEs in the same comma-separated list. | |
| /// Parse a single item in a `WITH` clause. |
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||
| pub enum WithItem { |
There was a problem hiding this comment.
repr wise can we do something like?
pub enum WithExpression {
Cte(...),
Cse(ExprWithAlias), // can we reuse exprwithalias?
}| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||
| pub enum WithItem { | ||
| /// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`. |
There was a problem hiding this comment.
| /// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`. | |
| /// A common table expression. |
| /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery, | ||
| /// lambda, …) to a name visible in the surrounding query. | ||
| /// | ||
| /// See ClickHouse's [common scalar expressions][1]. | ||
| /// | ||
| /// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions |
There was a problem hiding this comment.
| /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery, | |
| /// lambda, …) to a name visible in the surrounding query. | |
| /// | |
| /// See ClickHouse's [common scalar expressions][1]. | |
| /// | |
| /// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions | |
| /// A common scalar expression. | |
| /// | |
| /// [Clickhouse]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions |
| pub cte_tables: Vec<Cte>, | ||
| /// The items declared by this `WITH` clause: traditional CTEs and, | ||
| /// for dialects that support it, named expressions. | ||
| pub items: Vec<WithItem>, |
There was a problem hiding this comment.
| pub items: Vec<WithItem>, | |
| pub exprs: Vec<WithExpression>, |
| /// The items declared by this `WITH` clause: traditional CTEs and, | ||
| /// for dialects that support it, named expressions. |
There was a problem hiding this comment.
| /// The items declared by this `WITH` clause: traditional CTEs and, | |
| /// for dialects that support it, named expressions. | |
| /// The expressions declared by this `WITH` clause. |
Fixes #2221.
ClickHouse's
WITHclause supports a reversed-order form<expression> AS <identifier>(alongside, and freely interleaved with, traditional CTEs). Currently the parser hard-errors on the leading non-identifier token:Changes
WithItemenum (Cte(Cte)|Named { expr, alias });With::cte_tables: Vec<Cte>becomesWith::items: Vec<WithItem>.Dialect::supports_with_clause_scalar_expression(defaultfalse,trueforClickHouseDialectonly).parse_with_itemroutes each WITH-list element: when the leading token can't begin a CTE name, it parses<expr> AS <ident>directly; otherwise it tries the CTE form viamaybe_parseand falls back.parse_cteitself is unchanged.Tests
Added in
tests/sqlparser_clickhouse.rs, covering: literal scalar (the issue's example), string literal, aggregate, scalar subquery, bare-identifier disambiguation, mixing scalar + CTE in one list, lambda, and a negative test confirming non-ClickHouse dialects still reject the form.