apache
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/regr-avgx.md
Lines changed: 64 additions & 0 deletions b/docs/sql-manual/sql-functions/aggregate-functions/regr-avgx.md
Lines changed: 64 additions & 0 deletions
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/regr-avgy.md
Lines changed: 64 additions & 0 deletions b/docs/sql-manual/sql-functions/aggregate-functions/regr-avgy.md
Lines changed: 64 additions & 0 deletions
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/regr-count.md
Lines changed: 66 additions & 0 deletions b/docs/sql-manual/sql-functions/aggregate-functions/regr-count.md
Lines changed: 66 additions & 0 deletions
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/regr-intercept.md
Lines changed: 22 additions & 38 deletions b/docs/sql-manual/sql-functions/aggregate-functions/regr-intercept.md
Lines changed: 22 additions & 38 deletions
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/regr-r2.md
Lines changed: 75 additions & 0 deletions b/docs/sql-manual/sql-functions/aggregate-functions/regr-r2.md
Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,64 @@
+---
+{
+    "title": "REGR_AVGX",
+    "language": "en",
+    "description": "Returns the average of the independent variable (x) for non-null pairs in a group."
+}
+---
+
+## Description
+
+Returns the average of the independent variable `x` over non-null `(y, x)` pairs in a group, where `x` is the independent variable and `y` is the dependent variable.
+
+## Syntax
+
+```sql
+REGR_AVGX(<y>, <x>)
+```
+
+## Parameters
+
+| Parameter | Description |
+| -- | -- |
+| `<y>` | The dependent variable. Supported type: Double. |
+| `<x>` | The independent variable. Supported type: Double. |
+
+## Return Value
+
+Returns a Double value representing the average of `x` for non-null `(y, x)` pairs.
+If the group has no rows, or every row in the group has a NULL in `y` or `x` (so no non-null `(y, x)` pair remains), the function returns `NULL`.
+
+## Example
+
+```sql
+CREATE TABLE test_regr (
+  `id` int,
+  `x` double,
+  `y` double
+) DUPLICATE KEY (`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+PROPERTIES (
+  "replication_allocation" = "tag.location.default: 1"
+);
+
+INSERT INTO test_regr VALUES
+(1, 0, NULL),
+(2, 1, 3),
+(2, 2, 5),
+(2, 3, 7),
+(2, 4, 9),
+(2, 5, NULL);
+```
+
+```sql
+SELECT id, REGR_AVGX(y, x) FROM test_regr GROUP BY id ORDER BY id;
+```
+
+```text
++------+--------------------+
+| id   | REGR_AVGX(y, x)    |
++------+--------------------+
+|    1 |               NULL |
+|    2 |                2.5 |
++------+--------------------+
+```
@@ -0,0 +1,64 @@
+---
+{
+    "title": "REGR_AVGY",
+    "language": "en",
+    "description": "Returns the average of the dependent variable (y) for non-null pairs in a group."
+}
+---
+
+## Description
+
+Returns the average of the dependent variable `y` over non-null `(y, x)` pairs in a group, where `x` is the independent variable and `y` is the dependent variable.
+
+## Syntax
+
+```sql
+REGR_AVGY(<y>, <x>)
+```
+
+## Parameters
+
+| Parameter | Description |
+| -- | -- |
+| `<y>` | The dependent variable. Supported type: Double. |
+| `<x>` | The independent variable. Supported type: Double. |
+
+## Return Value
+
+Returns a Double value representing the average of `y` for non-null `(y, x)` pairs.
+If the group has no rows, or every row in the group has a NULL in `y` or `x` (so no non-null `(y, x)` pair remains), the function returns `NULL`.
+
+## Example
+
+```sql
+CREATE TABLE test_regr (
+  `id` int,
+  `x` double,
+  `y` double
+) DUPLICATE KEY (`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+PROPERTIES (
+  "replication_allocation" = "tag.location.default: 1"
+);
+
+INSERT INTO test_regr VALUES
+(1, 0, NULL),
+(2, 1, 3),
+(2, 2, 5),
+(2, 3, 7),
+(2, 4, 9),
+(2, 5, NULL);
+```
+
+```sql
+SELECT id, REGR_AVGY(y, x) FROM test_regr GROUP BY id ORDER BY id;
+```
+
+```text
++------+------------------+
+| id   | REGR_AVGY(y, x)  |
++------+------------------+
+|    1 |             NULL |
+|    2 |              6.0 |
++------+------------------+
+```
@@ -0,0 +1,66 @@
+---
+{
+    "title": "REGR_COUNT",
+    "language": "en",
+    "description": "Returns the number of non-null (y, x) pairs in a group."
+}
+---
+
+## Description
+
+Returns the number of non-null `(y, x)` pairs in a group, where `x` is the independent variable and `y` is the dependent variable. If there are no valid non-null pairs, the function returns `0`.
+
+## Syntax
+
+```sql
+REGR_COUNT(<y>, <x>)
+```
+
+## Parameters
+
+| Parameter | Description |
+| -- | -- |
+| `<y>` | The dependent variable. Supported type: Double. |
+| `<x>` | The independent variable. Supported type: Double. |
+
+## Return Value
+
+Returns a BIGINT value representing the number of non-null `(y, x)` pairs.
+If there are no rows in the group, or there are no valid non-null `(y, x)` pairs, the function returns `0`.
+
+## Example
+
+```sql
+CREATE TABLE test_regr (
+  `id` int,
+  `x` double,
+  `y` double
+) DUPLICATE KEY (`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+PROPERTIES (
+  "replication_allocation" = "tag.location.default: 1"
+);
+
+INSERT INTO test_regr VALUES
+(1, 0, NULL),
+(2, 1, 3),
+(2, 2, 5),
+(2, 3, 7),
+(2, 4, 9),
+(2, 5, NULL);
+```
+
+```sql
+SELECT id, REGR_COUNT(y, x) FROM test_regr GROUP BY id ORDER BY id;
+```
+
+```text
++------+-------------------+
+| id   | REGR_COUNT(y, x)  |
++------+-------------------+
+|    1 |                 0 |
+|    2 |                 4 |
++------+-------------------+
+```
+
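+`REGR_COUNT` counts only pairs in which both `y` and `x` are non-null. The single row in group 1 has a NULL `y`, so group 1 returns `0`; in group 2 the row with `x = 5` has a NULL `y` and is skipped, leaving `4` pairs.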
@@ -2,17 +2,13 @@
 {
     "title": "REGR_INTERCEPT",
     "language": "en",
-    "description": "Returns the intercept of the univariate linear regression line for non-null pairs in a group."
+    "description": "Returns the intercept of the linear regression line for non-null pairs in a group."
 }
 ---
 
 ## Description
 
-Returns the intercept of the univariate linear regression line for non-null pairs in a group. It is computed for non-null pairs using the following formula:
-
-`AVG(y) - REGR_SLOPE(y, x) * AVG(x)`
-
-Where `x` is the independent variable and y is the dependent variable.
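+Returns the intercept of the linear regression line computed over non-null `(y, x)` pairs in a group, where `x` is the independent variable and `y` is the dependent variable. It is equivalent to `AVG(y) - REGR_SLOPE(y, x) * AVG(x)`, where both averages are taken over the same non-null `(y, x)` pairs (that is, `REGR_AVGY(y, x)` and `REGR_AVGX(y, x)`), not over all rows in the group.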
 
 ## Syntax
 
@@ -29,52 +25,40 @@ REGR_INTERCEPT(<y>, <x>)
 
 ## Return Value
 
-Returns a Double value representing the intercept of the univariate linear regression line for non-null pairs in a group. If there are no rows, or only rows that contain nulls, the function returns NULL.
+Returns a Double value representing the intercept of the linear regression line.
+If the group has no rows, or every row in the group has a NULL in `y` or `x` (so no non-null `(y, x)` pair remains), the function returns `NULL`.
 
-## Examples
+## Example
 
 ```sql
--- Create sample table
-CREATE TABLE test_regr_intercept (
+CREATE TABLE test_regr (
   `id` int,
-  `x` int,
-  `y` int
+  `x` double,
+  `y` double
 ) DUPLICATE KEY (`id`)
 DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
 PROPERTIES (
   "replication_allocation" = "tag.location.default: 1"
 );
 
--- Insert sample data
-INSERT INTO test_regr_intercept VALUES
-(1, 18, 13),
-(2, 14, 27),
-(3, 12, 2),
-(4, 5, 6),
-(5, 10, 20);
-
--- Calculate the linear regression intercept of x and y
-SELECT REGR_INTERCEPT(y, x) FROM test_regr_intercept;
-```
-
-```text
-+----------------------+
-| REGR_INTERCEPT(y, x) |
-+----------------------+
-|    5.512931034482759 |
-+----------------------+
+INSERT INTO test_regr VALUES
+(1, 0, NULL),
+(2, 1, 3),
+(2, 2, 5),
+(2, 3, 7),
+(2, 4, 9),
+(2, 5, NULL);
 ```
 
 ```sql
-SELECT REGR_INTERCEPT(y, x) FROM test_regr_intercept where x>100;
+SELECT id, REGR_INTERCEPT(y, x) FROM test_regr GROUP BY id ORDER BY id;
 ```
 
-When there are no rows in the group, the function returns `NULL`.
-
 ```text
-+----------------------+
-| REGR_INTERCEPT(y, x) |
-+----------------------+
-|                 NULL |
-+----------------------+
++------+------------------------+
+| id   | REGR_INTERCEPT(y, x)   |
++------+------------------------+
+|    1 |                   NULL |
+|    2 |                    1.0 |
++------+------------------------+
 ```
@@ -0,0 +1,75 @@
+---
+{
+    "title": "REGR_R2",
+    "language": "en",
+    "description": "Returns the coefficient of determination of the linear regression for non-null pairs in a group."
+}
+---
+
+## Description
+
+Returns the coefficient of determination of the linear regression computed over non-null `(y, x)` pairs in a group, where `x` is the independent variable and `y` is the dependent variable.
+
+## Syntax
+
+```sql
+REGR_R2(<y>, <x>)
+```
+
+## Parameters
+
+| Parameter | Description |
+| -- | -- |
+| `<y>` | The dependent variable. Supported type: Double. |
+| `<x>` | The independent variable. Supported type: Double. |
+
+## Return Value
+
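+Returns a Double value representing the coefficient of determination (R-squared). In the rules below, `VAR_POP` and `CORR` are evaluated over the non-null `(y, x)` pairs only, and the rules are checked in order: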
+- If `REGR_COUNT(y, x) < 1`, the function returns `NULL`.
+- If `VAR_POP(x) = 0`, the function returns `NULL`.
+- If `VAR_POP(y) = 0`, the function returns `1`.
+- Otherwise, the function returns `POWER(CORR(y, x), 2)`.
+
+## Example
+
+```sql
+CREATE TABLE test_regr (
+  `id` int,
+  `x` double,
+  `y` double
+) DUPLICATE KEY (`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+PROPERTIES (
+  "replication_allocation" = "tag.location.default: 1"
+);
+
+INSERT INTO test_regr VALUES
+(1, 0, NULL),
+(2, 1, 3),
+(2, 2, 5),
+(2, 3, 7),
+(2, 4, 9),
+(2, 5, NULL),
+(3, 1, 5),
+(3, 1, 7),
+(4, 1, 5),
+(4, 2, 5);
+```
+
+```sql
+SELECT id, REGR_R2(y, x) FROM test_regr GROUP BY id ORDER BY id;
+```
+
+```text
++------+---------------------+
+| id   | REGR_R2(y, x)       |
++------+---------------------+
+|    1 |                NULL |
+|    2 |                 1.0 |
+|    3 |                NULL |
+|    4 |                 1.0 |
++------+---------------------+
+```
+
+Group 1 has no non-null `(y, x)` pair, so the result is `NULL`. In group 3 every `x` is `1`, so `VAR_POP(x) = 0` and the result is `NULL`. In group 4 every `y` is `5`, so `VAR_POP(y) = 0` and the result is `1.0`.