Skip to content

Commit 9b29766

Browse files
committed
Minor changes to make script idempotent
1 parent e55d245 commit 9b29766

3 files changed

Lines changed: 53 additions & 43 deletions

File tree

samples/features/sql-big-data-cluster/bootstrap-sample-db.sql

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -64,11 +64,12 @@ AS
6464
GO
6565

6666
-- Create table for storing the machine learning models
67-
CREATE TABLE sales_models (
68-
model_name varchar(100) NOT NULL PRIMARY KEY,
69-
model varbinary(max) NOT NULL,
70-
model_native varbinary(max) NOT NULL,
71-
created_by nvarchar(300) NOT NULL DEFAULT(SYSTEM_USER),
72-
create_time datetime2 NOT NULL DEFAULT(SYSDATETIME())
73-
);
67+
IF NOT EXISTS(SELECT * FROM sys.tables WHERE name = 'sales_models')
68+
CREATE TABLE sales_models (
69+
model_name varchar(100) NOT NULL PRIMARY KEY,
70+
model varbinary(max) NOT NULL,
71+
model_native varbinary(max) NOT NULL,
72+
created_by nvarchar(300) NOT NULL DEFAULT(SYSTEM_USER),
73+
create_time datetime2 NOT NULL DEFAULT(SYSDATETIME())
74+
);
7475
GO

samples/features/sql-big-data-cluster/data-pool/data-ingestion-spark.sql

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ GO
66
-- any new database in SQL Master instance. This is used to reference the
77
-- data pool in a SQL Server 2019 big data cluster.
88
--
9-
CREATE EXTERNAL TABLE [web_clickstreams_spark_results]
10-
("wcs_click_date_sk" BIGINT , "wcs_click_time_sk" BIGINT , "wcs_sales_sk" BIGINT , "wcs_item_sk" BIGINT , "wcs_web_page_sk" BIGINT , "wcs_user_sk" BIGINT)
11-
WITH
12-
(
13-
DATA_SOURCE = SqlDataPool,
14-
DISTRIBUTION = ROUND_ROBIN
15-
);
9+
IF NOT EXISTS(SELECT * FROM sys.external_tables WHERE name = 'web_clickstreams_spark_results')
10+
CREATE EXTERNAL TABLE [web_clickstreams_spark_results]
11+
("wcs_click_date_sk" BIGINT , "wcs_click_time_sk" BIGINT , "wcs_sales_sk" BIGINT , "wcs_item_sk" BIGINT , "wcs_web_page_sk" BIGINT , "wcs_user_sk" BIGINT)
12+
WITH
13+
(
14+
DATA_SOURCE = SqlDataPool,
15+
DISTRIBUTION = ROUND_ROBIN
16+
);
1617

1718
-- Data can be ingested into the external table from a spark job.
1819
--

samples/features/sql-big-data-cluster/data-pool/data-ingestion-sql.sql

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,61 @@ GO
66
-- any new database in SQL Master instance. This is used to reference the
77
-- data pool in a SQL Server 2019 big data cluster.
88
--
9-
CREATE EXTERNAL TABLE [web_clickstreams_dp]
10-
("wcs_click_date_sk" BIGINT , "wcs_click_time_sk" BIGINT , "wcs_sales_sk" BIGINT , "wcs_item_sk" BIGINT , "wcs_web_page_sk" BIGINT , "wcs_user_sk" BIGINT)
11-
WITH
12-
(
13-
DATA_SOURCE = SqlDataPool,
14-
DISTRIBUTION = ROUND_ROBIN
15-
);
9+
IF NOT EXISTS(SELECT * FROM sys.external_tables WHERE name = 'web_clickstream_clicks_data_pool')
10+
CREATE EXTERNAL TABLE [web_clickstream_clicks_data_pool]
11+
("wcs_user_sk" BIGINT , "i_category_id" BIGINT , "clicks" BIGINT)
12+
WITH
13+
(
14+
DATA_SOURCE = SqlDataPool,
15+
DISTRIBUTION = ROUND_ROBIN
16+
);
1617
GO
1718
-- Currently the create external table operation is asynchronous and there is no
1819
-- way to determine completion of the operation. To prevent failures of the insert
1920
-- into the external table, wait for a few minutes.
2021
WAITFOR DELAY '00:02:00';
2122
GO
22-
-- Insert results of a SELECT statement into the external table created on the data pool
23+
-- Insert results of a SELECT statement into the external table created on the data pool.
24+
-- Store summary results for quick access instead of always going to the source tables.
2325
--
2426
DECLARE @db_name SYSNAME = 'sales'
2527
DECLARE @schema_name SYSNAME = 'dbo'
26-
DECLARE @table_name SYSNAME = 'web_clickstreams_dp'
27-
DECLARE @query SYSNAME = 'SELECT TOP(1000) * FROM sales.dbo.web_clickstreams WHERE wcs_user_sk IS NOT NULL'
28-
28+
DECLARE @table_name SYSNAME = 'web_clickstream_clicks_data_pool'
29+
DECLARE @query NVARCHAR(MAX) = '
30+
SELECT wcs_user_sk, i_category_id, COUNT_BIG(*) as clicks
31+
FROM sales.dbo.web_clickstreams
32+
INNER JOIN sales.dbo.item it ON (wcs_item_sk = i_item_sk
33+
AND wcs_user_sk IS NOT NULL)
34+
GROUP BY wcs_user_sk, i_category_id
35+
HAVING COUNT_BIG(*) > 100;
36+
'
2937
EXEC model..sp_data_pool_table_insert_data @db_name, @schema_name, @table_name, @query
3038
GO
3139

3240
-- Query data inserted from sp_data_pool_table_insert_data
3341
--
34-
SELECT count(*) FROM [dbo].[web_clickstreams_dp]
35-
SELECT TOP 10 * FROM [dbo].[web_clickstreams_dp]
42+
SELECT count(*) FROM [dbo].[web_clickstream_clicks_data_pool]
43+
SELECT TOP 10 * FROM [dbo].[web_clickstream_clicks_data_pool]
3644

3745
-- Join external table with local tables
3846
--
3947
SELECT TOP (100)
40-
wcs_user_sk,
41-
SUM( CASE WHEN i_category = 'Books' THEN 1 ELSE 0 END) AS book_category_clicks,
42-
SUM( CASE WHEN i_category_id = 1 THEN 1 ELSE 0 END) AS [Home & Kitchen],
43-
SUM( CASE WHEN i_category_id = 2 THEN 1 ELSE 0 END) AS [Music],
44-
SUM( CASE WHEN i_category_id = 3 THEN 1 ELSE 0 END) AS [Books],
45-
SUM( CASE WHEN i_category_id = 4 THEN 1 ELSE 0 END) AS [Clothing & Accessories],
46-
SUM( CASE WHEN i_category_id = 5 THEN 1 ELSE 0 END) AS [Electronics],
47-
SUM( CASE WHEN i_category_id = 6 THEN 1 ELSE 0 END) AS [Tools & Home Improvement],
48-
SUM( CASE WHEN i_category_id = 7 THEN 1 ELSE 0 END) AS [Toys & Games],
49-
SUM( CASE WHEN i_category_id = 8 THEN 1 ELSE 0 END) AS [Movies & TV],
50-
SUM( CASE WHEN i_category_id = 9 THEN 1 ELSE 0 END) AS [Sports & Outdoors]
51-
FROM [dbo].[web_clickstreams_dp]
52-
INNER JOIN item it ON (wcs_item_sk = i_item_sk
53-
AND wcs_user_sk IS NOT NULL)
54-
GROUP BY wcs_user_sk;
48+
w.wcs_user_sk,
49+
SUM( CASE WHEN i.i_category = 'Books' THEN 1 ELSE 0 END) AS book_category_clicks,
50+
SUM( CASE WHEN w.i_category_id = 1 THEN 1 ELSE 0 END) AS [Home & Kitchen],
51+
SUM( CASE WHEN w.i_category_id = 2 THEN 1 ELSE 0 END) AS [Music],
52+
SUM( CASE WHEN w.i_category_id = 3 THEN 1 ELSE 0 END) AS [Books],
53+
SUM( CASE WHEN w.i_category_id = 4 THEN 1 ELSE 0 END) AS [Clothing & Accessories],
54+
SUM( CASE WHEN w.i_category_id = 5 THEN 1 ELSE 0 END) AS [Electronics],
55+
SUM( CASE WHEN w.i_category_id = 6 THEN 1 ELSE 0 END) AS [Tools & Home Improvement],
56+
SUM( CASE WHEN w.i_category_id = 7 THEN 1 ELSE 0 END) AS [Toys & Games],
57+
SUM( CASE WHEN w.i_category_id = 8 THEN 1 ELSE 0 END) AS [Movies & TV],
58+
SUM( CASE WHEN w.i_category_id = 9 THEN 1 ELSE 0 END) AS [Sports & Outdoors]
59+
FROM [dbo].[web_clickstream_clicks_data_pool] as w
60+
INNER JOIN (SELECT DISTINCT i_category_id, i_category FROM item) as i
61+
ON i.i_category_id = w.i_category_id
62+
GROUP BY w.wcs_user_sk;
5563
GO
5664

57-
DROP EXTERNAL TABLE [dbo].[web_clickstreams_dp];
65+
DROP EXTERNAL TABLE [dbo].[web_clickstream_clicks_data_pool];
5866
GO

0 commit comments

Comments
 (0)