Skip to content

Commit d2f2834

Browse files
committed
Added data source DDLs
so script can be self-contained.
1 parent 960f469 commit d2f2834

9 files changed

Lines changed: 84 additions & 24 deletions

samples/features/sql-big-data-cluster/data-pool/data-ingestion-sql.sql

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for Data Pool inside a SQL big data cluster
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlDataPool')
7+
CREATE EXTERNAL DATA SOURCE SqlDataPool
8+
WITH (LOCATION = 'sqldatapool://service-mssql-controller:8080/datapools/default');
9+
410
-- Create external table in a data pool in SQL Server 2019 big data cluster.
511
-- The SqlDataPool data source is a special data source that is available in
612
-- any new database in SQL Master instance. This is used to reference the
@@ -25,30 +31,13 @@ GO
2531
-- Insert results of a SELECT statement into the external table created on the data pool.
2632
-- Store summary results for quick access instead of going to the source tables always.
2733
--
28-
IF SERVERPROPERTY('ProductLevel') = 'CTP2.3'
29-
BEGIN
30-
DECLARE @db_name SYSNAME = 'sales'
31-
DECLARE @schema_name SYSNAME = 'dbo'
32-
DECLARE @table_name SYSNAME = 'web_clickstream_clicks_data_pool'
33-
DECLARE @query NVARCHAR(MAX) = '
34-
SELECT wcs_user_sk, i_category_id, COUNT_BIG(*) as clicks
35-
FROM sales.dbo.web_clickstreams
36-
INNER JOIN sales.dbo.item it ON (wcs_item_sk = i_item_sk
37-
AND wcs_user_sk IS NOT NULL)
38-
GROUP BY wcs_user_sk, i_category_id
39-
HAVING COUNT_BIG(*) > 100;
40-
'
41-
EXEC model..sp_data_pool_table_insert_data @db_name, @schema_name, @table_name, @query
42-
END;
43-
44-
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
45-
INSERT INTO web_clickstream_clicks_data_pool
46-
SELECT wcs_user_sk, i_category_id, COUNT_BIG(*) as clicks
47-
FROM sales.dbo.web_clickstreams_hdfs_parquet
48-
INNER JOIN sales.dbo.item it ON (wcs_item_sk = i_item_sk
49-
AND wcs_user_sk IS NOT NULL)
50-
GROUP BY wcs_user_sk, i_category_id
51-
HAVING COUNT_BIG(*) > 100;
34+
INSERT INTO web_clickstream_clicks_data_pool
35+
SELECT wcs_user_sk, i_category_id, COUNT_BIG(*) as clicks
36+
FROM sales.dbo.web_clickstreams_hdfs_parquet
37+
INNER JOIN sales.dbo.item it ON (wcs_item_sk = i_item_sk
38+
AND wcs_user_sk IS NOT NULL)
39+
GROUP BY wcs_user_sk, i_category_id
40+
HAVING COUNT_BIG(*) > 100;
5241
GO
5342

5443
-- Query data inserted into the data pool table

samples/features/sql-big-data-cluster/data-virtualization/hadoop/inventory-export-hdfs-rcfile.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
USE sales
22
GO
33

4+
-- Enable option to allow INSERT against external table defined on HADOOP data source
5+
--
46
DECLARE @config_option nvarchar(100) = 'allow polybase export';
57
IF NOT EXISTS(SELECT * FROM sys.configurations WHERE name = @config_option and value_in_use = 1)
68
BEGIN
@@ -9,6 +11,19 @@ BEGIN
911
END;
1012
GO
1113

14+
-- Create data source for HDFS inside SQL big data cluster using the HADOOP type.
15+
-- The HADOOP data source type was introduced in SQL Server 2016 to query data in
16+
-- Hadoop clusters and relies on Java Hadoop client libraries and Map/Reduce for query
17+
-- execution.
18+
--
19+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
20+
CREATE EXTERNAL DATA SOURCE HadoopData
21+
WITH(
22+
TYPE=HADOOP,
23+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
24+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
25+
);
26+
1227
-- Create file format for RCFILE with appropriate properties.
1328
--
1429
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'RCFILE')

samples/features/sql-big-data-cluster/data-virtualization/hadoop/product-reviews-hdfs-orc.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
USE sales
22
GO
33

4+
-- Create data source for HDFS inside SQL big data cluster using the HADOOP type.
5+
-- The HADOOP data source type was introduced in SQL Server 2016 to query data in
6+
-- Hadoop clusters and relies on Java Hadoop client libraries and Map/Reduce for query
7+
-- execution.
8+
--
9+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
10+
CREATE EXTERNAL DATA SOURCE HadoopData
11+
WITH(
12+
TYPE=HADOOP,
13+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
14+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
15+
);
16+
417
-- Create file format for orc file with appropriate properties.
518
--
619
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'orc_file')

samples/features/sql-big-data-cluster/data-virtualization/hadoop/web-clickstreams-hdfs-orc.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
USE sales
22
GO
33

4+
-- Create data source for HDFS inside SQL big data cluster using the HADOOP type.
5+
-- The HADOOP data source type was introduced in SQL Server 2016 to query data in
6+
-- Hadoop clusters and relies on Java Hadoop client libraries and Map/Reduce for query
7+
-- execution.
8+
--
9+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
10+
CREATE EXTERNAL DATA SOURCE HadoopData
11+
WITH(
12+
TYPE=HADOOP,
13+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
14+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
15+
);
16+
417
-- Create file format for orc file with appropriate properties.
518
--
619
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'orc_file')

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-csv.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for HDFS inside SQL big data cluster.
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
9+
410
-- Create file format for CSV separated file with appropriate properties.
511
--
612
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'csv_file')

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-parquet.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for HDFS inside SQL big data cluster.
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
9+
410
-- Create file format for parquet file with appropriate properties.
511
--
612
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'parquet_file')

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-tsv.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for HDFS inside SQL big data cluster.
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
9+
410
-- Create file format for tab separated file with appropriate properties.
511
--
612
CREATE EXTERNAL FILE FORMAT tsv_file

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/web-clickstreams-hdfs-csv.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for HDFS inside SQL big data cluster.
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
9+
410
-- Create file format for CSV file with appropriate properties.
511
--
612
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'csv_file')

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/web-clickstreams-hdfs-parquet.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
USE sales
22
GO
33

4+
-- Create external data source for HDFS inside SQL big data cluster.
5+
--
6+
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
9+
410
-- Create file format for parquet file with appropriate properties.
511
--
612
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'parquet_file')

0 commit comments

Comments
 (0)