Skip to content

Commit 9b611a3

Browse files
committed
Updated for CTP2.5 changes
1 parent 5783811 commit 9b611a3

10 files changed

Lines changed: 81 additions & 41 deletions

samples/features/sql-big-data-cluster/data-virtualization/hadoop/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@ In SQL Server 2019 big data cluster, the storage pool consists of HDFS data node
1010

1111
### Instructions
1212

13-
1. Connect to HDFS/Knox gateway from Azure Data Studio using SQL Server big data cluster connection type.
14-
15-
1. Run the [../../spark/spark-sql.ipynb](../../spark/spark-sql.ipynb/) notebook to generate the sample parquet file(s).
16-
1713
1. Connect to SQL Server Master instance.
1814

15+
1. Run the [../../spark/dataloading/transform-csv-files.ipynb](../../spark/dataloading/transform-csv-files.ipynb) notebook to generate the sample parquet file(s).
16+
1917
1. Execute the [web-clickstreams-hdfs-orc.sql](web-clickstreams-hdfs-orc.sql). This script demonstrates how to read ORC file(s) stored in HDFS.
2018

2119
1. Execute the [product-reviews-hdfs-orc.sql](product-reviews-hdfs-orc.sql). This script demonstrates how to read ORC file(s) stored in HDFS.

samples/features/sql-big-data-cluster/data-virtualization/hadoop/inventory-export-hdfs-rcfile.sql

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,20 @@ GO
1717
-- execution.
1818
--
1919
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
20-
CREATE EXTERNAL DATA SOURCE HadoopData
21-
WITH(
22-
TYPE=HADOOP,
23-
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
24-
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
25-
);
20+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
21+
CREATE EXTERNAL DATA SOURCE HadoopData
22+
WITH(
23+
TYPE=HADOOP,
24+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
25+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
26+
);
27+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
28+
CREATE EXTERNAL DATA SOURCE HadoopData
29+
WITH(
30+
TYPE=HADOOP,
31+
LOCATION='hdfs://nmnode-0-0.nmnode-0-svc:9000/',
32+
RESOURCE_MANAGER_LOCATION='master-0.master-svc:8032'
33+
);
2634

2735
-- Create file format for RCFILE with appropriate properties.
2836
--

samples/features/sql-big-data-cluster/data-virtualization/hadoop/product-reviews-hdfs-orc.sql

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,20 @@ GO
77
-- execution.
88
--
99
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
10-
CREATE EXTERNAL DATA SOURCE HadoopData
11-
WITH(
12-
TYPE=HADOOP,
13-
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
14-
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
15-
);
10+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
11+
CREATE EXTERNAL DATA SOURCE HadoopData
12+
WITH(
13+
TYPE=HADOOP,
14+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
15+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
16+
);
17+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
18+
CREATE EXTERNAL DATA SOURCE HadoopData
19+
WITH(
20+
TYPE=HADOOP,
21+
LOCATION='hdfs://nmnode-0-0.nmnode-0-svc:9000/',
22+
RESOURCE_MANAGER_LOCATION='master-0.master-svc:8032'
23+
);
1624

1725
-- Create file format for orc file with appropriate properties.
1826
--

samples/features/sql-big-data-cluster/data-virtualization/hadoop/web-clickstreams-hdfs-orc.sql

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,20 @@ GO
77
-- execution.
88
--
99
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
10-
CREATE EXTERNAL DATA SOURCE HadoopData
11-
WITH(
12-
TYPE=HADOOP,
13-
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
14-
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
15-
);
10+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
11+
CREATE EXTERNAL DATA SOURCE HadoopData
12+
WITH(
13+
TYPE=HADOOP,
14+
LOCATION='hdfs://mssql-master-pool-0.service-master-pool:9000/',
15+
RESOURCE_MANAGER_LOCATION='mssql-master-pool-0.service-master-pool:8032'
16+
);
17+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
18+
CREATE EXTERNAL DATA SOURCE HadoopData
19+
WITH(
20+
TYPE=HADOOP,
21+
LOCATION='hdfs://nmnode-0-0.nmnode-0-svc:9000/',
22+
RESOURCE_MANAGER_LOCATION='master-0.master-svc:8032'
23+
);
1624

1725
-- Create file format for orc file with appropriate properties.
1826
--

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@ In SQL Server 2019 big data cluster, the storage pool consists of HDFS data node
1010

1111
### Instructions
1212

13-
1. Connect to HDFS/Knox gateway from Azure Data Studio using SQL Server big data cluster connection type.
14-
15-
1. Run the [../../spark/spark-sql.ipynb](../../spark/spark-sql.ipynb/) notebook to generate the sample parquet file(s).
16-
1713
1. Connect to SQL Server Master instance.
1814

15+
1. Run the [../../spark/dataloading/transform-csv-files.ipynb](../../spark/dataloading/transform-csv-files.ipynb) notebook to generate the sample parquet file(s).
16+
1917
1. Execute the [web-clickstreams-hdfs-csv.sql](web-clickstreams-hdfs-csv.sql). This script demonstrates how to read CSV file(s) stored in HDFS.
2018

2119
1. Execute the [web-clickstreams-hdfs-parquet.sql](web-clickstreams-hdfs-parquet.sql). This script demonstrates how to read parquet file(s) stored in HDFS.

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-csv.sql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
USE sales
22
GO
33

4-
-- Create external data source for HDFS inside SQ: big data cluster.
4+
-- Create external data source for HDFS inside SQL big data cluster.
55
--
66
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7-
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8-
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
7+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
8+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
9+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
10+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
11+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
12+
WITH (LOCATION = 'sqlhdfs://nmnode-0-0.nmnode-0-svc:50070');
913

1014
-- Create file format for CSV separated file with appropriate properties.
1115
--

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-parquet.sql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
USE sales
22
GO
33

4-
-- Create external data source for HDFS inside SQ: big data cluster.
4+
-- Create external data source for HDFS inside SQL big data cluster.
55
--
66
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7-
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8-
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
7+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
8+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
9+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
10+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
11+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
12+
WITH (LOCATION = 'sqlhdfs://nmnode-0-0.nmnode-0-svc:50070');
913

1014
-- Create file format for parquet file with appropriate properties.
1115
--

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/product-reviews-hdfs-tsv.sql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
USE sales
22
GO
33

4-
-- Create external data source for HDFS inside SQ: big data cluster.
4+
-- Create external data source for HDFS inside SQL big data cluster.
55
--
66
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7-
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8-
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
7+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
8+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
9+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
10+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
11+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
12+
WITH (LOCATION = 'sqlhdfs://nmnode-0-0.nmnode-0-svc:50070');
913

1014
-- Create file format for tab separated file with appropriate properties.
1115
--

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/web-clickstreams-hdfs-csv.sql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
USE sales
22
GO
33

4-
-- Create external data source for HDFS inside SQ: big data cluster.
4+
-- Create external data source for HDFS inside SQL big data cluster.
55
--
66
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7-
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8-
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
7+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
8+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
9+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
10+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
11+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
12+
WITH (LOCATION = 'sqlhdfs://nmnode-0-0.nmnode-0-svc:50070');
913

1014
-- Create file format for CSV file with appropriate properties.
1115
--

samples/features/sql-big-data-cluster/data-virtualization/storage-pool/web-clickstreams-hdfs-parquet.sql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
USE sales
22
GO
33

4-
-- Create external data source for HDFS inside SQ: big data cluster.
4+
-- Create external data source for HDFS inside SQL big data cluster.
55
--
66
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'SqlStoragePool')
7-
CREATE EXTERNAL DATA SOURCE SqlStoragePool
8-
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
7+
IF SERVERPROPERTY('ProductLevel') = 'CTP2.4'
8+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
9+
WITH (LOCATION = 'sqlhdfs://service-master-pool:50070');
10+
ELSE IF SERVERPROPERTY('ProductLevel') = 'CTP2.5'
11+
CREATE EXTERNAL DATA SOURCE SqlStoragePool
12+
WITH (LOCATION = 'sqlhdfs://nmnode-0-0.nmnode-0-svc:50070');
913

1014
-- Create file format for parquet file with appropriate properties.
1115
--

0 commit comments

Comments
 (0)