Skip to content

Commit 8821c2f

Browse files
elbutterelbutter
authored and committed
changed name of folder
1 parent d5f0f93 commit 8821c2f

16 files changed

Lines changed: 336 additions & 0 deletions
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- CTAS: copy every row of dbo.Trip into a new table, dbo.TripHashed, that is
-- hash-distributed on DateID and stored as a clustered columnstore index.
CREATE TABLE [dbo].[TripHashed]
WITH
(
    DISTRIBUTION = HASH([DateID]),
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [dbo].[Trip];
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
-- CTAS (Create Table as Select) Creates tables in the SQL DW from external tables
2+
3+
-- Load [dbo].[Date] from the external table [ext].[Date] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Date]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Date]
OPTION (LABEL = 'CTAS : Load [dbo].[Date]');
13+
14+
15+
-- Load [dbo].[Geography] from the external table [ext].[Geography] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Geography]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Geography]
OPTION (LABEL = 'CTAS : Load [dbo].[Geography]');
25+
26+
-- Load [dbo].[HackneyLicense] from the external table [ext].[HackneyLicense] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[HackneyLicense]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[HackneyLicense]
OPTION (LABEL = 'CTAS : Load [dbo].[HackneyLicense]');
36+
37+
-- Load [dbo].[Medallion] from the external table [ext].[Medallion] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Medallion]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Medallion]
OPTION (LABEL = 'CTAS : Load [dbo].[Medallion]');
47+
48+
-- Load [dbo].[Time] from the external table [ext].[Time] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Time]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Time]
OPTION (LABEL = 'CTAS : Load [dbo].[Time]');
58+
59+
-- Load [dbo].[Weather] from the external table [ext].[Weather] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Weather]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Weather]
OPTION (LABEL = 'CTAS : Load [dbo].[Weather]');
69+
70+
-- Load [dbo].[Trip] from the external table [ext].[Trip] via CTAS.
-- Round-robin distribution, clustered columnstore storage; the query label
-- identifies this load.
CREATE TABLE [dbo].[Trip]
WITH
(
    DISTRIBUTION = ROUND_ROBIN,
    CLUSTERED COLUMNSTORE INDEX
)
AS
SELECT *
FROM [ext].[Trip]
OPTION (LABEL = 'CTAS : Load [dbo].[Trip]');
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This folder contains the PDF and SQL files needed to complete the SQL Data Warehouse Free Trial lab.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- Adds user XLRCUser to the xlargerc resource class role.
-- The statement is explicitly terminated with a semicolon, like the other
-- scripts in this lab.
EXEC sp_addrolemember 'xlargerc', 'XLRCUser';
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- External data source NYTPublic: a Hadoop-type source located at the
-- "2013" container of the nytpublic Azure Blob Storage account.
-- For non-public external data sources, credentials must be set up as well.
-- Read more here: https://azure.microsoft.com/en-us/documentation/articles/sql-data-warehouse-get-started-load-with-polybase/#step-2-create-an-external-table-for-the-sample-data
CREATE EXTERNAL DATA SOURCE NYTPublic
WITH
(
    TYPE = HADOOP,
    LOCATION = 'wasbs://2013@nytpublic.blob.core.windows.net/'
);
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- Defines external file format for the NYT data in Azure Blob Storage
2+
3+
-- File format for uncompressed, comma-delimited text files.
-- No string delimiter or custom date format is configured, and missing
-- values are not replaced by type defaults (USE_TYPE_DEFAULT = FALSE).
CREATE EXTERNAL FILE FORMAT uncompressedcsv
WITH
(
    FORMAT_TYPE = DELIMITEDTEXT,
    FORMAT_OPTIONS
    (
        FIELD_TERMINATOR = ',',
        STRING_DELIMITER = '',
        DATE_FORMAT = '',
        USE_TYPE_DEFAULT = FALSE
    )
);
12+
13+
-- File format for gzip-compressed, pipe-delimited text files.
-- No string delimiter or custom date format is configured, and missing
-- values are not replaced by type defaults (USE_TYPE_DEFAULT = FALSE).
CREATE EXTERNAL FILE FORMAT compressedcsv
WITH
(
    FORMAT_TYPE = DELIMITEDTEXT,
    FORMAT_OPTIONS
    (
        FIELD_TERMINATOR = '|',
        STRING_DELIMITER = '',
        DATE_FORMAT = '',
        USE_TYPE_DEFAULT = FALSE
    ),
    DATA_COMPRESSION = 'org.apache.hadoop.io.compress.GzipCodec'
);
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Schema that holds the external data objects.
CREATE SCHEMA [ext];
GO
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
-- Creates external tables
2+
3+
-- External table over the 'Date' location of the NYTPublic data source,
-- read with the uncompressedcsv file format.
-- REJECT_TYPE = value with REJECT_VALUE = 0: no rejected rows are tolerated.
-- The statement is terminated with a semicolon, like every other
-- CREATE EXTERNAL TABLE in this script.
CREATE EXTERNAL TABLE [ext].[Date]
(
    [DateID]            int         NOT NULL,
    [Date]              datetime    NULL,
    [DateBKey]          char(10)    COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfMonth]        varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DaySuffix]         varchar(4)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayName]           varchar(9)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfWeek]         char(1)     COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfWeekInMonth]  varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfWeekInYear]   varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfQuarter]      varchar(3)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DayOfYear]         varchar(3)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [WeekOfMonth]       varchar(1)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [WeekOfQuarter]     varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [WeekOfYear]        varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [Month]             varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [MonthName]         varchar(9)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [MonthOfQuarter]    varchar(2)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [Quarter]           char(1)     COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [QuarterName]       varchar(9)  COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [Year]              char(4)     COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [YearName]          char(7)     COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [MonthYear]         char(10)    COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [MMYYYY]            char(6)     COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [FirstDayOfMonth]   date        NULL,
    [LastDayOfMonth]    date        NULL,
    [FirstDayOfQuarter] date        NULL,
    [LastDayOfQuarter]  date        NULL,
    [FirstDayOfYear]    date        NULL,
    [LastDayOfYear]     date        NULL,
    [IsHolidayUSA]      bit         NULL,
    [IsWeekday]         bit         NULL,
    [HolidayUSA]        varchar(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
)
WITH
(
    LOCATION = 'Date',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = value,
    REJECT_VALUE = 0
)
;
46+
47+
48+
-- External table over the 'Geography' location of the NYTPublic data source,
-- read with the uncompressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[Geography]
(
    [GeographyID] INT         NOT NULL,
    [ZipCodeBKey] VARCHAR(10) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
    [County]      VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [City]        VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [State]       VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [Country]     VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [ZipCode]     VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
)
WITH
(
    LOCATION = 'Geography',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
67+
68+
-- External table over the 'HackneyLicense' location of the NYTPublic data source,
-- read with the uncompressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[HackneyLicense]
(
    [HackneyLicenseID]   INT         NOT NULL,
    [HackneyLicenseBKey] VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
    [HackneyLicenseCode] VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
)
WITH
(
    LOCATION = 'HackneyLicense',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
83+
-- External table over the 'Medallion' location of the NYTPublic data source,
-- read with the uncompressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[Medallion]
(
    [MedallionID]   INT         NOT NULL,
    [MedallionBKey] VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
    [MedallionCode] VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
)
WITH
(
    LOCATION = 'Medallion',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
98+
-- External table over the 'Time' location of the NYTPublic data source,
-- read with the uncompressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[Time]
(
    [TimeID]                INT          NOT NULL,
    [TimeBKey]              VARCHAR(8)   COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
    [HourNumber]            TINYINT      NOT NULL,
    [MinuteNumber]          TINYINT      NOT NULL,
    [SecondNumber]          TINYINT      NOT NULL,
    [TimeInSecond]          INT          NOT NULL,
    [HourlyBucket]          VARCHAR(15)  COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL,
    [DayTimeBucketGroupKey] INT          NOT NULL,
    [DayTimeBucket]         VARCHAR(100) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL
)
WITH
(
    LOCATION = 'Time',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
119+
120+
-- External table over the 'Trip2013' location of the NYTPublic data source.
-- Unlike the other external tables in this script it is read with the
-- compressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[Trip]
(
    [DateID]              INT         NOT NULL,
    [MedallionID]         INT         NOT NULL,
    [HackneyLicenseID]    INT         NOT NULL,
    [PickupTimeID]        INT         NOT NULL,
    [DropoffTimeID]       INT         NOT NULL,
    [PickupGeographyID]   INT         NULL,
    [DropoffGeographyID]  INT         NULL,
    [PickupLatitude]      FLOAT       NULL,
    [PickupLongitude]     FLOAT       NULL,
    [PickupLatLong]       VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [DropoffLatitude]     FLOAT       NULL,
    [DropoffLongitude]    FLOAT       NULL,
    [DropoffLatLong]      VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [PassengerCount]      INT         NULL,
    [TripDurationSeconds] INT         NULL,
    [TripDistanceMiles]   FLOAT       NULL,
    [PaymentType]         VARCHAR(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
    [FareAmount]          MONEY       NULL,
    [SurchargeAmount]     MONEY       NULL,
    [TaxAmount]           MONEY       NULL,
    [TipAmount]           MONEY       NULL,
    [TollsAmount]         MONEY       NULL,
    [TotalAmount]         MONEY       NULL
)
WITH
(
    LOCATION = 'Trip2013',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = compressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
155+
-- External table over the 'Weather2013' location of the NYTPublic data source,
-- read with the uncompressedcsv file format. No rejected rows are tolerated.
CREATE EXTERNAL TABLE [ext].[Weather]
(
    [DateID]                   INT   NOT NULL,
    [GeographyID]              INT   NOT NULL,
    [PrecipitationInches]      FLOAT NOT NULL,
    [AvgTemperatureFahrenheit] FLOAT NOT NULL
)
WITH
(
    LOCATION = 'Weather2013',
    DATA_SOURCE = NYTPublic,
    FILE_FORMAT = uncompressedcsv,
    REJECT_TYPE = VALUE,
    REJECT_VALUE = 0
);
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Connect to the master database, then create the login and a user mapped to it.
-- The password below is a sample value for the lab only; replace it before
-- using this script anywhere else.
-- Whitespace inside the quotes is part of the password, so the literal must
-- not start or end with a space.
-- NOTE(review): CREATE USER runs in whichever database the session is connected
-- to; confirm the user also exists in the data warehouse database before
-- adding it to a resource class role there.
CREATE LOGIN XLRCLogin WITH PASSWORD = 'a123reallySTRONGpassword!';
CREATE USER XLRCUser FOR LOGIN XLRCLogin;
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Single-column statistics on DateID for both dbo.Date and dbo.Trip,
-- created to check join performance improvements.
CREATE STATISTICS [dbo.Date DateID stats]
    ON [dbo].[Date] ([DateID]);

CREATE STATISTICS [dbo.Trip DateID stats]
    ON [dbo].[Trip] ([DateID]);

0 commit comments

Comments
 (0)