@@ -45,6 +45,54 @@ SELECT
4545GROUP BY wcs_user_sk;
4646GO
4747
-- Create the view used by the ML services training stored procedure.
-- Emits one row per clickstream user that has a matching customer and
-- customer_demographics row: the book-click label, two 0/1 demographic flags,
-- the credit rating, and per-user click counts for item category ids 1-9.
CREATE OR ALTER VIEW [dbo].[web_clickstreams_hdfs_book_clicks]
AS
SELECT
    /* There is a bug in the TPCx-BB data generator which results in data where all users have purchased books.
       This will not work for ML training purposes, so we treat users with 1-5 clicks in the book category as
       not interested in books. */
    CASE WHEN q.clicks_in_category < 6 THEN 0 ELSE q.clicks_in_category END AS clicks_in_category,
    -- Literals are written without leading blanks: '=' and IN only ignore
    -- trailing spaces, so ' College' would never match the value 'College'.
    CASE
        WHEN cd.cd_education_status IN ('Advanced Degree', 'College', '4 yr Degree', '2 yr Degree') THEN 1
        ELSE 0
    END AS college_education,
    CASE WHEN cd.cd_gender = 'M' THEN 1 ELSE 0 END AS male,
    COALESCE(cd.cd_credit_rating, 'Unknown') AS cd_credit_rating,
    q.clicks_in_1,
    q.clicks_in_2,
    q.clicks_in_3,
    q.clicks_in_4,
    q.clicks_in_5,
    q.clicks_in_6,
    q.clicks_in_7,
    q.clicks_in_8,
    q.clicks_in_9,
    q.wcs_user_sk
FROM (
    -- Per-user click totals: one conditional SUM per category of interest.
    SELECT
        w.wcs_user_sk,
        SUM(CASE WHEN i.i_category = 'Books' THEN 1 ELSE 0 END) AS clicks_in_category,
        SUM(CASE WHEN i.i_category_id = 1 THEN 1 ELSE 0 END) AS clicks_in_1,
        SUM(CASE WHEN i.i_category_id = 2 THEN 1 ELSE 0 END) AS clicks_in_2,
        SUM(CASE WHEN i.i_category_id = 3 THEN 1 ELSE 0 END) AS clicks_in_3,
        SUM(CASE WHEN i.i_category_id = 4 THEN 1 ELSE 0 END) AS clicks_in_4,
        SUM(CASE WHEN i.i_category_id = 5 THEN 1 ELSE 0 END) AS clicks_in_5,
        SUM(CASE WHEN i.i_category_id = 6 THEN 1 ELSE 0 END) AS clicks_in_6,
        SUM(CASE WHEN i.i_category_id = 7 THEN 1 ELSE 0 END) AS clicks_in_7,
        SUM(CASE WHEN i.i_category_id = 8 THEN 1 ELSE 0 END) AS clicks_in_8,
        SUM(CASE WHEN i.i_category_id = 9 THEN 1 ELSE 0 END) AS clicks_in_9
    FROM web_clickstreams_hdfs_parquet AS w
    INNER JOIN item AS i
        ON w.wcs_item_sk = i.i_item_sk
    -- Row filter kept out of the join condition; clicks without a user are ignored.
    WHERE w.wcs_user_sk IS NOT NULL
    GROUP BY w.wcs_user_sk
) AS q
INNER JOIN customer AS c
    ON q.wcs_user_sk = c.c_customer_sk
INNER JOIN customer_demographics AS cd
    ON c.c_current_cdemo_sk = cd.cd_demo_sk;
GO
90+
91+
-- Preview up to 100 rows of the view; with no ORDER BY, which rows come back is arbitrary.
SELECT TOP (100)
    *
FROM web_clickstreams_hdfs_book_clicks;
GO
95+
4896-- Cleanup
4997/*
5098DROP EXTERNAL TABLE [dbo].[web_clickstreams_hdfs_parquet];
0 commit comments