Merge pull request #268 from NelGson/master

uc-msft · web-flow · commit 95a634626679 · 2017-08-14T12:07:41.000-07:00
Updates to the ski rental Python sample for SQL Server 2017 RC2.
diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.py
@@ -2,68 +2,72 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_squared_error
 
-from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer
-from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData
-from revoscalepy.etl.RxImport import rx_import_datasource
-
+# Imports for SQL Server 2017 RC1 and later:
+from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
+from revoscalepy import rx_import
 
 def get_rental_predictions():
-    conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
-    column_info = { 
-            "Year" : { "type" : "integer" },
-            "Month" : { "type" : "integer" }, 
-            "Day" : { "type" : "integer" }, 
-            "RentalCount" : { "type" : "integer" }, 
-            "WeekDay" : { 
-                "type" : "factor", 
-                "levels" : ["1", "2", "3", "4", "5", "6", "7"]
-            },
-            "Holiday" : { 
-                "type" : "factor", 
-                "levels" : ["1", "0"]
-            },
-            "Snow" : { 
-                "type" : "factor", 
-                "levels" : ["1", "0"]
-            }
-        }
+#Connection string to connect to SQL Server named instance
+ conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
+
+#Define the columns we wish to import
+ column_info = {
+         "Year" : { "type" : "integer" },
+         "Month" : { "type" : "integer" },
+         "Day" : { "type" : "integer" },
+         "RentalCount" : { "type" : "integer" },
+         "WeekDay" : {
+             "type" : "factor",
+             "levels" : ["1", "2", "3", "4", "5", "6", "7"]
+         },
+         "Holiday" : {
+             "type" : "factor",
+             "levels" : ["1", "0"]
+         },
+         "Snow" : {
+             "type" : "factor",
+             "levels" : ["1", "0"]
+         }
+     }
+
+ # Get the data from the SQL Server table
+ data_source = RxSqlServerData(table="dbo.rental_data",
+                              connection_string=conn_str, column_info=column_info)
+ computeContext = RxInSqlServer(
+     connection_string = conn_str,
+     num_tasks = 1,
+     auto_cleanup = False
+)
+
+
+ RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)
+
+ # import data source and convert to pandas dataframe
+ df = pd.DataFrame(rx_import(input_data = data_source))
+ print("Data frame:", df)
+ # Get all the columns from the dataframe.
+ columns = df.columns.tolist()
+ # Filter the columns to remove ones we don't want to use in training (NOTE: the target "RentalCount" is not removed here)
+ columns = [c for c in columns if c not in ["Year"]]
+ # Store the variable we'll be predicting on.
+ target = "RentalCount"
+ # Generate the training set.  Set random_state to be able to replicate results.
+ train = df.sample(frac=0.8, random_state=1)
+ # Select anything not in the training set and put it in the testing set.
+ test = df.loc[~df.index.isin(train.index)]
+ # Print the shapes of both sets.
+ print("Training set shape:", train.shape)
+ print("Testing set shape:", test.shape)
+ # Initialize the model class.
+ lin_model = LinearRegression()
+ # Fit the model to the training data.
+ lin_model.fit(train[columns], train[target])
 
-    data_source = RxSqlServerData(table="dbo.rental_data",
-                                  connectionString=conn_str, colInfo=column_info)
-    computeContext = RxInSqlServer(
-        connectionString = conn_str,
-        numTasks = 1,
-        autoCleanup = False
-        )
-     
-    
-    RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False)
-    
-    # import data source and convert to pandas dataframe
-    df = pd.DataFrame(rx_import_datasource(data_source))
-    print("Data frame:", df)
-    # Get all the columns from the dataframe.
-    columns = df.columns.tolist()
-    # Filter the columns to remove ones we don't want.
-    columns = [c for c in columns if c not in ["Year"]]
-    # Store the variable we'll be predicting on.
-    target = "RentalCount"
-    # Generate the training set.  Set random_state to be able to replicate results.
-    train = df.sample(frac=0.8, random_state=1)
-    # Select anything not in the training set and put it in the testing set.
-    test = df.loc[~df.index.isin(train.index)]
-    # Print the shapes of both sets.
-    print("Training set shape:", train.shape)
-    print("Testing set shape:", test.shape)
-    # Initialize the model class.
-    lin_model = LinearRegression()
-    # Fit the model to the training data.
-    lin_model.fit(train[columns], train[target])
-    # Generate our predictions for the test set.
-    lin_predictions = lin_model.predict(test[columns])
-    print("Predictions:", lin_predictions)
-    # Compute error between our test predictions and the actual values.
-    lin_mse = mean_squared_error(lin_predictions, test[target])
-    print("Computed error:", lin_mse)
+ # Generate our predictions for the test set.
+ lin_predictions = lin_model.predict(test[columns])
+ print("Predictions:", lin_predictions)
+ # Compute error between our test predictions and the actual values.
+ lin_mse = mean_squared_error(lin_predictions, test[target])
+ print("Computed error:", lin_mse)
 
 get_rental_predictions()
diff --git a/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql b/samples/features/machine-learning-services/python/getting-started/rental-prediction/rental_prediction.sql
@@ -27,23 +27,24 @@ BEGIN
       @language = N'Python'
     , @script = N'
 
+from sklearn import linear_model
+
+import pickle
+
+
 df = rental_train_data
 
 # Get all the columns from the dataframe.
 columns = df.columns.tolist()
 
-
 # Store the variable well be predicting on.
 target = "RentalCount"
 
-from sklearn.linear_model import LinearRegression
-
 # Initialize the model class.
-lin_model = LinearRegression()
+lin_model = linear_model.LinearRegression()
 # Fit the model to the training data.
 lin_model.fit(df[columns], df[target])
 
-import pickle
 #Before saving the model to the DB table, we need to convert it to a binary object
 trained_model = pickle.dumps(lin_model)
 '
@@ -75,15 +76,15 @@ AS
 BEGIN
 	DECLARE @py_model varbinary(max) = (select model from rental_py_models where model_name = @model);
 
-	EXEC sp_execute_external_script 
+	EXEC sp_execute_external_script
 					@language = N'Python'
 				  , @script = N'
 
 
 import pickle
 rental_model = pickle.loads(py_model)
 
-  
+
 df = rental_score_data
 #print(df)
 
@@ -106,15 +107,15 @@ lin_mse = mean_squared_error(lin_predictions, df[target])
 #print(lin_mse)
 
 import pandas as pd
-predictions_df = pd.DataFrame(lin_predictions)  
+predictions_df = pd.DataFrame(lin_predictions)
 OutputDataSet = pd.concat([predictions_df, df["RentalCount"], df["Month"], df["Day"], df["WeekDay"], df["Snow"], df["Holiday"], df["Year"]], axis=1)
 '
 	, @input_data_1 = N'Select "RentalCount", "Year" ,"Month", "Day", "WeekDay", "Snow", "Holiday"  from rental_data where Year = 2015'
 	, @input_data_1_name = N'rental_score_data'
 	, @params = N'@py_model varbinary(max)'
 	, @py_model = @py_model
 	with result sets (("RentalCount_Predicted" float, "RentalCount" float, "Month" float,"Day" float,"WeekDay" float,"Snow" float,"Holiday" float, "Year" float));
-			  
+
 END;
 GO