1- import pandas as pd
2- from sklearn .linear_model import LinearRegression
3- from sklearn .metrics import mean_squared_error
1+ import pandas
2+ import sklearn .linear_model
3+ import sklearn .metrics
44
55from revoscalepy .computecontext .RxInSqlServer import RxInSqlServer
66from revoscalepy .computecontext .RxInSqlServer import RxSqlServerData
99
1010def get_rental_predictions ():
1111 conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;'
12- column_info = {
13- "Year" : { "type" : "integer" },
14- "Month" : { "type" : "integer" },
15- "Day" : { "type" : "integer" },
16- "RentalCount" : { "type" : "integer" },
17- "WeekDay" : {
18- "type" : "factor" ,
19- "levels" : ["1" , "2" , "3" , "4" , "5" , "6" , "7" ]
20- },
21- "Holiday" : {
22- "type" : "factor" ,
23- "levels" : ["1" , "0" ]
24- },
25- "Snow" : {
26- "type" : "factor" ,
27- "levels" : ["1" , "0" ]
28- }
12+ column_info = {
13+ "Year" : {"type" : "integer" },
14+ "Month" : {"type" : "integer" },
15+ "Day" : {"type" : "integer" },
16+ "RentalCount" : {"type" : "integer" },
17+ "WeekDay" : {
18+ "type" : "factor" ,
19+ "levels" : ["1" , "2" , "3" , "4" , "5" , "6" , "7" ],
20+ },
21+ "Holiday" : {
22+ "type" : "factor" ,
23+ "levels" : ["1" , "0" ],
24+ },
25+ "Snow" : {
26+ "type" : "factor" ,
27+ "levels" : ["1" , "0" ],
2928 }
29+ }
3030
3131 data_source = RxSqlServerData (table = "dbo.rental_data" ,
32- connectionString = conn_str , colInfo = column_info )
33- computeContext = RxInSqlServer (
34- connectionString = conn_str ,
35- numTasks = 1 ,
36- autoCleanup = False
37- )
38-
39-
32+ connectionString = conn_str ,
33+ colInfo = column_info )
4034 RxInSqlServer (connectionString = conn_str , numTasks = 1 , autoCleanup = False )
41-
35+
4236 # import data source and convert to pandas dataframe
43- df = pd .DataFrame (rx_import_datasource (data_source ))
37+ df = pandas .DataFrame (rx_import_datasource (data_source ))
4438 print ("Data frame:" , df )
45- # Get all the columns from the dataframe.
46- columns = df .columns .tolist ()
47- # Filter the columns to remove ones we don't want.
48- columns = [c for c in columns if c not in ["Year" ]]
39+ # Get all the columns from the dataframe and filter out the ones we don't
40+ # want.
41+ columns = [x for x in df .columns if x == "Year" ]
4942 # Store the variable we'll be predicting on.
5043 target = "RentalCount"
5144 # Generate the training set. Set random_state to be able to replicate results.
@@ -56,14 +49,16 @@ def get_rental_predictions():
5649 print ("Training set shape:" , train .shape )
5750 print ("Testing set shape:" , test .shape )
5851 # Initialize the model class.
59- lin_model = LinearRegression ()
52+ lin_model = sklearn . linear_model . LinearRegression ()
6053 # Fit the model to the training data.
6154 lin_model .fit (train [columns ], train [target ])
6255 # Generate our predictions for the test set.
6356 lin_predictions = lin_model .predict (test [columns ])
6457 print ("Predictions:" , lin_predictions )
6558 # Compute error between our test predictions and the actual values.
66- lin_mse = mean_squared_error (lin_predictions , test [target ])
59+ lin_mse = sklearn . metrics . mean_squared_error (lin_predictions , test [target ])
6760 print ("Computed error:" , lin_mse )
6861
69- get_rental_predictions ()
62+
63+ if __name__ == "__main__" :
64+ get_rental_predictions ()
0 commit comments