|
2 | 2 | from sklearn.linear_model import LinearRegression |
3 | 3 | from sklearn.metrics import mean_squared_error |
4 | 4 |
|
5 | | -from revoscalepy.computecontext.RxInSqlServer import RxInSqlServer |
6 | | -from revoscalepy.computecontext.RxInSqlServer import RxSqlServerData |
7 | | -from revoscalepy.etl.RxImport import rx_import_datasource |
8 | | - |
| 5 | +# If you are running SQL Server 2017 RC1 or later: |
| 6 | +from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData |
| 7 | +from revoscalepy import rx_import |
9 | 8 |
|
10 | 9 | def get_rental_predictions(): |
11 | | - conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;' |
12 | | - column_info = { |
13 | | - "Year" : { "type" : "integer" }, |
14 | | - "Month" : { "type" : "integer" }, |
15 | | - "Day" : { "type" : "integer" }, |
16 | | - "RentalCount" : { "type" : "integer" }, |
17 | | - "WeekDay" : { |
18 | | - "type" : "factor", |
19 | | - "levels" : ["1", "2", "3", "4", "5", "6", "7"] |
20 | | - }, |
21 | | - "Holiday" : { |
22 | | - "type" : "factor", |
23 | | - "levels" : ["1", "0"] |
24 | | - }, |
25 | | - "Snow" : { |
26 | | - "type" : "factor", |
27 | | - "levels" : ["1", "0"] |
28 | | - } |
29 | | - } |
| 10 | +# Connection string used to connect to the SQL Server named instance |
| 11 | + conn_str = 'Driver=SQL Server;Server=MYSQLSERVER;Database=TutorialDB;Trusted_Connection=True;' |
| 12 | + |
| 13 | +# Define the columns we wish to import |
| 14 | + column_info = { |
| 15 | + "Year" : { "type" : "integer" }, |
| 16 | + "Month" : { "type" : "integer" }, |
| 17 | + "Day" : { "type" : "integer" }, |
| 18 | + "RentalCount" : { "type" : "integer" }, |
| 19 | + "WeekDay" : { |
| 20 | + "type" : "factor", |
| 21 | + "levels" : ["1", "2", "3", "4", "5", "6", "7"] |
| 22 | + }, |
| 23 | + "Holiday" : { |
| 24 | + "type" : "factor", |
| 25 | + "levels" : ["1", "0"] |
| 26 | + }, |
| 27 | + "Snow" : { |
| 28 | + "type" : "factor", |
| 29 | + "levels" : ["1", "0"] |
| 30 | + } |
| 31 | + } |
| 32 | + |
| 33 | + # Get the data from the SQL Server table |
| 34 | + data_source = RxSqlServerData(table="dbo.rental_data", |
| 35 | + connection_string=conn_str, column_info=column_info) |
| 36 | + computeContext = RxInSqlServer( |
| 37 | + connection_string = conn_str, |
| 38 | + num_tasks = 1, |
| 39 | + auto_cleanup = False |
| 40 | +) |
| 41 | + |
| 42 | + |
| 43 | + RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False) |
| 44 | + |
| 45 | + # import data source and convert to pandas dataframe |
| 46 | + df = pd.DataFrame(rx_import(input_data = data_source)) |
| 47 | + print("Data frame:", df) |
| 48 | + # Get all the columns from the dataframe. |
| 49 | + columns = df.columns.tolist() |
| 50 | + # Filter out the columns we don't want to use in training (NOTE: only "Year" is excluded here; the target "RentalCount" appears to remain among the features - verify) |
| 51 | + columns = [c for c in columns if c not in ["Year"]] |
| 52 | + # Store the variable we'll be predicting on. |
| 53 | + target = "RentalCount" |
| 54 | + # Generate the training set. Set random_state to be able to replicate results. |
| 55 | + train = df.sample(frac=0.8, random_state=1) |
| 56 | + # Select anything not in the training set and put it in the testing set. |
| 57 | + test = df.loc[~df.index.isin(train.index)] |
| 58 | + # Print the shapes of both sets. |
| 59 | + print("Training set shape:", train.shape) |
| 60 | + print("Testing set shape:", test.shape) |
| 61 | + # Initialize the model class. |
| 62 | + lin_model = LinearRegression() |
| 63 | + # Fit the model to the training data. |
| 64 | + lin_model.fit(train[columns], train[target]) |
30 | 65 |
|
31 | | - data_source = RxSqlServerData(table="dbo.rental_data", |
32 | | - connectionString=conn_str, colInfo=column_info) |
33 | | - computeContext = RxInSqlServer( |
34 | | - connectionString = conn_str, |
35 | | - numTasks = 1, |
36 | | - autoCleanup = False |
37 | | - ) |
38 | | - |
39 | | - |
40 | | - RxInSqlServer(connectionString=conn_str, numTasks=1, autoCleanup=False) |
41 | | - |
42 | | - # import data source and convert to pandas dataframe |
43 | | - df = pd.DataFrame(rx_import_datasource(data_source)) |
44 | | - print("Data frame:", df) |
45 | | - # Get all the columns from the dataframe. |
46 | | - columns = df.columns.tolist() |
47 | | - # Filter the columns to remove ones we don't want. |
48 | | - columns = [c for c in columns if c not in ["Year"]] |
49 | | - # Store the variable we'll be predicting on. |
50 | | - target = "RentalCount" |
51 | | - # Generate the training set. Set random_state to be able to replicate results. |
52 | | - train = df.sample(frac=0.8, random_state=1) |
53 | | - # Select anything not in the training set and put it in the testing set. |
54 | | - test = df.loc[~df.index.isin(train.index)] |
55 | | - # Print the shapes of both sets. |
56 | | - print("Training set shape:", train.shape) |
57 | | - print("Testing set shape:", test.shape) |
58 | | - # Initialize the model class. |
59 | | - lin_model = LinearRegression() |
60 | | - # Fit the model to the training data. |
61 | | - lin_model.fit(train[columns], train[target]) |
62 | | - # Generate our predictions for the test set. |
63 | | - lin_predictions = lin_model.predict(test[columns]) |
64 | | - print("Predictions:", lin_predictions) |
65 | | - # Compute error between our test predictions and the actual values. |
66 | | - lin_mse = mean_squared_error(lin_predictions, test[target]) |
67 | | - print("Computed error:", lin_mse) |
| 66 | + # Generate our predictions for the test set. |
| 67 | + lin_predictions = lin_model.predict(test[columns]) |
| 68 | + print("Predictions:", lin_predictions) |
| 69 | + # Compute error between our test predictions and the actual values. |
| 70 | + lin_mse = mean_squared_error(lin_predictions, test[target]) |
| 71 | + print("Computed error:", lin_mse) |
68 | 72 |
|
69 | 73 | get_rental_predictions() |
0 commit comments