Skip to content

Commit e5295bc

Browse files
authored
Merge pull request #538 from inchiosa/tf-notebooks
Add tensorflow gpu notebooks
2 parents a380e07 + c375c60 commit e5295bc

3 files changed

Lines changed: 198 additions & 0 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# TensorFlow on GPUs in SQL Server 2019 big data cluster
2+
3+
The notebooks in this directory illustrate fitting TensorFlow image classification models using GPU acceleration.
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"metadata": {
3+
"kernelspec": {
4+
"name": "pyspark3kernel",
5+
"display_name": "PySpark3"
6+
},
7+
"language_info": {
8+
"name": "pyspark3",
9+
"mimetype": "text/x-python",
10+
"codemirror_mode": {
11+
"name": "python",
12+
"version": 3
13+
},
14+
"pygments_lexer": "python3"
15+
}
16+
},
17+
"nbformat_minor": 2,
18+
"nbformat": 4,
19+
"cells": [
20+
{
21+
"cell_type": "code",
22+
"source": "%%configure -f\r\n{\r\n \"executorMemory\": \"4g\",\r\n \"driverMemory\": \"4g\",\r\n \"executorCores\": 4,\r\n \"driverCores\": 2,\r\n \"numExecutors\": 1\r\n}",
23+
"metadata": {
24+
"language": "python"
25+
},
26+
"outputs": [],
27+
"execution_count": 2
28+
},
29+
{
30+
"cell_type": "code",
31+
"source": "# Informational only: print the hostname of the container\r\n# in which the Spark driver is running.\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\"hostname\", shell=True, stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
32+
"metadata": {
33+
"language": "python"
34+
},
35+
"outputs": [],
36+
"execution_count": 3
37+
},
38+
{
39+
"cell_type": "code",
40+
"source": "# Install NVIDIA GPU libraries (CUDA 8.0 packages, cuDNN 6) and TensorFlow for GPU\r\n# in the container where the Spark driver is running.\r\n# The whole script runs in a single shell, so check_output raises\r\n# CalledProcessError only when the last command chain (the cuDNN install)\r\n# exits non-zero; read the printed log to spot earlier failures.\r\n# The /usr/local/cuda symlink is created with `ln -sfn` so the cell can be re-run.\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\necho $CUDA_VERSION\r\nexport CUDA_PKG_VERSION=\"8-0=$CUDA_VERSION-1\"\r\necho $CUDA_PKG_VERSION\r\n\r\nexport PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}\r\nexport LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64\r\n\r\n# nvidia-container-runtime\r\nexport NVIDIA_VISIBLE_DEVICES=all\r\nexport NVIDIA_DRIVER_CAPABILITIES=\"compute,utility\"\r\nexport NVIDIA_REQUIRE_CUDA=\"cuda>=8.0\"\r\n\r\napt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \\\\\r\n rm -rf /var/lib/apt/lists/* && \\\\\r\n NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \\\\\r\n NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \\\\\r\n apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \\\\\r\n apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \\\\\r\n echo \"$NVIDIA_GPGKEY_SUM cudasign.pub\" | sha256sum -c --strict - && rm cudasign.pub && \\\\\r\n echo \"deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /\" > /etc/apt/sources.list.d/cuda.list\r\n\r\napt-get update && apt-get install -y --no-install-recommends \\\\\r\n cuda-nvrtc-$CUDA_PKG_VERSION \\\\\r\n cuda-nvgraph-$CUDA_PKG_VERSION \\\\\r\n cuda-cusolver-$CUDA_PKG_VERSION \\\\\r\n cuda-cublas-8-0=8.0.61.2-1 \\\\\r\n cuda-cufft-$CUDA_PKG_VERSION \\\\\r\n cuda-curand-$CUDA_PKG_VERSION \\\\\r\n cuda-cusparse-$CUDA_PKG_VERSION \\\\\r\n cuda-npp-$CUDA_PKG_VERSION \\\\\r\n cuda-cudart-$CUDA_PKG_VERSION && \\\\\r\n ln -sfn cuda-8.0 /usr/local/cuda && \\\\\r\n rm -rf /var/lib/apt/lists/*\r\n\r\n# Install tensorflow\r\npip3 install tensorflow-gpu==1.4.0\r\n\r\n# add cudnn 6\r\necho \"deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /\" > /etc/apt/sources.list.d/nvidia-ml.list\r\n\r\nexport CUDNN_VERSION=6.0.21\r\n#LABEL com.nvidia.cudnn.version=\"${CUDNN_VERSION}\"\r\n\r\napt-get update && apt-get install -y --no-install-recommends \\\\\r\n libcudnn6=$CUDNN_VERSION-1+cuda8.0 && \\\\\r\n rm -rf /var/lib/apt/lists/*\r\n''',\r\n stderr=subprocess.STDOUT,\r\n shell=True).decode(\"utf-8\")\r\nprint(stdout)",
41+
"metadata": {
42+
"language": "python"
43+
},
44+
"outputs": [],
45+
"execution_count": 4
46+
},
47+
{
48+
"cell_type": "code",
49+
"source": "# Enumerate the devices (CPU and any GPUs) that TensorFlow can see\r\nfrom tensorflow.python.client import device_lib\r\n\r\nlocal_devices = device_lib.list_local_devices()\r\nlocal_devices",
50+
"metadata": {
51+
"language": "python"
52+
},
53+
"outputs": [],
54+
"execution_count": 5
55+
},
56+
{
57+
"cell_type": "code",
58+
"source": "# Train a small dense classifier on MNIST and report its test metrics\r\nimport tensorflow as tf\r\n\r\nmnist = tf.keras.datasets.mnist\r\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\r\n\r\n# Rescale the image arrays by 1/255 before training\r\nx_train = x_train / 255.0\r\nx_test = x_test / 255.0\r\n\r\nmodel = tf.keras.models.Sequential()\r\nmodel.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # input_shape needed for older tensorflow\r\nmodel.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))\r\nmodel.add(tf.keras.layers.Dropout(0.2))\r\nmodel.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))\r\n\r\nmodel.compile(\r\n    optimizer='adam',\r\n    loss='sparse_categorical_crossentropy',\r\n    metrics=['accuracy'],\r\n)\r\n\r\nmodel.fit(x_train, y_train, epochs=5)\r\nprint(\"\\n\")\r\nmetrics = model.evaluate(x_test, y_test)\r\nprint(\"\\n\")\r\nprint(metrics)",
59+
"metadata": {
60+
"language": "python"
61+
},
62+
"outputs": [],
63+
"execution_count": 11
64+
},
65+
{
66+
"cell_type": "code",
67+
"source": "# Show available disk space (df -h)\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\r\n'''\r\ndf -h\r\n''',\r\n        shell=True,\r\n        stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
68+
"metadata": {
69+
"language": "python"
70+
},
71+
"outputs": [],
72+
"execution_count": 12
73+
},
74+
{
75+
"cell_type": "code",
76+
"source": "# Fetch the tensorflow/models repository, which holds the CIFAR 10 benchmark code.\r\n# The clone is skipped when /tmp/models is already present.\r\nimport os\r\nimport subprocess\r\n\r\nif not os.path.isdir(\"/tmp/models\"):\r\n    stdout = str(\r\n        subprocess.check_output(\r\n'''\r\napt-get update && apt-get install -y git\r\ncd /tmp\r\ngit clone https://github.com/tensorflow/models.git\r\n''',\r\n            shell=True,\r\n            stderr=subprocess.STDOUT),\r\n        \"utf-8\")\r\n    print(stdout)\r\nelse:\r\n    print(\"CIFAR 10 repo already cloned\")",
77+
"metadata": {
78+
"language": "python"
79+
},
80+
"outputs": [],
81+
"execution_count": 13
82+
},
83+
{
84+
"cell_type": "code",
85+
"source": "# Run the CIFAR 10 training script from the cloned repo for 100 steps\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\r\n'''\r\npython3 /tmp/models/tutorials/image/cifar10/cifar10_train.py --max_steps 100\r\n''',\r\n        shell=True,\r\n        stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
86+
"metadata": {
87+
"language": "python"
88+
},
89+
"outputs": [],
90+
"execution_count": 14
91+
}
92+
]
93+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
{
2+
"metadata": {
3+
"kernelspec": {
4+
"name": "pyspark3kernel",
5+
"display_name": "PySpark3"
6+
},
7+
"language_info": {
8+
"name": "pyspark3",
9+
"mimetype": "text/x-python",
10+
"codemirror_mode": {
11+
"name": "python",
12+
"version": 3
13+
},
14+
"pygments_lexer": "python3"
15+
}
16+
},
17+
"nbformat_minor": 2,
18+
"nbformat": 4,
19+
"cells": [
20+
{
21+
"cell_type": "code",
22+
"source": "%%configure -f\r\n{\r\n \"executorMemory\": \"4g\",\r\n \"driverMemory\": \"4g\",\r\n \"executorCores\": 4,\r\n \"driverCores\": 2,\r\n \"numExecutors\": 1\r\n}",
23+
"metadata": {
24+
"language": "python"
25+
},
26+
"outputs": [],
27+
"execution_count": 2
28+
},
29+
{
30+
"cell_type": "code",
31+
"source": "# Informational only: print the hostname of the container\r\n# in which the Spark driver is running.\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\"hostname\", shell=True, stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
32+
"metadata": {
33+
"language": "python"
34+
},
35+
"outputs": [],
36+
"execution_count": 3
37+
},
38+
{
39+
"cell_type": "code",
40+
"source": "# Confirm that the CUDA_VERSION environment variable is set.\r\n# Its exact value is irrelevant: CUDA 9 can be installed even when\r\n# CUDA_VERSION is set to 8.0.61.\r\nimport os\r\n\r\ncuda_version = os.environ['CUDA_VERSION']  # raises KeyError if the variable is unset\r\nprint(cuda_version)",
41+
"metadata": {
42+
"language": "python"
43+
},
44+
"outputs": [],
45+
"execution_count": 4
46+
},
47+
{
48+
"cell_type": "code",
49+
"source": "# Install NVIDIA GPU libraries and TensorFlow for GPU\r\n# in the container where the Spark driver is running\r\n# per https://www.tensorflow.org/install/gpu#ubuntu_1604_cuda_90_for_tensorflow_1130\r\n# Every `apt install` passes -y: the script runs in a non-interactive\r\n# subprocess, so a confirmation prompt could not be answered.\r\n# check_output raises CalledProcessError only when the last command\r\n# (the pip3 install) exits non-zero; read the printed log to spot earlier failures.\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\n# Add NVIDIA package repository\r\napt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub\r\n\r\nwget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\nchown _apt cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\nchmod u+rwx cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\napt install -y ./cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\nwget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\nchown _apt nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\nchmod u+rwx nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\napt install -y ./nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\napt update\r\n\r\n# Install CUDA and tools. Include optional NCCL 2.x\r\napt install -y cuda9.0 cuda-cublas-9-0 cuda-cufft-9-0 cuda-curand-9-0 \\\\\r\n cuda-cusolver-9-0 cuda-cusparse-9-0 libcudnn7=7.2.1.38-1+cuda9.0 \\\\\r\n libnccl2=2.2.13-1+cuda9.0 cuda-command-line-tools-9-0\r\n\r\n# Optional: Install the TensorRT runtime (must be after CUDA install)\r\napt update\r\napt install -y libnvinfer4=4.1.2-1+cuda9.0\r\n\r\npip3 install tensorflow-gpu==1.12.0\r\n''',\r\n stderr=subprocess.STDOUT,\r\n shell=True).decode(\"utf-8\")\r\nprint(stdout)",
50+
"metadata": {
51+
"language": "python"
52+
},
53+
"outputs": [],
54+
"execution_count": 5
55+
},
56+
{
57+
"cell_type": "code",
58+
"source": "# Enumerate the devices (CPU and any GPUs) that TensorFlow can see\r\nfrom tensorflow.python.client import device_lib\r\n\r\nlocal_devices = device_lib.list_local_devices()\r\nlocal_devices",
59+
"metadata": {
60+
"language": "python"
61+
},
62+
"outputs": [],
63+
"execution_count": 6
64+
},
65+
{
66+
"cell_type": "code",
67+
"source": "# Train a small dense classifier on MNIST and report its test metrics\r\nimport tensorflow as tf\r\n\r\nmnist = tf.keras.datasets.mnist\r\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\r\n\r\n# Rescale the image arrays by 1/255 before training\r\nx_train = x_train / 255.0\r\nx_test = x_test / 255.0\r\n\r\nmodel = tf.keras.models.Sequential()\r\nmodel.add(tf.keras.layers.Flatten())\r\nmodel.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))\r\nmodel.add(tf.keras.layers.Dropout(0.2))\r\nmodel.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))\r\n\r\nmodel.compile(\r\n    optimizer='adam',\r\n    loss='sparse_categorical_crossentropy',\r\n    metrics=['accuracy'],\r\n)\r\n\r\nmodel.fit(x_train, y_train, epochs=5)\r\nprint(\"\\n\")\r\nmetrics = model.evaluate(x_test, y_test)\r\nprint(\"\\n\")\r\nprint(metrics)",
68+
"metadata": {
69+
"language": "python"
70+
},
71+
"outputs": [],
72+
"execution_count": 7
73+
},
74+
{
75+
"cell_type": "code",
76+
"source": "# Show available disk space (df -h)\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\r\n'''\r\ndf -h\r\n''',\r\n        shell=True,\r\n        stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
77+
"metadata": {
78+
"language": "python"
79+
},
80+
"outputs": [],
81+
"execution_count": 8
82+
},
83+
{
84+
"cell_type": "code",
85+
"source": "# Fetch the tensorflow/models repository, which holds the CIFAR 10 benchmark code.\r\n# The clone is skipped when /tmp/models is already present.\r\nimport os\r\nimport subprocess\r\n\r\nif not os.path.isdir(\"/tmp/models\"):\r\n    stdout = str(\r\n        subprocess.check_output(\r\n'''\r\napt-get update && apt-get install -y git\r\ncd /tmp\r\ngit clone https://github.com/tensorflow/models.git\r\n''',\r\n            shell=True,\r\n            stderr=subprocess.STDOUT),\r\n        \"utf-8\")\r\n    print(stdout)\r\nelse:\r\n    print(\"CIFAR 10 repo already cloned\")",
86+
"metadata": {
87+
"language": "python"
88+
},
89+
"outputs": [],
90+
"execution_count": 9
91+
},
92+
{
93+
"cell_type": "code",
94+
"source": "# Run the CIFAR 10 training script from the cloned repo for 100 steps\r\nimport subprocess\r\n\r\nstdout = str(\r\n    subprocess.check_output(\r\n'''\r\npython3 /tmp/models/tutorials/image/cifar10/cifar10_train.py --max_steps 100\r\n''',\r\n        shell=True,\r\n        stderr=subprocess.STDOUT),\r\n    \"utf-8\")\r\nprint(stdout)",
95+
"metadata": {
96+
"language": "python"
97+
},
98+
"outputs": [],
99+
"execution_count": 10
100+
}
101+
]
102+
}

0 commit comments

Comments
 (0)