Merge pull request #538 from inchiosa/tf-notebooks

uc-msft · web-flow · commit e5295bcc39c1 · 2019-03-15T21:11:52.000-07:00
Add tensorflow gpu notebooks
diff --git a/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/README.md b/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/README.md
@@ -0,0 +1,3 @@
+# TensorFlow on GPUs in SQL Server 2019 big data cluster
+
+The notebooks in this directory illustrate fitting TensorFlow image classification models using GPU acceleration.
diff --git a/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/tf-cuda8.ipynb b/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/tf-cuda8.ipynb
@@ -0,0 +1,93 @@
+{
+    "metadata": {
+        "kernelspec": {
+            "name": "pyspark3kernel",
+            "display_name": "PySpark3"
+        },
+        "language_info": {
+            "name": "pyspark3",
+            "mimetype": "text/x-python",
+            "codemirror_mode": {
+                "name": "python",
+                "version": 3
+            },
+            "pygments_lexer": "python3"
+        }
+    },
+    "nbformat_minor": 2,
+    "nbformat": 4,
+    "cells": [
+        {
+            "cell_type": "code",
+            "source": "%%configure -f\r\n{\r\n    \"executorMemory\": \"4g\",\r\n    \"driverMemory\": \"4g\",\r\n    \"executorCores\": 4,\r\n    \"driverCores\": 2,\r\n    \"numExecutors\": 1\r\n}",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 2
+        },
+        {
+            "cell_type": "code",
+            "source": "# For informational purposes,\r\n# print the hostname of the container\r\n# where the Spark driver is running\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n    \"hostname\",\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 3
+        },
+        {
+            "cell_type": "code",
+            "source": "# Install NVIDIA GPU libraries and TensorFlow for GPU\r\n# in the container where the Spark driver is running\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\necho $CUDA_VERSION\r\nexport CUDA_PKG_VERSION=\"8-0=$CUDA_VERSION-1\"\r\necho $CUDA_PKG_VERSION\r\n\r\nexport PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}\r\nexport LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64\r\n\r\n# nvidia-container-runtime\r\nexport NVIDIA_VISIBLE_DEVICES=all\r\nexport NVIDIA_DRIVER_CAPABILITIES=\"compute,utility\"\r\nexport NVIDIA_REQUIRE_CUDA=\"cuda>=8.0\"\r\n\r\napt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \\\\\r\n    rm -rf /var/lib/apt/lists/* && \\\\\r\n    NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \\\\\r\n    NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \\\\\r\n    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \\\\\r\n    apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \\\\\r\n    echo \"$NVIDIA_GPGKEY_SUM  cudasign.pub\" | sha256sum -c --strict - && rm cudasign.pub && \\\\\r\n    echo \"deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /\" > /etc/apt/sources.list.d/cuda.list\r\n\r\napt-get update && apt-get install -y --no-install-recommends \\\\\r\n        cuda-nvrtc-$CUDA_PKG_VERSION \\\\\r\n        cuda-nvgraph-$CUDA_PKG_VERSION \\\\\r\n        cuda-cusolver-$CUDA_PKG_VERSION \\\\\r\n        cuda-cublas-8-0=8.0.61.2-1 \\\\\r\n        cuda-cufft-$CUDA_PKG_VERSION \\\\\r\n        cuda-curand-$CUDA_PKG_VERSION \\\\\r\n        cuda-cusparse-$CUDA_PKG_VERSION \\\\\r\n        cuda-npp-$CUDA_PKG_VERSION \\\\\r\n        cuda-cudart-$CUDA_PKG_VERSION && \\\\\r\n    ln -s cuda-8.0 /usr/local/cuda && \\\\\r\n    rm -rf /var/lib/apt/lists/*\r\n\r\n# Install tensorflow\r\npip3 install tensorflow-gpu==1.4.0\r\n\r\n# add cudnn 6\r\necho \"deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /\" > /etc/apt/sources.list.d/nvidia-ml.list\r\n\r\nexport CUDNN_VERSION=6.0.21\r\n#LABEL com.nvidia.cudnn.version=\"${CUDNN_VERSION}\"\r\n\r\napt-get update && apt-get install -y --no-install-recommends \\\\\r\n    libcudnn6=$CUDNN_VERSION-1+cuda8.0 && \\\\\r\n    rm -rf /var/lib/apt/lists/*\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 4
+        },
+        {
+            "cell_type": "code",
+            "source": "# List CPU and GPU devices\r\nfrom tensorflow.python.client import device_lib\r\n\r\ndevice_lib.list_local_devices()",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 5
+        },
+        {
+            "cell_type": "code",
+            "source": "# Fit and evaluate TensorFlow model on MNIST data\r\nimport tensorflow as tf\r\nmnist = tf.keras.datasets.mnist\r\n\r\n(x_train, y_train),(x_test, y_test) = mnist.load_data()\r\nx_train, x_test = x_train / 255.0, x_test / 255.0\r\n\r\nmodel = tf.keras.models.Sequential([\r\n  tf.keras.layers.Flatten(input_shape=(28, 28)),  # input_shape needed for older tensorflow\r\n  tf.keras.layers.Dense(512, activation=tf.nn.relu),\r\n  tf.keras.layers.Dropout(0.2),\r\n  tf.keras.layers.Dense(10, activation=tf.nn.softmax)\r\n])\r\nmodel.compile(optimizer='adam',\r\n              loss='sparse_categorical_crossentropy',\r\n              metrics=['accuracy'])\r\n\r\nmodel.fit(x_train, y_train, epochs=5)\r\nprint(\"\\n\")\r\nmetrics = model.evaluate(x_test, y_test)\r\nprint(\"\\n\")\r\nprint(metrics)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 11
+        },
+        {
+            "cell_type": "code",
+            "source": "# Check available disk space\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\ndf -h\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 12
+        },
+        {
+            "cell_type": "code",
+            "source": "# Download code for the CIFAR 10 benchmark\r\nimport subprocess\r\nimport os\r\n\r\nif os.path.isdir(\"/tmp/models\"):\r\n    print(\"CIFAR 10 repo already cloned\")\r\nelse:\r\n    stdout = subprocess.check_output(\r\n'''\r\napt-get update && apt-get install -y git\r\ncd /tmp\r\ngit clone https://github.com/tensorflow/models.git\r\n''',\r\n        stderr=subprocess.STDOUT,\r\n        shell=True).decode(\"utf-8\")\r\n    print(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 13
+        },
+        {
+            "cell_type": "code",
+            "source": "# Run the CIFAR 10 benchmark\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\npython3 /tmp/models/tutorials/image/cifar10/cifar10_train.py --max_steps 100\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 14
+        }
+    ]
+}
diff --git a/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/tf-cuda9.ipynb b/samples/features/sql-big-data-cluster/machine-learning/spark/tensorflow/tf-cuda9.ipynb
@@ -0,0 +1,102 @@
+{
+    "metadata": {
+        "kernelspec": {
+            "name": "pyspark3kernel",
+            "display_name": "PySpark3"
+        },
+        "language_info": {
+            "name": "pyspark3",
+            "mimetype": "text/x-python",
+            "codemirror_mode": {
+                "name": "python",
+                "version": 3
+            },
+            "pygments_lexer": "python3"
+        }
+    },
+    "nbformat_minor": 2,
+    "nbformat": 4,
+    "cells": [
+        {
+            "cell_type": "code",
+            "source": "%%configure -f\r\n{\r\n    \"executorMemory\": \"4g\",\r\n    \"driverMemory\": \"4g\",\r\n    \"executorCores\": 4,\r\n    \"driverCores\": 2,\r\n    \"numExecutors\": 1\r\n}",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 2
+        },
+        {
+            "cell_type": "code",
+            "source": "# For informational purposes,\r\n# print the hostname of the container\r\n# where the Spark driver is running\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n    \"hostname\",\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 3
+        },
+        {
+            "cell_type": "code",
+            "source": "# Check that the CUDA_VERSION environment variable is set.\r\n# Its precise value does not matter: one can install CUDA 9 even if the \r\n# CUDA_VERSION environment variable is set to 8.0.61.\r\nimport os\r\nprint(os.environ['CUDA_VERSION'])",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 4
+        },
+        {
+            "cell_type": "code",
+            "source": "# Install NVIDIA GPU libraries and TensorFlow for GPU\r\n# in the container where the Spark driver is running\r\n# per https://www.tensorflow.org/install/gpu#ubuntu_1604_cuda_90_for_tensorflow_1130\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\n# Add NVIDIA package repository\r\napt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub\r\n\r\nwget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\nchown _apt cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\nchmod u+rwx cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\napt install ./cuda-repo-ubuntu1604_9.1.85-1_amd64.deb\r\n\r\nwget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\nchown _apt nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\nchmod u+rwx nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\napt install ./nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb\r\n\r\napt update\r\n\r\n# Install CUDA and tools. Include optional NCCL 2.x\r\napt install -y cuda9.0 cuda-cublas-9-0 cuda-cufft-9-0 cuda-curand-9-0 \\\\\r\n    cuda-cusolver-9-0 cuda-cusparse-9-0 libcudnn7=7.2.1.38-1+cuda9.0 \\\\\r\n    libnccl2=2.2.13-1+cuda9.0 cuda-command-line-tools-9-0\r\n\r\n# Optional: Install the TensorRT runtime (must be after CUDA install)\r\napt update\r\napt install libnvinfer4=4.1.2-1+cuda9.0\r\n\r\npip3 install tensorflow-gpu==1.12.0\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 5
+        },
+        {
+            "cell_type": "code",
+            "source": "# List CPU and GPU devices\r\nfrom tensorflow.python.client import device_lib\r\n\r\ndevice_lib.list_local_devices()",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 6
+        },
+        {
+            "cell_type": "code",
+            "source": "# Fit and evaluate TensorFlow model on MNIST data\r\nimport tensorflow as tf\r\nmnist = tf.keras.datasets.mnist\r\n\r\n(x_train, y_train),(x_test, y_test) = mnist.load_data()\r\nx_train, x_test = x_train / 255.0, x_test / 255.0\r\n\r\nmodel = tf.keras.models.Sequential([\r\n  tf.keras.layers.Flatten(),\r\n  tf.keras.layers.Dense(512, activation=tf.nn.relu),\r\n  tf.keras.layers.Dropout(0.2),\r\n  tf.keras.layers.Dense(10, activation=tf.nn.softmax)\r\n])\r\nmodel.compile(optimizer='adam',\r\n              loss='sparse_categorical_crossentropy',\r\n              metrics=['accuracy'])\r\n\r\nmodel.fit(x_train, y_train, epochs=5)\r\nprint(\"\\n\")\r\nmetrics = model.evaluate(x_test, y_test)\r\nprint(\"\\n\")\r\nprint(metrics)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 7
+        },
+        {
+            "cell_type": "code",
+            "source": "# Check available disk space\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\ndf -h\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 8
+        },
+        {
+            "cell_type": "code",
+            "source": "# Download code for the CIFAR 10 benchmark\r\nimport subprocess\r\nimport os\r\n\r\nif os.path.isdir(\"/tmp/models\"):\r\n    print(\"CIFAR 10 repo already cloned\")\r\nelse:\r\n    stdout = subprocess.check_output(\r\n'''\r\napt-get update && apt-get install -y git\r\ncd /tmp\r\ngit clone https://github.com/tensorflow/models.git\r\n''',\r\n        stderr=subprocess.STDOUT,\r\n        shell=True).decode(\"utf-8\")\r\n    print(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 9
+        },
+        {
+            "cell_type": "code",
+            "source": "# Run the CIFAR 10 benchmark\r\nimport subprocess\r\n\r\nstdout = subprocess.check_output(\r\n'''\r\npython3 /tmp/models/tutorials/image/cifar10/cifar10_train.py --max_steps 100\r\n''',\r\n    stderr=subprocess.STDOUT,\r\n    shell=True).decode(\"utf-8\")\r\nprint(stdout)",
+            "metadata": {
+                "language": "python"
+            },
+            "outputs": [],
+            "execution_count": 10
+        }
+    ]
+}

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# TensorFlow on GPUs in SQL Server 2019 big data cluster`
	`2`	`+`
	`3`	`+The notebooks in this directory illustrate fitting TensorFlow image classification models using GPU acceleration.`