Deploying and Using TensorFlow in a CCE Cluster
Preparing Resources
- Create a CCE cluster and GPU nodes, and use the gpu-beta add-on to install the graphics card driver.
- Add an object storage volume to the cluster.
Pre-configuring Data
Download data from https://github.com/zalandoresearch/fashion-mnist.
Obtain the TensorFlow machine learning (ML) example and modify it based on your requirements.
basicClass.py
"""Train and evaluate a Fashion-MNIST classifier, saving result plots as PNGs.

The dataset archives are read from /home/data and every figure is written
to /home/img, so both directories must exist before the script runs.
"""

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import gzip
from tensorflow.python.keras.utils import get_file
import matplotlib as mpl
# Select the non-interactive Agg backend before pyplot is imported so that
# figures can be rendered to files without a display.
mpl.use('Agg')
import matplotlib.pyplot as plt

print(tf.__version__)

# Stock Keras loader, kept for reference; this script reads the archives
# through load_data() below instead.
#fashion_mnist = keras.datasets.fashion_mnist
#(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()


def load_data():
    """Load the Fashion-MNIST archives stored under /home/data.

    Returns:
        A pair of tuples ``(x_train, y_train), (x_test, y_test)``. The label
        arrays are 1-D ``uint8`` arrays; the image arrays are ``uint8``
        arrays reshaped to ``(number_of_labels, 28, 28)``.
    """
    base = "file:////home/data/"
    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'
    ]

    # get_file fetches each archive from its origin URL and returns the
    # local path of the fetched copy.
    paths = [get_file(fname, origin=base + fname) for fname in files]

    # The first 8 bytes of a label file and the first 16 bytes of an image
    # file are header data, so they are skipped via `offset`.
    with gzip.open(paths[0], 'rb') as lbpath:
        y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[1], 'rb') as imgpath:
        x_train = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28)

    with gzip.open(paths[2], 'rb') as lbpath:
        y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(paths[3], 'rb') as imgpath:
        x_test = np.frombuffer(
            imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28)

    return (x_train, y_train), (x_test, y_test)


(train_images, train_labels), (test_images, test_labels) = load_data()

# Human-readable names, indexed by the integer class label.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Figure 1: the first training image with its raw pixel values.
plt.figure()
plt.imshow(train_images[0])
plt.colorbar()
plt.grid(False)
plt.savefig('/home/img/basicimg1.png')

# Scale pixel values from [0, 255] to [0, 1] before feeding the model.
train_images = train_images / 255.0
test_images = test_images / 255.0

# Figure 2: the first 25 training images in a 5x5 grid with their labels.
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.savefig('/home/img/basicimg2.png')

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

# tf.train.AdamOptimizer is a TensorFlow 1.x API.
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=5)

test_loss, test_acc = model.evaluate(test_images, test_labels)

print('Test accuracy:', test_acc)

predictions = model.predict(test_images)


def plot_image(i, predictions_array, true_label, img):
    """Draw test image ``i`` on the current axes with a prediction caption.

    The caption shows the predicted class, its confidence as a percentage,
    and the true class in parentheses. It is blue when the prediction is
    correct and red otherwise.

    Args:
        i: Index of the sample to draw.
        predictions_array: Per-sample class scores; row ``i`` is used.
        true_label: True class labels; element ``i`` is used.
        img: Images; element ``i`` is drawn.
    """
    predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    plt.imshow(img, cmap=plt.cm.binary)

    predicted_label = np.argmax(predictions_array)
    color = 'blue' if predicted_label == true_label else 'red'

    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                         100*np.max(predictions_array),
                                         class_names[true_label]),
               color=color)


def plot_value_array(i, predictions_array, true_label):
    """Draw the class scores of sample ``i`` as a bar chart on the current axes.

    Args:
        i: Index of the sample to draw.
        predictions_array: Per-sample class scores; row ``i`` is used.
        true_label: True class labels; element ``i`` is used.
    """
    predictions_array, true_label = predictions_array[i], true_label[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    thisplot = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)

    # Colour the true label last so that a correct prediction ends up blue.
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')


# Figure 3: prediction for test image 0.
i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions, test_labels)
plt.savefig('/home/img/basicimg3.png')

# Figure 4: prediction for test image 12.
i = 12
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions, test_labels)
plt.savefig('/home/img/basicimg4.png')

# Plot the first X test images, their predicted label, and the true label
# Color correct predictions in blue, incorrect predictions in red
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
    # Each sample takes two adjacent grid cells: image, then score bars.
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions, test_labels, test_images)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions, test_labels)
plt.savefig('/home/img/basicimg5.png')
Go to the OBS bucket page, create the data and img folders, and upload basicClass.py.
Go to the data folder and upload the four .gz files downloaded from GitHub.
ML Example
In this section, the ML example from the TensorFlow official website is used. For details, see https://www.tensorflow.org/tutorials/keras/classification?hl=en-us.
Create a job using the third-party image tensorflow/tensorflow:1.15.5-gpu, and set the container specifications.
Add pip install matplotlib;python /home/basicClass.py in the Start Command area.
Mount the created OBS volume.
Click Create. Wait until the job execution is complete. On the OBS page, you can view the execution results that are shown as images.
If you want to use kubectl, you can use the following example YAML:
# Job that installs matplotlib and runs /home/basicClass.py once on a GPU node.
kind: Job
apiVersion: batch/v1
metadata:
  name: testjob
  namespace: default
spec:
  parallelism: 1
  completions: 1
  backoffLimit: 6
  template:
    metadata:
      name: testjob
    spec:
      # restartPolicy is a pod-level field; a Job's pod template must set it
      # to OnFailure or Never.
      restartPolicy: OnFailure
      volumes:
        - name: cce-obs-tensorflow
          persistentVolumeClaim:
            claimName: cce-obs-tensorflow
      containers:
        - name: container-0
          image: 'tensorflow/tensorflow:1.15.5-gpu'
          command:
            - /bin/bash
          args:
            - '-c'
            - pip install matplotlib;python /home/basicClass.py
          resources:
            limits:
              cpu: '2'
              memory: 4Gi
              nvidia.com/gpu: '1'
            requests:
              cpu: '2'
              memory: 4Gi
              nvidia.com/gpu: '1'
          volumeMounts:
            # The script, the data folder and the img folder are all
            # reached through this mount.
            - name: cce-obs-tensorflow
              mountPath: /home
          imagePullPolicy: IfNotPresent
      imagePullSecrets:
        - name: default-secret
Feedback
Was this page helpful?
Provide feedback. Thank you very much for your feedback. We will continue working to improve the documentation. See the reply and handling status in My Cloud VOC.
For any further questions, feel free to contact us through the chatbot.
Chatbot