本次要預測的類別一共有以下六種RS: rolled-in scalePa: patchesCr: crazingPS: pitted surfaceIn: inclusionSc: scratches # 下載訓練資料並解壓縮 %mkdir data %mkdir models %cd data !wget https://www.dropbox.com/s/tv08497wvg40ach/NEU-CLS%20%281%29.rar !mv "NEU-CLS (1).rar" NEU-CLS.rar !pip install patool import patoolib patoolib.extract_archive("NEU-CLS.rar", outdir="./") # 最基本的幾個套件,基本上一定要import import numpy as np import time import tensorflow as tf from tensorflow import keras # 跟「繪圖」有關的套件,如果沒有打算繪圖就不用 import %matplotlib inline import matplotlib.pyplot as plt # 跟「Preprocess」有關的套件,如果沒有要訓練模型的話就不用 import from tensorflow.keras.utils import to_categorical from tensorflow.keras.preprocessing.image import ImageDataGenerator # 跟「設計模型」有關的套件,如果沒有要設計或是修改模型的話就不用 import from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, MaxPooling2D # 跟「編譯(compile)模型」有關的套件 from tensorflow.keras.optimizers import Adam, SGD from tensorflow.keras.metrics import categorical_crossentropy from tensorflow.keras.callbacks import ModelCheckpoint, Callback # 跟「混淆矩陣(confusion matrix)」有關的套件 from sklearn.metrics import confusion_matrix import itertools # 繪圖相關 from PIL import Image import random # 定義必要資訊 IMAGE_NUMBER = 300 type_list = ['RS','Pa','Cr','PS','In','Sc'] file_path = "/content/data/NEU-CLS/" # 打亂資料順序 def unison_shuffled_copies(a, b): assert len(a) == len(b) c = list(zip(a, b)) random.shuffle(c) a, b = zip(*c) return a, b # 查看訓練圖片 def view_image(image_data, title1, title2=None): if title2 is None: num = min(len(image_data), len(title1), 9) else: num = min(len(image_data), len(title1), len(title2), 9) plt.figure(figsize=(14,14)) for i in range(num): plt.subplot(3, 3, i+1) plt.imshow(image_data[i]) plt.colorbar(orientation='vertical') if title2 is None: plt.title(str(title1[i])) else: plt.title(str(title1[i]) + ' | ' + str(title2[i])) # python內載入資料 def data_loader(path, test_set, shuffle=True): raw_train_x = [] raw_train_y = [] raw_test_x = [] raw_test_y = 
[] plot_data = [] # with zipfile.ZipFile(path, 'r') as myzip: if True: for t_index, t in enumerate(type_list): temp_x, temp_y=[],[] for i in range(IMAGE_NUMBER): # with myzip.open('NEU-CLS/' + t + '_' + str(i+1) + '.bmp') as myfile: myfile = path + t + '_' + str(i+1) + '.bmp' img = Image.open(myfile) ary = np.array(img) temp_x.append(ary) temp_y.append(t_index) if i==0: plot_data.append(ary) # imgplot = plt.imshow(img) # plt.colorbar() shuffled_temp_x, shuffled_temp_y = unison_shuffled_copies(temp_x, temp_y) test_size = int(IMAGE_NUMBER*test_set + 0.5) raw_train_x += shuffled_temp_x[test_size:] raw_train_y += shuffled_temp_y[test_size:] raw_test_x += shuffled_temp_x[:test_size] raw_test_y += shuffled_temp_y[:test_size] shuffled_raw_train_x, shuffled_raw_train_y = unison_shuffled_copies(raw_train_x, raw_train_y) train_x = np.array(shuffled_raw_train_x) train_y = np.array(shuffled_raw_train_y) test_x = np.array(raw_test_x) test_y = np.array(raw_test_y) view_image(plot_data, type_list) return train_x, train_y, test_x, test_y # python內載入資料 raw_x_train, raw_y_train, raw_x_test, raw_y_test = data_loader(file_path, 0.2) # 查看資料集大小 print(raw_x_train.shape) print(raw_x_test.shape) # 設定訓練環境 physical_devices = tf.config.experimental.list_physical_devices('GPU') print(" Number of GPUs available: " , len(physical_devices)) tf.config.experimental.set_memory_growth(physical_devices[0], True) Functions # 畫出訓練的訓練的loss下降/acc上升圖 def show_train_history(train_history): fig , ax = plt.subplots() fig.subplots_adjust(hspace=0.4, wspace=0.4) #設定子圖的間隔 fig.set_figwidth(14) ax1 = plt.subplot(1, 2, 1) plt.title("Accuracy") plt.plot(train_history.history['accuracy'],'-', label='accuracy') plt.plot(train_history.history['val_accuracy'],'-', label='val_accuracy') leg = ax1.legend(loc='lower right') plt.ylabel('accuracy') plt.xlabel('Epoch') ax2 = plt.subplot(1, 2, 2) plt.title("Loss") plt.plot(train_history.history['loss'],'-', label='loss') plt.plot(train_history.history['val_loss'],'-', 
label='val_loss') leg = ax2.legend(loc='upper right') plt.ylabel('loss') plt.xlabel('Epoch') plt.show() # 畫confusion matrix def plot_confusion_matrix( test_labels, results, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues): cm = confusion_matrix(test_labels, results) plt.figure() plt.imshow(cm, interpolation='nearest', cmap=cmap) plt.title(title) plt.colorbar() tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, classes, rotation=45) plt.yticks(tick_marks, classes) if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] thresh = cm.max() / 2 for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") plt.tight_layout() plt.ylabel('True label') plt.xlabel('Predicted label') # 測量時間工具 class TimeHistory(keras.callbacks.Callback): def on_train_begin(self, logs={}): self.times = [] def on_epoch_begin(self, batch, logs={}): self.epoch_time_start = time.time() def on_epoch_end(self, batch, logs={}): self.times.append(time.time() - self.epoch_time_start) Setting and Preprocess # add channel x_train_reshaped = raw_x_train.reshape(1440,200,200,1) x_test_reshaped = raw_x_test.reshape(360,200,200,1) x_train_normalized = x_train_reshaped / 255 x_test_normalized = x_test_reshaped / 255 y_train_onehot = to_categorical(raw_y_train) y_test_onehot = to_categorical(raw_y_test) print(x_train_normalized.shape) print(x_test_normalized.shape) model1 = Sequential([ Conv2D(filters=32, kernel_size=(5,5), activation="relu", padding="same", data_format="channels_last", input_shape=(200,200,1)), Conv2D(filters=32, kernel_size=(5,5), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Conv2D(filters=64, kernel_size=(5,5), activation="relu", padding="same", data_format="channels_last"), Conv2D(filters=64, kernel_size=(5,5), activation="relu", padding="same", 
data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Conv2D(filters=128, kernel_size=(5,5), activation="relu", padding="same", data_format="channels_last"), Conv2D(filters=128, kernel_size=(5,5), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Flatten(), Dense(512, activation="relu"), Dropout(0.5), Dense(256, activation="relu"), Dropout(0.5), Dense(6, activation='softmax'), ]) # 定義模型,可以改改看 model = Sequential([ Conv2D(filters=32, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last", input_shape=(200,200,1)), Conv2D(filters=32, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Conv2D(filters=32, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), Conv2D(filters=32, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same", data_format="channels_last"), MaxPooling2D(pool_size=(2,2), data_format="channels_last"), Flatten(), Dense(512, activation="relu"), Dropout(0.3), Dense(6, activation='softmax'), ]) model.summary() # 定義訓練方式 # loss function: 交叉熵 # optimizer: Adam # 評估模型: 準確率 model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=5e-5), metrics=['accuracy']) # 開始訓練 filepath="/content/models/weights-{epoch:02d}-{val_accuracy:.2f}.hdf5" time_callback = TimeHistory() 
# Save a checkpoint whenever the validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint, time_callback]

# TODO: fill in the placeholders below and uncomment the call to train.
# train_history = model.fit(
#     <training inputs>, <training labels>,
#     validation_split=?, epochs=?, batch_size=?,
#     callbacks=callbacks_list, verbose=2)

# Fail early with an actionable message instead of saving an untrained model
# and then hitting an opaque NameError further down.
if "train_history" not in globals():
    raise NameError(
        "train_history is not defined: complete and uncomment the "
        "model.fit(...) TODO above before running the rest of this cell."
    )

# Save the model.
model.save("/content/models/")

# See how training went.
show_train_history(train_history)

# Score the model on the testing set.
score = model.evaluate(x_test_normalized, y_test_onehot)
print("Accuracy: ", score[1]*100)

# Plot the confusion matrix: the class with the highest predicted score is
# taken as the prediction for each test image.
RESULTS = model.predict(x_test_normalized)
RESULTS = np.argmax(RESULTS, axis=1)
plot_confusion_matrix(raw_y_test, RESULTS, type_list)

# Attachment: project.ipynb