3.4 Gesture Recognition

Problem Description

Recognize whether the gesture a person shows is scissors, rock, or paper.

Data Preprocessing

Downloading the images

!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/rps.zip \
    -O /tmp/rps.zip

!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/rps-test-set.zip \
    -O /tmp/rps-test-set.zip
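
Before extracting, it can be worth confirming that both archives actually downloaded. A minimal check (the paths are the ones passed to -O above):

import os

for zip_path in ['/tmp/rps.zip', '/tmp/rps-test-set.zip']:
    size_mb = os.path.getsize(zip_path) / 1e6  # raises FileNotFoundError if the download failed
    print(f'{zip_path}: {size_mb:.1f} MB')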

Extracting the archives

import os
import zipfile

local_zip = '/tmp/rps.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp/')
zip_ref.close()

local_zip = '/tmp/rps-test-set.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp/')
zip_ref.close()
rock_dir = os.path.join('/tmp/rps/rock')
paper_dir = os.path.join('/tmp/rps/paper')
scissors_dir = os.path.join('/tmp/rps/scissors')

print('total training rock images:', len(os.listdir(rock_dir)))
print('total training paper images:', len(os.listdir(paper_dir)))
print('total training scissors images:', len(os.listdir(scissors_dir)))

rock_files = os.listdir(rock_dir)
print(rock_files[:10])

paper_files = os.listdir(paper_dir)
print(paper_files[:10])

scissors_files = os.listdir(scissors_dir)
print(scissors_files[:10])
total training rock images: 840
total training paper images: 840
total training scissors images: 840
['rock07-k03-096.png', 'rock03-059.png', 'rock03-058.png', 'rock01-098.png', 'rock05ck01-034.png', 'rock07-k03-056.png', 'rock03-024.png', 'rock06ck02-051.png', 'rock06ck02-080.png', 'rock03-092.png']
['paper03-061.png', 'paper06-071.png', 'paper02-098.png', 'paper06-006.png', 'paper01-065.png', 'paper01-118.png', 'paper02-000.png', 'paper05-043.png', 'paper04-054.png', 'paper04-109.png']
['scissors03-041.png', 'scissors02-000.png', 'testscissors03-039.png', 'scissors02-036.png', 'scissors04-006.png', 'scissors04-010.png', 'testscissors01-012.png', 'scissors01-098.png', 'scissors04-077.png', 'scissors04-112.png']
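
The counts above cover only the training set. The validation archive extracted to /tmp/rps-test-set can be counted the same way, assuming its sub-directories mirror the training layout:

for label in ['rock', 'paper', 'scissors']:
    test_dir = os.path.join('/tmp/rps-test-set', label)
    print('total validation', label, 'images:', len(os.listdir(test_dir)))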

Previewing the images (two from each class)

%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

pic_index = 2

next_rock = [os.path.join(rock_dir, fname) 
                for fname in rock_files[pic_index-2:pic_index]]
next_paper = [os.path.join(paper_dir, fname) 
                for fname in paper_files[pic_index-2:pic_index]]
next_scissors = [os.path.join(scissors_dir, fname) 
                for fname in scissors_files[pic_index-2:pic_index]]

for i, img_path in enumerate(next_rock+next_paper+next_scissors):
  #print(img_path)
  img = mpimg.imread(img_path)
  plt.imshow(img)
  plt.axis('Off')
  plt.show()
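
Plotting each sample in its own figure works, but a single grid is easier to scan. A possible variant using plt.subplots over the same six file paths:

fig, axes = plt.subplots(3, 2, figsize=(6, 9))
for ax, img_path in zip(axes.flat, next_rock + next_paper + next_scissors):
    ax.imshow(mpimg.imread(img_path))
    ax.set_title(os.path.basename(img_path), fontsize=8)
    ax.axis('off')
plt.tight_layout()
plt.show()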

Preprocessing

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAINING_DIR = "/tmp/rps/"
training_datagen = ImageDataGenerator(
      rescale = 1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

VALIDATION_DIR = "/tmp/rps-test-set/"
validation_datagen = ImageDataGenerator(rescale = 1./255)

train_generator = training_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(150,150),
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150,150),
    class_mode='categorical'
)
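
flow_from_directory infers labels from the sub-directory names, so it is worth checking which index corresponds to which class before interpreting predictions later on:

print(train_generator.class_indices)
# typically {'paper': 0, 'rock': 1, 'scissors': 2}, since classes are sorted alphabetically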

Model Definition and Training

model = tf.keras.models.Sequential([
    # Note the input shape is the desired size of the image: 150x150 with 3 color channels
    # This is the first convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The third convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fourth convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

model.compile(loss = 'categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

history = model.fit(train_generator, epochs=5, validation_data=validation_generator, verbose=1)

model.save("rps.h5")
Epoch 1/5
79/79 [==============================] - 84s 1s/step - loss: 1.1976 - accuracy: 0.3738 - val_loss: 0.9343 - val_accuracy: 0.5780
Epoch 2/5
79/79 [==============================] - 84s 1s/step - loss: 0.9056 - accuracy: 0.5813 - val_loss: 0.3923 - val_accuracy: 0.8333
Epoch 3/5
79/79 [==============================] - 84s 1s/step - loss: 0.5751 - accuracy: 0.7556 - val_loss: 0.2332 - val_accuracy: 0.8898
Epoch 4/5
79/79 [==============================] - 84s 1s/step - loss: 0.3748 - accuracy: 0.8484 - val_loss: 0.0647 - val_accuracy: 0.9704
Epoch 5/5
79/79 [==============================] - 83s 1s/step - loss: 0.2706 - accuracy: 0.8948 - val_loss: 0.0668 - val_accuracy: 0.9812
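
The model was saved to rps.h5 above. As a quick sanity check, it can be reloaded and inspected before moving on (a minimal sketch):

reloaded = tf.keras.models.load_model('rps.h5')
reloaded.summary()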

Analysis

import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.show()
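
The loss values collected above are not plotted. If the loss curves are also of interest, a second figure in the same style can be added:

plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)
plt.show()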

Testing with Uploaded Images

import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():
    # predicting images
    path = fn
    img = image.load_img(path, target_size=(150, 150))
    x = image.img_to_array(img)
    x = x / 255.0  # rescale to match the training preprocessing
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(fn)
    print(classes)
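
classes is a 1x3 array of softmax probabilities. Mapping the highest-scoring index back to a label requires the class order reported by the generator; a possible interpretation step, assuming train_generator is still in scope:

label_map = {v: k for k, v in train_generator.class_indices.items()}
predicted = label_map[int(np.argmax(classes[0]))]
print('predicted class:', predicted)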