# MNIST

## Читаем данные

In [None]:
import numpy as np,tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Figure size and legend font size defaults
figsize = (10, 10)
legend_fontsize = 16

# Seaborn look: white grid background, colour-blind-friendly palette
sns.set_style("whitegrid")
sns.set_palette("colorblind")
palette = sns.color_palette()  # fetched after set_palette so it reflects that choice

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the local "MNIST_data/" directory with one-hot encoded labels
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

## Логистическая регрессия

In [None]:
# Softmax (multinomial logistic) regression on flattened 784-value inputs.
x = tf.placeholder(tf.float32, [None, 784])   # input batch
y_ = tf.placeholder(tf.float32, [None, 10])   # one-hot target labels
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

logit = tf.matmul(x, W) + b
y = tf.nn.softmax(logit)

# Compute the loss from the logits instead of -sum(y_ * log(softmax(...))):
# the hand-written form produces NaN as soon as a predicted probability
# underflows to 0. This is also the loss the deeper networks below use.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logit, y_))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Accuracy: fraction of rows whose arg-max prediction equals the arg-max label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Open a session and run the initializer for all variables defined so far
sess = tf.Session()
sess.run(
    tf.initialize_all_variables()
)

In [None]:
# Run 1001 mini-batch gradient steps (batch size 100); every 50 steps report
# accuracy on the full training and test sets.
for i in range(1001):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    if i % 50 == 0:
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y_: mnist.train.labels})
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
        # Fixed format string: "\train" was parsed as TAB + "rain", so the label
        # came out as "rain accuracy". It now needs "\t" followed by "train".
        print("[%d]\ttrain accuracy=%.6f\ttest accuracy=%.6f" % (i, train_acc, test_acc))

## Двухуровневая сеть с ReLU

In [None]:
# Inputs: flattened images and one-hot labels
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# Hidden layer: 784 -> 2000 ReLU units, truncated-normal initial values
W_relu = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b_relu = tf.Variable(tf.truncated_normal([2000], stddev=0.1))
h = tf.nn.relu(tf.add(tf.matmul(x, W_relu), b_relu))

# Output layer: 2000 -> 10 classes, zero-initialised
W = tf.Variable(tf.zeros([2000, 10]))
b = tf.Variable(tf.zeros([10]))
logit = tf.add(tf.matmul(h, W), b)
y = tf.nn.softmax(logit)

# Mean cross-entropy loss computed from the logits, and one SGD step on it
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logit, y_)
)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Accuracy: share of rows where predicted class equals the labelled class
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# New session for the ReLU network; initialise all variables before training
sess = tf.Session()
sess.run(
    tf.initialize_all_variables()
)

In [None]:
# Run 1001 mini-batch gradient steps (batch size 100); every 50 steps report
# accuracy on the full training and test sets.
for i in range(1001):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    if i % 50 == 0:
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y_: mnist.train.labels})
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
        # Labels now match the values: the original printed the train figure
        # under "test accuracy" and the test figure under "train accuracy".
        print("[%d]\ttrain accuracy=%.6f\ttest accuracy=%.6f" % (i, train_acc, test_acc))

## Добавим дропаут

In [None]:
# Inputs: flattened images and one-hot labels
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# Hidden layer: 784 -> 200 ReLU units, truncated-normal initial values
W_relu = tf.Variable(tf.truncated_normal([784, 200], stddev=0.1))
b_relu = tf.Variable(tf.truncated_normal([200], stddev=0.1))
h = tf.nn.relu(tf.add(tf.matmul(x, W_relu), b_relu))

# Dropout on the hidden activations; the keep probability is fed at run time
# so that training and evaluation can use different values
keep_probability = tf.placeholder(tf.float32)
h_drop = tf.nn.dropout(h, keep_probability)

# Output layer: 200 -> 10 classes, zero-initialised
W = tf.Variable(tf.zeros([200, 10]))
b = tf.Variable(tf.zeros([10]))
logit = tf.add(tf.matmul(h_drop, W), b)
y = tf.nn.softmax(logit)

# Mean cross-entropy loss computed from the logits, and one SGD step on it
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logit, y_)
)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Accuracy: share of rows where predicted class equals the labelled class
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# New session for the dropout network; initialise all variables before training
sess = tf.Session()
sess.run(
    tf.initialize_all_variables()
)

In [None]:
# Run 1001 mini-batch gradient steps (batch size 100); every 50 steps report
# accuracy on the full training and test sets.
for i in range(1001):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Training step uses dropout with keep probability 0.5
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys, keep_probability: 0.5})
    if i % 50 == 0:
        # Evaluation feeds keep probability 1.0, i.e. nothing is dropped
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y_: mnist.train.labels, keep_probability: 1.0})
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_probability: 1.0})
        # Labels now match the values: the original printed the train figure
        # under "test accuracy" and the test figure under "train accuracy".
        print("[%d]\ttrain accuracy=%.6f\ttest accuracy=%.6f" % (i, train_acc, test_acc))

# MNIST на Keras

## Предобработка

In [None]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.utils import np_utils
from keras import callbacks
from keras import optimizers, regularizers
# NOTE(review): makes `tf.python.control_flow_ops` point at the top-level `tf` module.
# Presumably a workaround for a Keras/TensorFlow version mismatch — confirm it is still needed.
tf.python.control_flow_ops = tf

In [None]:
# NOTE: `mnist` here is keras.datasets.mnist (imported above); it shadows the
# TensorFlow dataset object of the same name used in the first part.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into one row and scale the values by 1/255 as float32
X_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255
X_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255

# One-hot encode the labels into 10 classes
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

# Mini-batch size shared by every model.fit call below
batch_size = 100

In [None]:
# Display the shape of the flattened training matrix (the models below declare 784 input features)
X_train.shape

## Логистическая регрессия

In [None]:
# Logistic regression: a single 10-unit dense layer followed by softmax
model = Sequential([
    Dense(10, input_shape=(784,)),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.5
sgd = optimizers.SGD(lr=0.5)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Up to 20 epochs with the test split as validation data; early stopping
# monitors val_acc with a patience of 3 epochs
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=20,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[
        callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1),
    ],
)

In [None]:
# The same logistic regression built, compiled and trained in one cell (10 epochs)
model = Sequential([
    Dense(10, input_shape=(784,)),
    Activation('softmax'),
])

sgd = optimizers.SGD(lr=0.5)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[
        callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1),
    ],
)

## Сеть с одним скрытым слоем

In [None]:
# One hidden layer of 200 sigmoid units, then a 10-way softmax output
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('sigmoid'),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# One hidden layer of 200 ReLU units, then a 10-way softmax output
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

## Что если добавить слоёв?

In [None]:
# Two hidden ReLU layers of 200 units, each with an L2 weight penalty of 0.001,
# followed by the 10-way softmax output.
model = Sequential()
model.add(Dense(200, input_shape=(784,), W_regularizer=regularizers.l2(0.001)))
model.add(Activation('relu'))
# Only the first layer declares the input shape. The original repeated
# input_shape=(784,) here, although this layer receives the 200 activations
# of the previous layer.
model.add(Dense(200, W_regularizer=regularizers.l2(0.001)))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Up to 10 epochs with the test split as validation data; early stopping
# monitors val_acc with a patience of 3 epochs
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[
        callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1),
    ],
)

## Сеть с дропаутом

In [None]:
# 200 ReLU units with dropout rate 0.5 applied before the softmax output layer
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

## Правильная инициализация

In [None]:
# ReLU hidden layer whose weights use the 'he_uniform' initialiser
model = Sequential([
    Dense(200, init='he_uniform', input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Sigmoid hidden layer whose weights use the 'glorot_uniform' initialiser
model = Sequential([
    Dense(200, init='glorot_uniform', input_shape=(784,)),
    Activation('sigmoid'),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

## Сеть с нормализацией по мини-батчам

In [None]:
# ReLU hidden layer followed by batch normalisation, then the softmax output
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    BatchNormalization(),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

## Сравним оптимизаторы на сети с одним скрытым слоем ReLU

In [None]:
# Baseline network for the optimizer comparison: 200 ReLU units + softmax output
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

In [None]:
# Plain SGD with learning rate 0.1 (no momentum)
sgd = optimizers.SGD(lr=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with SGD plus momentum 0.9
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

sgd = optimizers.SGD(lr=0.1, momentum=0.9)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with SGD using Nesterov momentum 0.9
model = Sequential([
    Dense(200, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with RMSprop (learning rate 0.01)
model = Sequential()
model.add(Dense(200, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
# Named `optimizer` rather than `sgd`: this cell does not use SGD
optimizer = optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with Adagrad (library defaults)
model = Sequential()
model.add(Dense(200, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
# Named `optimizer` rather than `sgd`: this cell does not use SGD
optimizer = optimizers.Adagrad()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with Adadelta (library defaults)
model = Sequential()
model.add(Dense(200, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
# Named `optimizer` rather than `sgd`: this cell does not use SGD
optimizer = optimizers.Adadelta()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

In [None]:
# Same one-hidden-layer ReLU network, trained with Adam (library defaults)
model = Sequential()
model.add(Dense(200, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
# Named `optimizer` rather than `sgd`: this cell does not use SGD
optimizer = optimizers.Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, using the test split as validation data
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=10,
    verbose=1,
    validation_data=(X_test, Y_test),
)

# Соберём всё вместе

In [None]:
# Combined recipe: 'he_uniform' initialisation, ReLU, batch normalisation, Adam
model = Sequential()
model.add(Dense(200, init='he_uniform', input_shape=(784,)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dense(10))
model.add(Activation('softmax'))
# Named `optimizer` rather than `sgd`: this cell does not use SGD
optimizer = optimizers.Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Up to 20 epochs with the test split as validation data; early stopping
# monitors val_acc with a patience of 3 epochs
model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=20,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[
        callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1),
    ],
)