In [1]:
import numpy as np
from tensorflow import keras

In [7]:
# Read the corpus and flatten it into one long string: every line is stripped
# of surrounding whitespace and the lines are joined with single spaces.
with open("reviews.txt") as reviews_file:
    data = " ".join(line.strip() for line in reviews_file)

In [13]:
# Vocabulary: the set of distinct characters that occur in the corpus.
chars = {symbol for symbol in data}

In [15]:
# Map every character to its column in the one-hot encoding.
# Iterating a set of strings has no stable order between interpreter sessions
# (string hashes are randomised per process), so enumerate the characters in
# sorted order to get the same mapping on every "Restart & Run All".
chars_ind = {ch: idx for idx, ch in enumerate(sorted(chars))}

In [73]:
# Inverse of chars_ind: chars_array[i] is the character whose one-hot index is i.
# chars_ind assigns each character a distinct index 0..len(chars)-1, so ordering
# the characters by their index yields the lookup list directly.
chars_array = sorted(chars_ind, key=chars_ind.get)

In [19]:
# One-hot encode the whole corpus: row k has a single 1 in the column of data[k].
# NOTE(review): X is not referenced by the later cells visible here; the batches
# are built on demand by get_minibatch / get_target instead.
X = np.zeros((len(data), len(chars)))
for one_hot_row, symbol in zip(X, data):
    # Iterating X yields row views, so this writes into X itself.
    one_hot_row[chars_ind[symbol]] = 1

In [20]:
from keras.models import Sequential
from keras.layers.recurrent import LSTM

In [39]:
def get_minibatch(start, size, seq_len=100):
    """Build a batch of one-hot encoded, overlapping character windows.

    Window ``i`` of the batch encodes ``data[start + i : start + i + seq_len]``,
    so consecutive windows are shifted by one character.

    Parameters
    ----------
    start : int
        Index into the global ``data`` string of the first character of window 0.
    size : int
        Number of windows in the batch.
    seq_len : int, optional
        Number of characters per window. Defaults to 100, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, seq_len, len(chars))`` filled with zeros except
        for a 1 at ``[i, j, chars_ind[data[start + i + j]]]``.

    Raises
    ------
    IndexError
        If a window reaches past the end of ``data``.
    KeyError
        If a character of ``data`` is missing from ``chars_ind``.
    """
    X_data = np.zeros((size, seq_len, len(chars)))
    if size == 0 or seq_len == 0:
        return X_data

    # Neighbouring windows share all but one character, so look every character
    # of the covered span up once instead of once per window.
    span_codes = np.array(
        [chars_ind[data[start + offset]] for offset in range(size + seq_len - 1)],
        dtype=np.intp,
    )

    rows = np.arange(size)[:, np.newaxis]   # shape (size, 1)
    cols = np.arange(seq_len)               # shape (seq_len,)
    # span_codes[rows + cols] has shape (size, seq_len): entry (i, j) is the
    # index of the j-th character of window i.
    X_data[rows, cols, span_codes[rows + cols]] = 1
    return X_data

def get_target(start, size, seq_len=100):
    """Build the one-hot next-character targets for a batch of windows.

    Row ``i`` encodes ``data[start + i + seq_len]``, i.e. the character that
    immediately follows a ``seq_len``-character window starting at
    ``start + i``.

    Parameters
    ----------
    start : int
        Index into the global ``data`` string of the first window's start.
    size : int
        Number of targets (one per window) in the batch.
    seq_len : int, optional
        Length of the preceding window. Defaults to 100, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, len(chars))`` with exactly one 1 per row.

    Raises
    ------
    IndexError
        If a target position lies past the end of ``data``.
    KeyError
        If a character of ``data`` is missing from ``chars_ind``.
    """
    y_data = np.zeros((size, len(chars)))
    first_target = start + seq_len
    # Column index of the target character for every row of the batch.
    target_codes = np.array(
        [chars_ind[data[pos]] for pos in range(first_target, first_target + size)],
        dtype=np.intp,
    )
    y_data[np.arange(size), target_codes] = 1
    return y_data

In [41]:
# Sanity check: a batch of 10 targets should have shape (10, len(chars)).
get_target(0, 10).shape

(10, 28)

In [50]:
# Start the network with a single LSTM layer that reads a window of 100
# one-hot encoded characters and emits only its final 128-unit output.
model = Sequential()
model.add(
    LSTM(
        units=128,
        input_shape=(100, len(chars)),
        return_sequences=False,
    )
)

In [46]:
from keras.layers import TimeDistributed, Activation, Dense

In [51]:
# Output head: one unit per character, normalised by softmax into a
# probability distribution over the next character.
model.add(Dense(units=len(chars)))
model.add(Activation(activation='softmax'))

In [52]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 128)               80384     
_________________________________________________________________
dense_2 (Dense)              (None, 28)                3612      
_________________________________________________________________
activation_2 (Activation)    (None, 28)                0         
Total params: 83,996
Trainable params: 83,996
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Targets are one-hot rows, so train with categorical cross-entropy.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
)

In [90]:
def data_gen(batch_size=64):
    """Endlessly yield ``(inputs, targets)`` training batches from the corpus.

    Each batch is ``(get_minibatch(start, batch_size), get_target(start,
    batch_size))``. ``start`` advances by ``batch_size`` after every batch and
    wraps back to 0 once a full batch would no longer fit inside ``data``.

    Parameters
    ----------
    batch_size : int, optional
        Number of windows per batch. Defaults to 64.

    Yields
    ------
    tuple
        ``(X_batch, y_batch)`` as returned by ``get_minibatch`` and
        ``get_target``.

    Raises
    ------
    ValueError
        If ``data`` is too short to hold even a single batch.
    """
    # Window length used by get_minibatch / get_target when called with their
    # two positional arguments.
    seq_len = 100
    # A batch starting at `start` reads up to index
    # start + (batch_size - 1) + seq_len (the last row's target character),
    # so it needs batch_size + seq_len characters from `start` onwards.
    # The previous fixed margin of 110 was smaller than that and let the
    # generator run past the end of `data` before wrapping.
    chars_per_batch = batch_size + seq_len
    if len(data) < chars_per_batch:
        raise ValueError(
            "data has %d characters but one batch needs %d"
            % (len(data), chars_per_batch)
        )
    last_valid_start = len(data) - chars_per_batch

    iStart = 0
    while True:
        yield (get_minibatch(iStart, batch_size), get_target(iStart, batch_size))
        iStart += batch_size
        if iStart > last_valid_start:
            iStart = 0

In [None]:
# Train for 20 epochs of 1000 generator batches each.
# NOTE(review): fit_generator is deprecated in newer Keras/TensorFlow releases in
# favour of model.fit, which accepts generators directly - confirm against the
# installed version before changing the call.
model.fit_generator(data_gen(), steps_per_epoch=1000, epochs=20)

Epoch 1/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc8d40db8d0>

In [None]:
# Generate 500 characters by repeatedly sampling from the model's predicted
# next-character distribution and feeding the sample back in.
x_init = get_minibatch(0, 1)[0]   # one-hot seed window taken from the start of the corpus
s = data[0:100]                   # the same seed as text; sampled characters are appended
for i in range(500):
    # `predict` returns the softmax output directly. `predict_proba` was only a
    # deprecated alias for it on Sequential models and no longer exists in
    # newer Keras releases.
    probs = model.predict(x_init[np.newaxis], verbose=0)
    next_char = np.random.choice(len(chars), p=probs[0])
    s += chars_array[next_char]
    # Slide the window one step to the left (works for any window length) and
    # overwrite the wrapped-around last row with the one-hot of the new sample.
    x_init = np.roll(x_init, -1, axis=0)
    x_init[-1] = 0
    x_init[-1, next_char] = 1
print(s)

In [80]:
# Shape of the single-sample batch that is fed to the model during sampling.
# NOTE(review): the recorded output below shows 99 timesteps, whereas x_init as
# built by get_minibatch(0, 1)[0] has 100 - the output looks stale from an
# earlier, out-of-order run; re-run to confirm.
np.array([x_init]).shape

(1, 99, 28)