Internet Movie Database (IMDB) Data: Bidirectional Long Short-Term Memory (LSTM)

In [1]:
'''Train a Bidirectional LSTM on the IMDB sentiment classification task.

Validation accuracy (measured on the test split) after 4 epochs on CPU: ~0.8146
Time per epoch on CPU (Core i7): ~150s.
'''

from __future__ import print_function
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb
Using CNTK backend
In [2]:
# Number of samples per mini-batch during training.
batch_size = 32
# Vocabulary cap: only the max_features most common words are kept.
max_features = 20000
# Texts are cut after this many words.
maxlen = 100
In [3]:
# Fetch the IMDB train/test splits, restricted to the max_features most
# common words.
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Bring every sequence to a common length of maxlen so each split becomes
# a 2-D (samples x time) array.
print('Pad sequences (samples x time)')
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# Make sure the labels are NumPy arrays.
y_train, y_test = np.array(y_train), np.array(y_test)
Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)
In [4]:
# Architecture: word indices -> 128-dimensional embeddings -> bidirectional
# LSTM (64 units per direction) -> dropout -> one sigmoid unit that outputs
# the probability for the binary sentiment label.
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Train...')
# validation_data is given as an (inputs, targets) tuple, the form the
# Keras API documents.
# NOTE(review): the test split doubles as the validation set here, so the
# reported val_acc is not an independent estimate if it is used to pick
# the number of epochs or other hyperparameters.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=4,
          validation_data=(x_test, y_test))
Train...
Train on 25000 samples, validate on 25000 samples
Epoch 1/4
25000/25000 [==============================] - 130s - loss: 0.4245 - acc: 0.8007 - val_loss: 0.3373 - val_acc: 0.8521
Epoch 2/4
25000/25000 [==============================] - 130s - loss: 0.2260 - acc: 0.9120 - val_loss: 0.3633 - val_acc: 0.8440
Epoch 3/4
25000/25000 [==============================] - 130s - loss: 0.1358 - acc: 0.9512 - val_loss: 0.5835 - val_acc: 0.8352
Epoch 4/4
25000/25000 [==============================] - 129s - loss: 0.0932 - acc: 0.9674 - val_loss: 0.5153 - val_acc: 0.8354
Out[4]:
<keras.callbacks.History at 0x7f7c0ad3c390>
In [ ]: