import random

import numpy as np
from keras.layers.core import TimeDistributedDense, Activation
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.optimizers import RMSprop
#if you need help installing keras, cuda or running this code let me know:
#djhill715@gmail.com
# "Hope" is the thing with feathers, Emily Dickinson
# Tokenize the poem and build the word<->index lookup tables used for
# one-hot encoding.
print('Processing Poem')
poem = 'Hope is the thing with feathers // That perches in the soul, // And sings the tune without the words, // And never stops at all, // // And sweetest in the gale is heard; // And sore must be the storm // That could abash the little bird // That kept so many warm. // // I\'ve heard it in the chillest land, // And on the strangest sea; // Yet, never, in extremity, // It asked a crumb of me.'
# Break the poem into whitespace-delimited word tokens ('//' marks line breaks).
poem = poem.split(' ')
# Build the vocabulary: unique words only.
# list(set()) is slow for large datasets; use bloom filters for those.
wordList = list(set(poem))
# Insert the sequence-start token at index 0.
wordList.insert(0, '#START#')
# Map each word to a unique integer index (and back) so words can be
# represented as one-hot vectors.
wordtoix = {}
ixtoword = {}
for i, word in enumerate(wordList):
    wordtoix[word] = i
    ixtoword[i] = word
# Construct the input (ins) and ground-truth (gts) numpy arrays for the model.
print('Building Training Data')
# Arrays have shape: num_sequences x num_time_steps x vocabulary_size.
ins = np.zeros((1, len(poem) + 2, len(wordList)))
gts = np.zeros_like(ins)
# Encode the poem into the training sequences.
# The input sequence begins with the start token.
ins[0, 0, wordtoix['#START#']] = 1
for t, word in enumerate(poem):
    # The ground truth at time t is the current word...
    gts[0, t, wordtoix[word]] = 1
    # ...which becomes the input at time t+1 (teacher forcing).
    ins[0, t + 1, wordtoix[word]] = 1
# The final ground-truth step reuses the start token as an end-of-sequence
# marker.
# NOTE(review): with len(poem)+2 timesteps, gts[:, len(poem)] and
# ins[:, len(poem)+1] are left all-zero — this looks like an off-by-one;
# confirm whether len(poem)+1 timesteps were intended.
gts[:, -1, wordtoix['#START#']] = 1
# The complete model definition.
# We include the first dense layer because we have one-hot tokens: it
# projects them into a dense encoding where distances between encodings
# matter, which improves performance over feeding one-hot vectors straight
# into the recurrent layer.
print('Building Model')
model = Sequential()
# TimeDistributedDense is a fully-connected layer with weights shared
# across time; updates are averaged across time steps.