
34. Deep Learning - Self-Study Path - In-Depth Understanding - NLP (Natural Language Processing) - RNN: a simple program from which the basic idea of an RNN can be understood.
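The program below trains a tiny word-level RNN language model on the bAbI qa1 (single-supporting-fact) file: at every position it predicts the next word from the current hidden state, then folds the word it just read into a new hidden state. Before the full listing, here is a minimal sketch of that single step. The toy sizes (vocab_size = 5, embed_size = 3) and the word index 2 are assumptions chosen only to make the tensor shapes visible; they are not values from the program itself.

import numpy as np

# Toy sizes, assumptions for illustration only (the real program uses
# embed_size = 10 and the vocabulary extracted from the bAbI file).
vocab_size, embed_size = 5, 3

np.random.seed(0)
embed = (np.random.rand(vocab_size, embed_size) - 0.5) * 0.1    # one embedding row per word
recurrent = np.eye(embed_size)                                   # hidden -> hidden transition
decoder = (np.random.rand(embed_size, vocab_size) - 0.5) * 0.1   # hidden -> next-word logits
hidden = np.zeros(embed_size)                                    # the 'start' state for an empty sentence

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# One RNN step, assuming the true next word has index 2:
pred = softmax(hidden.dot(decoder))         # probability of every word in the vocabulary
loss = -np.log(pred[2])                     # cross-entropy against the true next word
hidden = hidden.dot(recurrent) + embed[2]   # new hidden state remembers the word just consumed

print(pred.shape, hidden.shape, round(float(loss), 3))

Repeating this step over a whole sentence is exactly what predict() in the program does; summing the per-word losses and exponentiating the average gives the perplexity printed during training.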
import sys, random, math
from collections import Counter
import numpy as np

# read the bAbI qa1 (single-supporting-fact) training file and tokenize it
f = open('tasks_1-20_v1/en/qa1_single-supporting-fact_train.txt', 'r')
raw = f.readlines()
f.close()

tokens = list()
for line in raw[0:1000]:
    tokens.append(line.lower().replace("\n", "").split(" ")[1:])

print(tokens[0:3])

# build the vocabulary and the word -> index lookup
vocab = set()
for sent in tokens:
    for word in sent:
        vocab.add(word)
vocab = list(vocab)

word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i

def words2indices(sentence):
    idx = list()
    for word in sentence:
        idx.append(word2index[word])
    return idx

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

np.random.seed(1)
embed_size = 10

# word embeddings
embed = (np.random.rand(len(vocab), embed_size) - 0.5) * 0.1

# embedding -> embedding (initially the identity matrix)
recurrent = np.eye(embed_size)

# sentence embedding for an empty sentence
start = np.zeros(embed_size)

# embedding -> output weights
decoder = (np.random.rand(embed_size, len(vocab)) - 0.5) * 0.1

# one-hot lookups (for the loss function)
one_hot = np.eye(len(vocab))

def predict(sent):
    layers = list()
    layer = {}
    layer['hidden'] = start
    layers.append(layer)

    loss = 0

    # forward propagate
    preds = list()
    for target_i in range(len(sent)):
        layer = {}

        # try to predict the next term
        layer['pred'] = softmax(layers[-1]['hidden'].dot(decoder))
        loss += -np.log(layer['pred'][sent[target_i]])

        # generate the next hidden state
        layer['hidden'] = layers[-1]['hidden'].dot(recurrent) + embed[sent[target_i]]
        layers.append(layer)

    return layers, loss

# training loop: forward pass, backprop through time, then weight updates
for iter in range(30000):
    alpha = 0.001
    sent = words2indices(tokens[iter % len(tokens)][1:])

    layers, loss = predict(sent)

    # back propagate
    for layer_idx in reversed(range(len(layers))):
        layer = layers[layer_idx]
        target = sent[layer_idx - 1]

        if(layer_idx > 0):  # if not the first layer
            layer['output_delta'] = layer['pred'] - one_hot[target]
            new_hidden_delta = layer['output_delta'].dot(decoder.transpose())

            # if the last layer - don't pull from a later one because it doesn't exist
            if(layer_idx == len(layers) - 1):
                layer['hidden_delta'] = new_hidden_delta
            else:
                layer['hidden_delta'] = new_hidden_delta + layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())
        else:  # if the first layer
            layer['hidden_delta'] = layers[layer_idx + 1]['hidden_delta'].dot(recurrent.transpose())

    # update weights
    start -= layers[0]['hidden_delta'] * alpha / float(len(sent))
    for layer_idx, layer in enumerate(layers[1:]):
        decoder -= np.outer(layers[layer_idx]['hidden'], layer['output_delta']) * alpha / float(len(sent))

        embed_idx = sent[layer_idx]
        embed[embed_idx] -= layers[layer_idx]['hidden_delta'] * alpha / float(len(sent))
        recurrent -= np.outer(layers[layer_idx]['hidden'], layer['hidden_delta']) * alpha / float(len(sent))

    if(iter % 1000 == 0):
        print("Perplexity:" + str(np.exp(loss / len(sent))))

# inspect the trained model on one sentence
sent_index = 4

l, _ = predict(words2indices(tokens[sent_index]))

print(tokens[sent_index])

for i, each_layer in enumerate(l[1:-1]):
    input = tokens[sent_index][i]
    true = tokens[sent_index][i + 1]
    pred = vocab[each_layer['pred'].argmax()]
    print("Prev Input:" + input + (' ' * (12 - len(input))) + \
          "True:" + true + (" " * (15 - len(true))) + "Pred:" + pred)
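The least obvious part of the training loop is backpropagation through time: walking the layers in reverse, the gradient of the hidden state at step t is the prediction error at t pushed back through decoder, plus the hidden-state gradient from step t+1 pushed back through recurrent. The sketch below isolates just that recursion on a made-up three-word sentence; the toy sizes and word indices are assumptions for illustration, not data from the run above.

import numpy as np

np.random.seed(0)
vocab_size, embed_size = 5, 3                                    # toy sizes, assumptions only
embed = (np.random.rand(vocab_size, embed_size) - 0.5) * 0.1
recurrent = np.eye(embed_size)
decoder = (np.random.rand(embed_size, vocab_size) - 0.5) * 0.1
one_hot = np.eye(vocab_size)

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

sent = [1, 3, 2]                                                 # made-up word indices

# forward pass, same structure as predict() above
layers = [{'hidden': np.zeros(embed_size)}]
for w in sent:
    layer = {'pred': softmax(layers[-1]['hidden'].dot(decoder))}
    layer['hidden'] = layers[-1]['hidden'].dot(recurrent) + embed[w]
    layers.append(layer)

# backward pass: accumulate hidden_delta from the last step to the first
for t in reversed(range(len(layers))):
    layer = layers[t]
    if t > 0:
        layer['output_delta'] = layer['pred'] - one_hot[sent[t - 1]]
        delta = layer['output_delta'].dot(decoder.T)
        if t < len(layers) - 1:                   # not the last step: add gradient from step t+1
            delta += layers[t + 1]['hidden_delta'].dot(recurrent.T)
        layer['hidden_delta'] = delta
    else:                                         # the initial 'start' state only gets gradient from step 1
        layer['hidden_delta'] = layers[t + 1]['hidden_delta'].dot(recurrent.T)

for t, layer in enumerate(layers):
    print(t, np.round(layer['hidden_delta'], 3))

These per-step deltas are exactly what the weight-update block turns into gradient steps for start, embed, recurrent and decoder via np.outer.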
Output:
'''
[['mary', 'moved', 'to', 'the', 'bathroom.'], ['john', 'went', 'to', 'the', 'hallway.'], ['where', 'is', 'mary?', '\tbathroom\t1']]
Perplexity:82.00628585171349
Perplexity:81.85211883471023
Perplexity:81.65770644998716
Perplexity:81.35306453182142
Perplexity:80.81390886064067
Perplexity:79.77590929663761
Perplexity:77.56372974860284
Perplexity:71.83277995805608
Perplexity:48.417320803100246
Perplexity:24.839039726192727
Perplexity:19.975487836618008
Perplexity:18.460238985588635
Perplexity:17.029965398985233
Perplexity:14.893859382635188
Perplexity:11.770260202396468
Perplexity:8.635593964846068
Perplexity:6.948911974251331
Perplexity:6.020200624240951
Perplexity:5.409527179381012
Perplexity:5.0520385889435815
Perplexity:4.836185470740281
Perplexity:4.699695331198478
Perplexity:4.619284223100641
Perplexity:4.5672934716850975
Perplexity:4.516766550556743
Perplexity:4.453346872739982
Perplexity:4.37458647209406
Perplexity:4.283212137192868
Perplexity:4.180822355576833
Perplexity:4.0651542349348615
['sandra', 'moved', 'to', 'the', 'garden.']
Prev Input:sandra      True:moved          Pred:is
Prev Input:moved       True:to             Pred:to
Prev Input:to          True:the            Pred:the
Prev Input:the         True:garden.        Pred:bedroom.
'''
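The Perplexity values printed every 1000 iterations are np.exp(loss / len(sent)) for the current sentence, the exponential of the average per-word cross-entropy. A value around 82 at the start means the untrained model is roughly as uncertain as a uniform guess over the whole vocabulary, while a value around 4 at the end means it has effectively narrowed each next word down to about four candidates. A small sketch of that conversion follows; the probabilities are made-up numbers, not taken from the run above.

import numpy as np

# Perplexity = exp(average negative log-probability assigned to the true words).
# The probabilities below are illustrative assumptions only.
near_uniform = [0.0122, 0.0121, 0.0123, 0.0120, 0.0122]   # ~1/82 each, like an untrained model
loss = -np.sum(np.log(near_uniform))
print(np.exp(loss / len(near_uniform)))                   # ~82, compare the first printed value

confident = [0.25, 0.24, 0.26, 0.25, 0.23]                # the model narrows it to ~4 candidates
loss = -np.sum(np.log(confident))
print(np.exp(loss / len(confident)))                      # ~4, compare the last printed value

The final table shows the trained model reading tokens[4]: it predicts 'to' and 'the' correctly from context, but outputs 'bedroom.' where the true word is 'garden.', a mistake this tiny model cannot avoid since any location word is about equally plausible at that position.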