This article describes how to reproduce TextRNN in PyTorch to predict the next word in a sentence.
The model follows the paper Finding Structure in Time (1990). If you already have a reasonable understanding of RNNs, you don't actually need to read it; just look carefully at how my code is implemented. If you don't know much about RNNs, please read my article RNN Layer carefully, which explains them in detail in combination with PyTorch.
The background of the problem is this: I have n sentences, each of which consists of exactly 3 words. What I want to do is take the first two words of each sentence as input and the last word as output, and train an RNN model on these pairs.
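For example, with the three sentences used later in this article, the training pairs are:

"i like"  -> "dog"
"i love"  -> "coffee"
"i hate"  -> "milk"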
Import libraries
'''
code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
'''
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

dtype = torch.FloatTensor
Prepare data
sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
n_class = len(vocab)
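With these three sentences, word_list has 9 tokens and vocab has 7 unique words, so n_class is 7. A quick check (the exact index assignment depends on set() ordering, so it may vary between runs):

print(len(word_list))  # 9
print(n_class)         # 7
print(word2idx)        # e.g. {'milk': 0, 'i': 1, 'dog': 2, ...} -- order is not deterministic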
Preprocess the data, build the Dataset, define the DataLoader, and one-hot encode the input data
# TextRNN Parameter
batch_size = 2
n_step = 2    # number of cells (= number of steps)
n_hidden = 5  # number of hidden units in one cell

def make_data(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word2idx[n] for n in word[:-1]]
        target = word2idx[word[-1]]

        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_data(sentences)
input_batch, target_batch = torch.Tensor(input_batch), torch.LongTensor(target_batch)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size, True)
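If you want to sanity-check the preprocessing, the shapes should come out as follows for the three example sentences (a small illustrative check, not part of the original code):

print(input_batch.shape)   # torch.Size([3, 2, 7])  -> [num_sentences, n_step, n_class]
print(target_batch.shape)  # torch.Size([3])

for x, y in loader:
    print(x.shape, y.shape)  # e.g. torch.Size([2, 2, 7]) torch.Size([2]); the last batch may have only 1 sample
    break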
I think everyone should have no problem with the above code. The next step is to define the network architecture.
class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        # fc
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, hidden, X):
        # X: [batch_size, n_step, n_class]
        X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
        out, hidden = self.rnn(X, hidden)
        # out : [n_step, batch_size, num_directions(=1) * n_hidden]
        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        out = out[-1]  # [batch_size, num_directions(=1) * n_hidden] ⭐
        model = self.fc(out)
        return model

model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
Each step of the above code is worth talking about. The first is the two parameters of nn.RNN(input_size, hidden_size). input_size is the encoding dimension of each word; since I use one-hot encoding instead of word embeddings, input_size is equal to the vocabulary size len(vocab), that is, n_class. Then there is hidden_size, which has no fixed requirement: it is simply the dimension of the hidden state, so you can set it to whatever size you want the input to be projected to.
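As a quick standalone sketch (using the same sizes as this article: n_class = 7, n_hidden = 5, n_step = 2, and a batch of 3), the shapes involved look like this:

rnn = nn.RNN(input_size=7, hidden_size=5)   # input_size = n_class, hidden_size = n_hidden
x = torch.randn(2, 3, 7)                    # [n_step, batch_size, input_size]
h0 = torch.zeros(1, 3, 5)                   # [num_layers * num_directions, batch_size, hidden_size]
out, hn = rnn(x, h0)
print(out.shape)  # torch.Size([2, 3, 5]) -> [n_step, batch_size, hidden_size]
print(hn.shape)   # torch.Size([1, 3, 5]) -> [num_layers * num_directions, batch_size, hidden_size]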
For an ordinary neural network, the first dimension of the input data is usually batch_size. By default, PyTorch's nn.RNN() expects batch_size on the second dimension, so you need X.transpose(0, 1) to swap the first and second dimensions of the input data.
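An alternative (not what the code in this article does) is to construct the RNN with batch_first=True, which keeps batch_size on the first dimension and makes the transpose unnecessary; a minimal sketch:

rnn = nn.RNN(input_size=7, hidden_size=5, batch_first=True)
x = torch.randn(3, 2, 7)       # [batch_size, n_step, input_size] -- no transpose needed
h0 = torch.zeros(1, 3, 5)      # hidden is still [num_layers * num_directions, batch_size, hidden_size]
out, hn = rnn(x, h0)
print(out.shape)  # torch.Size([3, 2, 5]) -> [batch_size, n_step, hidden_size]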
Then there is the output of rnn. It returns two results, out and hidden in the code above. I have explained the difference between these two variables in a previous post; if it is not clear, see the RNN Layer article mentioned above. Simply put, out is all the values boxed in red in the figure below, and hidden is all the values boxed in blue. What we need is the output of the last layer at the last time step, that is, the value of Y_3, so we take it with out = out[-1].
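For a single-layer, unidirectional RNN like the one used here, the last time step of out is in fact the same value as the final hidden state; a quick standalone check (a sketch, not part of the article's code):

rnn = nn.RNN(input_size=7, hidden_size=5)
x = torch.randn(2, 3, 7)        # [n_step, batch_size, input_size]
h0 = torch.zeros(1, 3, 5)
out, hidden = rnn(x, h0)
print(torch.allclose(out[-1], hidden[-1]))  # True for a single-layer, unidirectional RNN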
The rest is relatively simple: just train and test.
# Training
for epoch in range(5000):
    for x, y in loader:
        # hidden : [num_layers * num_directions, batch, hidden_size]
        hidden = torch.zeros(1, x.shape[0], n_hidden)
        # x : [batch_size, n_step, n_class]
        pred = model(hidden, x)

        # pred : [batch_size, n_class], y : [batch_size] (LongTensor, not one-hot)
        loss = criterion(pred, y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

input = [sen.split()[:2] for sen in sentences]

# Predict
hidden = torch.zeros(1, len(input), n_hidden)
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [idx2word[n.item()] for n in predict.squeeze()])
The complete code is as follows
'''
code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
'''
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

dtype = torch.FloatTensor

sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
idx2word = {i: w for i, w in enumerate(vocab)}
n_class = len(vocab)

# TextRNN Parameter
batch_size = 2
n_step = 2    # number of cells (= number of steps)
n_hidden = 5  # number of hidden units in one cell

def make_data(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word2idx[n] for n in word[:-1]]
        target = word2idx[word[-1]]

        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return input_batch, target_batch

input_batch, target_batch = make_data(sentences)
input_batch, target_batch = torch.Tensor(input_batch), torch.LongTensor(target_batch)
dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size, True)

class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        # fc
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, hidden, X):
        # X: [batch_size, n_step, n_class]
        X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
        out, hidden = self.rnn(X, hidden)
        # out : [n_step, batch_size, num_directions(=1) * n_hidden]
        # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        out = out[-1]  # [batch_size, num_directions(=1) * n_hidden] ⭐
        model = self.fc(out)
        return model

model = TextRNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
    for x, y in loader:
        # hidden : [num_layers * num_directions, batch, hidden_size]
        hidden = torch.zeros(1, x.shape[0], n_hidden)
        # x : [batch_size, n_step, n_class]
        pred = model(hidden, x)

        # pred : [batch_size, n_class], y : [batch_size] (LongTensor, not one-hot)
        loss = criterion(pred, y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

input = [sen.split()[:2] for sen in sentences]

# Predict
hidden = torch.zeros(1, len(input), n_hidden)
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [idx2word[n.item()] for n in predict.squeeze()])