hiddenlayer
hiddenlayer copied to clipboard
Pointer Network - 2 Inputs issue
When calling:
hl_graph = hl.build_graph(pointer, (inputs, target))
with inputs: tensor([[3, 7, 2, 9, 0, 1, 8, 5, 4, 6]]) and target: tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]) I get the following error:
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _slow_forward(self, *input, **kwargs)
463 tracing_state._traced_module_stack.append(self)
464 try:
--> 465 result = self.forward(*input, **kwargs)
466 finally:
467 tracing_state.pop_scope()
<ipython-input-217-844f2c5b2d6a> in forward(self, inputs, target)
80
81
---> 82 return loss / seq_len
RuntimeError: Expected object of type torch.FloatTensor but found type torch.LongTensor for argument #2 'other'
And PointerNetwork is:
class PointerNet(nn.Module):
    """Pointer network: encodes an index sequence and, at each decoding step,
    "points" back at one encoder position via attention; trained with
    cross-entropy against the target permutation.

    Args:
        embedding_size: dimensionality of the learned index embeddings.
        hidden_size: LSTM hidden size for both encoder and decoder.
        seq_len: fixed sequence length; also the embedding vocabulary size
            (the embedding table has `seq_len` rows, so input values must
            lie in [0, seq_len)).
        n_glimpses: number of glimpse-attention refinement steps per
            decoding step.
        tanh_exploration: the `C` scaling constant passed to the pointer
            Attention module when `use_tanh` is set.
        use_tanh: whether the pointer attention applies tanh clipping.
        use_cuda: move the mask to GPU when True.
    """

    def __init__(self,
                 embedding_size,
                 hidden_size,
                 seq_len,
                 n_glimpses,
                 tanh_exploration,
                 use_tanh,
                 use_cuda=USE_CUDA):
        super(PointerNet, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.n_glimpses = n_glimpses
        self.seq_len = seq_len
        self.use_cuda = use_cuda

        # One shared embedding for both encoder inputs and decoder
        # (teacher-forced) target inputs.
        self.embedding = nn.Embedding(seq_len, embedding_size)
        self.encoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        # `pointer` produces the final per-position logits; `glimpse`
        # refines the query before pointing.
        self.pointer = Attention(hidden_size, use_tanh=use_tanh,
                                 C=tanh_exploration, use_cuda=use_cuda)
        self.glimpse = Attention(hidden_size, use_tanh=False,
                                 use_cuda=use_cuda)

        # Learned first decoder input, initialized uniformly in
        # [-1/sqrt(d), 1/sqrt(d)].
        self.decoder_start_input = nn.Parameter(torch.FloatTensor(embedding_size))
        self.decoder_start_input.data.uniform_(
            -(1. / math.sqrt(embedding_size)), 1. / math.sqrt(embedding_size))

        self.criterion = nn.CrossEntropyLoss()

    def apply_mask_to_logits(self, logits, mask, idxs):
        """Mask already-visited positions out of `logits`.

        Args:
            logits: [batch_size x seq_len] attention scores.
            mask: byte mask of visited positions (1 = visited).
            idxs: per-batch index chosen at the previous step, or None on
                the first step.

        Returns:
            (masked logits, updated mask). Masked entries are set to -inf
            so softmax assigns them zero probability.
        """
        batch_size = logits.size(0)
        clone_mask = mask.clone()
        if idxs is not None:
            # Mark the previously selected position of every batch element.
            clone_mask[[i for i in range(batch_size)], idxs.data] = 1
        logits[clone_mask] = -np.inf
        return logits, clone_mask

    def forward(self, inputs, target):
        """Teacher-forced forward pass.

        Args:
            inputs: [batch_size x sourceL] LongTensor of indices in
                [0, seq_len).
            target: [batch_size x sourceL] LongTensor giving the gold
                pointer index at each decoding step.

        Returns:
            Scalar cross-entropy loss averaged over the sequence length.
        """
        batch_size = inputs.size(0)
        seq_len = inputs.size(1)
        assert seq_len == self.seq_len

        embedded = self.embedding(inputs)
        target_embedded = self.embedding(target)
        encoder_outputs, (hidden, context) = self.encoder(embedded)

        # Byte mask of visited positions. NOTE(review): byte masks are
        # deprecated in newer PyTorch in favor of bool — kept for
        # compatibility with the torch version this was written for.
        mask = torch.zeros(batch_size, seq_len).byte()
        if self.use_cuda:
            mask = mask.cuda()

        idxs = None
        decoder_input = self.decoder_start_input.unsqueeze(0).repeat(batch_size, 1)

        loss = 0
        for step in range(seq_len):
            _, (hidden, context) = self.decoder(
                decoder_input.unsqueeze(1), (hidden, context))

            query = hidden.squeeze(0)
            # BUG FIX: the glimpse loop previously reused the loop variable
            # `i`, shadowing the outer timestep index, so `target[:, i]`
            # below addressed the wrong step whenever n_glimpses > 0.
            for _ in range(self.n_glimpses):
                ref, logits = self.glimpse(query, encoder_outputs)
                logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
                # Softmax over the sequence positions (dim=1); the bare
                # F.softmax(logits) form is deprecated/ambiguous.
                query = torch.bmm(
                    ref, F.softmax(logits, dim=1).unsqueeze(2)).squeeze(2)

            _, logits = self.pointer(query, encoder_outputs)
            logits, mask = self.apply_mask_to_logits(logits, mask, idxs)

            # Teacher forcing: feed the embedded gold target for this step.
            decoder_input = target_embedded[:, step, :]
            loss += self.criterion(logits, target[:, step])

        return loss / seq_len
Are you sure you're feeding tensors of the correct data type? The error says an operation expected a FloatTensor but received a LongTensor for its second operand — check the dtypes of the tensors involved in the loss computation (e.g. that `loss` accumulates float tensors while `target` stays Long for `CrossEntropyLoss`). Also note that the glimpse loop reuses the loop variable `i` from the outer decoding loop, which shadows the timestep index.