Problem converting a custom PyTorch model to TensorFlow
Platform (like ubuntu 16.04/win10): Ubuntu 16.04.5 LTS
Python version: Python 3.7
Source framework with version (like Tensorflow 1.4.1 with GPU): PyTorch 1.6.0
Destination framework with version (like CNTK 2.3 with GPU): TensorFlow 2.2.0
Pre-trained model path (webpath or webdisk path): A Structured Self-attentive Sentence Embedding model
class SentenceEmbeddingsModel(nn.Module):
    """Sentence classifier over word embeddings with optional self-attention.

    Token ids are embedded, optionally masked at padding positions (id 0),
    passed through dropout and then pooled either by ``r_a`` attention hops
    (followed by a linear projection back to ``ws_d`` features) or by a plain
    sum over the sequence axis.  The pooled vector goes through a small
    feed-forward head and a sigmoid.

    Args:
        vocab_size: Number of rows of the embedding table (only used when
            pretrained vectors are not loaded).
        embedding_dim: Width of each word vector.
        max_length: Accepted for interface compatibility; not used here.
        use_pretrained_word_vectors: If true, build the embedding table from
            ``word_vectors`` and freeze it.
        word_vectors: Pretrained vectors, convertible by ``torch.FloatTensor``.
        device: Stored as ``self.device``.  When omitted, a module-level
            ``device`` is used if one exists, otherwise the CPU device.
        C: Stored as ``self.C``; not used by ``forward``.
        d_a: Hidden width of the attention scorer (rows of ``ws1``).
        r_a: Number of attention hops (rows of ``ws2``).
        hidden_size: Stored as ``self.rnn_hidden_size``; only affects the
            feature width when ``need_lstm`` is true.
        need_masking: Zero the embeddings of tokens whose id is 0.
        need_attention: Use attention pooling instead of a plain sum.
        need_lstm: Sets the feature width to ``2 * hidden_size``.
            NOTE(review): no recurrent layer is created or applied in this
            class, so enabling this looks incompatible with ``forward`` --
            confirm before use.
        num_classes: Width of the output layer.  When omitted, a module-level
            ``num_classes`` is used (the behaviour of earlier versions).

    Raises:
        ValueError: If ``num_classes`` is neither passed nor defined at
            module level.
    """

    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 max_length=40,
                 use_pretrained_word_vectors=True,
                 word_vectors=None,
                 device=None,
                 C=0.001,
                 d_a=10,
                 r_a=4,
                 hidden_size=100,
                 need_masking=True,
                 need_attention=True,
                 need_lstm=False,
                 num_classes=None):
        super().__init__()

        # Earlier versions read `device` and `num_classes` straight from
        # module globals; keep that as a fallback so existing call sites
        # behave the same, while allowing the class to be used standalone.
        module_globals = globals()
        if device is None:
            device = module_globals.get("device", torch.device("cpu"))
        if num_classes is None:
            if "num_classes" not in module_globals:
                raise ValueError(
                    "num_classes must be passed to SentenceEmbeddingsModel "
                    "or defined at module level"
                )
            num_classes = module_globals["num_classes"]

        self.d_a = d_a
        self.C = C
        self.r_a = r_a
        self.rnn_hidden_size = hidden_size
        self.need_masking = need_masking
        self.need_attention = need_attention
        self.need_lstm = need_lstm
        self.device = device

        # Created unconditionally (even if replaced below) so that the random
        # number stream consumed before ws1/ws2 initialisation is unchanged.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        if use_pretrained_word_vectors:
            pretrained = torch.FloatTensor(word_vectors)
            # freeze=True leaves the table with requires_grad=False.
            self.embeddings = nn.Embedding.from_pretrained(pretrained,
                                                           freeze=True)

        # Feature width seen by the attention weights and the dense head.
        ws_d = 2 * self.rnn_hidden_size if self.need_lstm else embedding_dim

        # Attention parameters carry a leading singleton axis so that matmul
        # broadcasts them over the batch.  nn.Parameter already defaults to
        # requires_grad=True.
        self.ws1 = nn.Parameter(
            torch.empty(1, self.d_a, ws_d, dtype=torch.float32))
        nn.init.xavier_uniform_(self.ws1)
        self.ws2 = nn.Parameter(
            torch.empty(1, self.r_a, self.d_a, dtype=torch.float32))
        nn.init.xavier_uniform_(self.ws2)

        self.dropout1 = nn.Dropout(0.1)
        self.dense = nn.Sequential(
            nn.Linear(ws_d, 20, bias=True),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(20, num_classes, bias=True),
        )
        # Projects the flattened r_a x ws_d attention output back to ws_d.
        self.linear = nn.Linear(ws_d * self.r_a, ws_d)

    def forward(self, inputs):
        """Return sigmoid scores of shape ``(n_batch, num_classes)``.

        Args:
            inputs: 2-D integer tensor of token ids; id 0 is treated as
                padding when ``need_masking`` is set.
        """
        embedded = self.embeddings(inputs)
        if self.need_masking:
            # Build the mask next to the embeddings (same device and dtype)
            # instead of moving it to `self.device`, which may be stale once
            # the module has been moved or loaded with a map_location.
            keep = (inputs != 0).unsqueeze(-1).to(embedded.dtype)
            embedded = embedded.mul(keep)
        z = self.dropout1(embedded)

        if self.need_attention:
            a1 = torch.tanh(self.ws1.matmul(z.transpose(1, 2)))
            # n_batch x r_a x sequence length, normalised over the sequence.
            attention = F.softmax(self.ws2.matmul(a1), dim=2)
            # n_batch x r_a x ws_d: one weighted sum of word vectors per hop.
            hops = attention.matmul(z)
            pooled = self.linear(hops.flatten(start_dim=1))
        else:
            pooled = z.sum(1)

        return torch.sigmoid(self.dense(pooled))
# Instantiate the classifier with frozen pretrained word vectors, padding
# masking and attention pooling enabled; every argument is passed by keyword
# so the call does not depend on the constructor's parameter order.
model = SentenceEmbeddingsModel(
    vocab_size=vocab_len,
    embedding_dim=vocab_dim,
    max_length=50,
    use_pretrained_word_vectors=True,
    word_vectors=word_vectors,
    device=device,
    hidden_size=100,
    need_masking=True,
    need_attention=True,
    need_lstm=False,
)
I save my model with `torch.save(model, model_path)`.
Then I try to convert the PyTorch model to TensorFlow with `mmconvert -sf pytorch -in model.pth -df tensorflow -om model.ckpt`.
I get the following error:
Traceback (most recent call last): File "/usr/local/bin/mmconvert", line 10, in <module> sys.exit(_main()) File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/_script/convert.py", line 102, in _main ret = convertToIR._convert(ir_args) File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/_script/convertToIR.py", line 97, in _convert parser = PytorchParser151(model, inputshape[0]) File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/pytorch/pytorch_parser.py", line 526, in __init__ super(PytorchParser151, self).__init__(model_file_name, input_shape) File "/usr/local/lib/python3.6/dist-packages/mmdnn/conversion/pytorch/pytorch_parser.py", line 83, in __init__ model = torch.load(model_file_name, map_location='cpu') File "/usr/local/lib/python3.6/dist-packages/torch/serialization.py", line 584, in load return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args) File "/usr/local/lib/python3.6/dist-packages/torch/serialization.py", line 842, in _load result = unpickler.load() AttributeError: Can't get attribute 'SentenceEmbeddingsModel' on <module '__main__' from '/usr/local/bin/mmconvert'>
Could anyone help me, please?
@nestyme, thank you very much for the feedback. `torch.save(model, ...)` pickles only a reference to the model's class, not its code, so the module that defines `SentenceEmbeddingsModel` must be importable (for example, kept in the same folder) when the serialized model is loaded. Please refer to https://github.com/pytorch/pytorch/issues/18325 for a solution.