not able to reproduce the training procedure
I followed the article linked below, which contains a piece of code for reproducing the training of chars2vec, but the code raises an error. Can you please help? If it works, I want to train the model for my own purposes on German text. Website: https://hackernoon.com/chars2vec-character-based-language-model-for-handling-real-world-texts-with-spelling-errors-and-a3e4053a147d
code:
import chars2vec
dim = 50
path_to_model = 'path/to/model/directory'
X_train = [('mecbanizing', 'mechanizing'), # similar words, target is equal 0
('dicovery', 'dis7overy'), # similar words, target is equal 0
('prot$oplasmatic', 'prtoplasmatic'), # similar words, target is equal 0
('copulateng', 'lzateful'), # not similar words, target is equal 1
('estry', 'evadin6'), # not similar words, target is equal 1
('cirrfosis', 'afear') # not similar words, target is equal 1
]
y_train = [0, 0, 0, 1, 1, 1]
model_chars = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.',
'/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
'=', '>', '?', '@', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z']
my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars)
chars2vec.save_model(my_c2v_model, path_to_model)
words = ['list', 'of', 'words']
c2v_model = chars2vec.load_model(path_to_model)
word_embeddings = c2v_model.vectorize_words(words)
Error:
ValueError Traceback (most recent call last)
<ipython-input-14-23a592d19001> in <module>
1 # Create and train chars2vec model using given training data
----> 2 my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars)
3
4 # Save pretrained model
5 chars2vec.save_model(my_c2v_model, path_to_model)
C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in train_model(emb_dim, X_train, y_train, model_chars, max_epochs, patience, validation_split, batch_size)
235
236 targets = [float(el) for el in y_train]
--> 237 c2v_model.fit(X_train, targets, max_epochs, patience, validation_split, batch_size)
238
239 return c2v_model
C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in fit(self, word_pairs, targets, max_epochs, patience, validation_split, batch_size)
105 x_2_pad_seq = keras.preprocessing.sequence.pad_sequences(x_2)
106
--> 107 self.model.fit([x_1_pad_seq, x_2_pad_seq], targets,
108 batch_size=batch_size, epochs=max_epochs,
109 validation_split=validation_split,
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1037 # `Tensor` and `NumPy` input.
1038 (x, y, sample_weight), validation_data = (
-> 1039 data_adapter.train_validation_split(
1040 (x, y, sample_weight), validation_split=validation_split))
1041
~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\data_adapter.py in train_validation_split(arrays, validation_split)
1372 unsplitable = [type(t) for t in flat_arrays if not _can_split(t)]
1373 if unsplitable:
-> 1374 raise ValueError(
1375 "`validation_split` is only supported for Tensors or NumPy "
1376 "arrays, found following types in the input: {}".format(unsplitable))
ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>]
I also tried converting y_train to a NumPy array before calling train_model, but the error was still not resolved:
y_train = [0, 0, 0, 1, 1, 1]
y_train = np.array(y_train)
I followed the article linked below, which contains a piece of code for reproducing the training of chars2vec, but the code raises an error. Can you please help? If it works, I want to train the model for my own purposes on German text. Website: https://hackernoon.com/chars2vec-character-based-language-model-for-handling-real-world-texts-with-spelling-errors-and-a3e4053a147d
code:
import chars2vec dim = 50 path_to_model = 'path/to/model/directory' X_train = [('mecbanizing', 'mechanizing'), # similar words, target is equal 0 ('dicovery', 'dis7overy'), # similar words, target is equal 0 ('prot$oplasmatic', 'prtoplasmatic'), # similar words, target is equal 0 ('copulateng', 'lzateful'), # not similar words, target is equal 1 ('estry', 'evadin6'), # not similar words, target is equal 1 ('cirrfosis', 'afear') # not similar words, target is equal 1 ] y_train = [0, 0, 0, 1, 1, 1] model_chars = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars) chars2vec.save_model(my_c2v_model, path_to_model) words = ['list', 'of', 'words'] c2v_model = chars2vec.load_model(path_to_model) word_embeddings = c2v_model.vectorize_words(words)Error:
ValueError Traceback (most recent call last) <ipython-input-14-23a592d19001> in <module> 1 # Create and train chars2vec model using given training data ----> 2 my_c2v_model = chars2vec.train_model(dim, X_train, y_train, model_chars) 3 4 # Save pretrained model 5 chars2vec.save_model(my_c2v_model, path_to_model) C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in train_model(emb_dim, X_train, y_train, model_chars, max_epochs, patience, validation_split, batch_size) 235 236 targets = [float(el) for el in y_train] --> 237 c2v_model.fit(X_train, targets, max_epochs, patience, validation_split, batch_size) 238 239 return c2v_model C:\ProgramData\Anaconda3\lib\site-packages\chars2vec\model.py in fit(self, word_pairs, targets, max_epochs, patience, validation_split, batch_size) 105 x_2_pad_seq = keras.preprocessing.sequence.pad_sequences(x_2) 106 --> 107 self.model.fit([x_1_pad_seq, x_2_pad_seq], targets, 108 batch_size=batch_size, epochs=max_epochs, 109 validation_split=validation_split, ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs) 106 def _method_wrapper(self, *args, **kwargs): 107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access --> 108 return method(self, *args, **kwargs) 109 110 # Running inside `run_distribute_coordinator` already. ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing) 1037 # `Tensor` and `NumPy` input. 
1038 (x, y, sample_weight), validation_data = ( -> 1039 data_adapter.train_validation_split( 1040 (x, y, sample_weight), validation_split=validation_split)) 1041 ~\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\data_adapter.py in train_validation_split(arrays, validation_split) 1372 unsplitable = [type(t) for t in flat_arrays if not _can_split(t)] 1373 if unsplitable: -> 1374 raise ValueError( 1375 "`validation_split` is only supported for Tensors or NumPy " 1376 "arrays, found following types in the input: {}".format(unsplitable)) ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>, <class 'float'>]
I found the solution by editing the code snippet in model.py:
from: targets = [float(el) for el in y_train]
to: targets = np.array(y_train)
Authors and collaborators, please confirm whether this fix is correct, because I will be using it for a crucial project.
I've hit the same error. Any chance we can get a fix for this? Thanks!
> I've hit the same error. Any chance we can get a fix for this? Thanks!

Try this. I found the solution by editing the code snippet in model.py:
from: targets = [float(el) for el in y_train]
to: targets = np.array(y_train)
Can we have this fix published to the pip repository, so that we don't have to manually download this project?