Hello! I set up the environment as you described and downloaded the corresponding pretrained model and data, but it still won't run. The specific error is shown below. What could be going wrong?
File "/home/.local/lib/python3.8/site-packages/datasets/dataset dict,py", line 472, in k: dataset.map(File "/home/local/lib/python3,8/site-packages/datasets/arrow dataset.py", line 1657, in mapreturn self.map singleFile "/home/.local/lib/python3,8/sitepackages/datasets/arrow dataset.py", line 185, in wrapperout: Union["DatasetuDatasetDictu] func(self,*args,**kwargs)File "/home/.local/lib/python3,8/site-packages/datasets/fingerprint,py", line 397, in wrapperout = func(self,*args,**kwargs)File "/home/.local/lib/python3,8/site-packages/datasets/arrow dataset,py", line 2024, in _-map_singlewriter.write batch(batch)File "/home/.local/ib/python3.8/site-packages/datasets/arrow writer.py", line 388, in write batchpa table = pa.Tablefrom pydict(typed sequence examples)File "pyarrow/table.pxi",line 1631,in pyarrow,Tib,Table,from pydictFile"pyarrow/array.pxi",line 332,in pyarrow,lib,asarray223File"pyarrow/array.pxi"linein pyarrow,lib,arrayFile"pyarrow/array.pxi"line 110,in pyarrow,lib,handle arrow array protocolocal/lib/python3,8/site-packages/datasets/arrow writer.py", line 99, in arrow_arrayFile/home/shaiiu/pa.array(self.data,type=type)outFile"pyarrow/array.pxi",line 306,in pyarrow,lib,arrayFile"pyarrow/array.pxin.line 39,in pyarrow,lib,sequence to arrayF妊i烩跋唉leline 141,in pyarrow,lib.pyarrow internal check status"pyarrow/error.pxi",line 97,in pyarrow,lib,check statusFilepyarrow/error.pxi",Could not convert 'input ids': [[101,23236,24853,2578,203854801996175310U00200lib,ArrowInvalid:1124119991059421042231,1997,2634pyarrow204219992144102]286491007,1012,102],[101,2087,1997,9666,2884,1005,1055,39342031572624121012306s1006,213074523。56794712,2029274350951996169562006101020764918,2829,1998,29044,2100。2265。1036,10362006,1999,2804,2027,8678,1036,1036,1996[101999,289115851102],1036,3964,20081996,4234,2792,18712,20512015286610361006,1036,1036,249018952,1036,1021[10110365052004201920383687,1309920153614,1999,9440,1998,14266,2013,2624,5277,2110,2118,10101998106182363,5065,1997,2671.2014.,2015,9450,1012,102],token type ids':lo.o0,0,日,日.0.6,0,0,616661,1010,10516,9450,1010,1998,18440,13592,000e6,0,0,0l,[0,,,,, ,o.o..6,0l.0,,0,,0,0,,,,,,.6,、,el,[0,,,6,,,0,,,,0、、0.日,0,6,日,.6,6.0.0.0,0,0,0l日[,6,0,0o00, , o, , o, o,, ,,6, , , , ,o.,2,1,1,1,1,l.l.,l.1.0,0,0,0,0,oj],'attention mask': [[l,1,1,1,1.01,1,1,1,1,1,0.6dui1.1. 1.1.1L1,1, 1],[l, l, 1,1, 1, l, l, I, 1, 1,l, l,1,l, l, 1, 1, 1,1, 1, 1, 1,1.11,1]],'offset_mappi1,l, 1, l, 1l, [l, 1, 1, , 1, 1, 1, 1, 1, l, l, , l, l, 1, 1, 1, 1, 1, 1, 1, l,1,1,1,1,1.[[(0,0),(0, 4),(5,16),(17,25), (26, 29),(30, 38),(39, 41), (42, 46),(46, 48),(49,54), (55, 58), (59, 69), (70, 72)(73(79,81),(8178),(83,92),(93,9),(100, 101),(102, 104), (104, 106),(107, 108),(109, 110), (, 0), [(0, ), (0,4),(5, 7),(8, 11), (11, 13),15),(15,16),(14.25). 
(26, 30),(31,35),(3,38), (39,43), (43,49),(5,54),(55,60),(61,62),(0,0)1, [(0,0),0,2),(3,11),(12, 16),(7, 29),(30, 32),(3(34,35),(36,39),(40,47),(48,53),(54,57),(58,63),(63,64),(65,69),(7,71),(71, 72),(73,74),(75,80), 81,84),(85,87),18,96),5),(106, 112),(113,116),(17,122), (123,124),(0,0)1,[(0,0), (0, 1),(1, 2), (3, 8),(9, 11),(12, 13),(13, 14),(15, 20), (2, 25),(26,29),(40,47),(48,49),(50,51),(51,52),(53,55),55,57),(57,60),(61,65),(65,69),(69,70),(7,72), (,0),[0,0),(0,3),14,12),1(17,25),(26,28),(29,36),(37,43),(44, 46),(47,52),(53, 56),(57,6),67,71),(72,75),(76,81),(82,87),(88,98),(99,100),(101,104,108),(109,115),(116,126),(127,140),(140, 141),(142, 14),(145,147),(148,157),(15,165),(166,176),(17,178),(179, 186),(187,197)199), (200, 203),(204,208),(28,211),(211, 212),(213,223),(224, 225),(,0)1》 with type BatchEncoding: did not recognize python valwue typeierring an Arrow data typeajiu@9f-03-00-27-fc-3c:/googol/nlp/SpanProtos
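The trace above comes from a /home/.local Python 3.8 environment, while the retry further down runs under anaconda3 with Python 3.9, and this Arrow-conversion error is sensitive to the exact datasets/pyarrow combination. A minimal, hedged snippet (not from the thread) to capture the versions alongside such a report:

# Print the versions of the libraries involved in the failing Arrow write.
import datasets
import pyarrow
import transformers

print("datasets:", datasets.__version__)
print("pyarrow:", pyarrow.__version__)
print("transformers:", transformers.__version__)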
Hello! I'm still running into the following problem. Could you help me take a look?
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fail to resize token embeddings.
Running tokenizer on dataset: 0%| | 0/20 [00:00<?, ?ba/s]
Traceback (most recent call last):
File "/code/SpanProto/nlp_trainer.py", line 285, in
main()
File "/code/SpanProto/nlp_trainer.py", line 135, in main
tokenized_datasets = processor.get_tokenized_datasets()
File "/code/SpanProto/processor/ProcessorBase.py", line 308, in get_tokenized_datasets
raw_datasets = raw_datasets.map(
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/dataset_dict.py", line 494, in map
{
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/dataset_dict.py", line 495, in
k: dataset.map(
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 2092, in map
return self._map_single(
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 518, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 485, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/fingerprint.py", line 411, in wrapper
out = func(self, *args, **kwargs)
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 2486, in _map_single
writer.write_batch(batch)
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_writer.py", line 458, in write_batch
pa_table = pa.Table.from_pydict(typed_sequence_examples)
File "pyarrow/table.pxi", line 1868, in pyarrow.lib.Table.from_pydict
File "pyarrow/table.pxi", line 2658, in pyarrow.lib._from_pydict
File "pyarrow/array.pxi", line 342, in pyarrow.lib.asarray
File "pyarrow/array.pxi", line 230, in pyarrow.lib.array
File "pyarrow/array.pxi", line 110, in pyarrow.lib._handle_arrow_array_protocol
File "/home/shajiu/anaconda3/lib/python3.9/site-packages/datasets/arrow_writer.py", line 140, in arrow_array
out = pa.array(cast_to_python_objects(self.data, only_1d_for_numpy=True), type=type)
File "pyarrow/array.pxi", line 316, in pyarrow.lib.array
File "pyarrow/array.pxi", line 39, in pyarrow.lib._sequence_to_array
File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/error.pxi", line 99, in pyarrow.lib.check_status
pyarrow.lib.ArrowInvalid: Could not convert {'input_ids': [[101, 23236, 24853, 2578, 2038, 11241, 1999, 10594, 5480, 2104, 1996, 2231, 1997, 2634, 17531, 2080, 4712, 5679, 1006, 21307, 4523, 1007, 1012, 102], [101, 2087, 1997, 9666, 2884, 1005, 1055, 3934, 2031, 2042, 1999, 5726, 28649, 2412, 2144, 1012, 102], [101, 1999, 2804, 2027, 8678, 2006, 1036, 1036, 1996, 4918, 2829, 1998, 29044, 2100, 2265, 1036, 1036, 1010, 2029, 2743, 2006, 5095, 16956, 2076, 1996, 3865, 1012, 102], [101, 1036, 1036, 8952, 2866, 1036, 1036, 3964, 2008, 1996, 4234, 2792, 1006, 1036, 1036, 18712, 2891, 15851, 2051, 24901, 2015, 999, 102], [101, 2016, 2363, 2014, 5065, 1997, 2671, 3014, 1999, 9440, 1998, 14266, 2013, 2624, 5277, 2110, 2118, 1010, 1998, 2038, 3687, 13099, 10618, 2015, 2004, 2019, 5057, 2966, 16661, 1010, 10516, 9450, 1010, 1998, 18440, 13592, 2015, 9450, 1012, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'offset_mapping': [[(0, 0), (0, 4), (5, 16), (17, 25), (26, 29), (30, 38), (39, 41), (42, 46), (46, 48), (49, 54), (55, 58), (59, 69), (70, 72), (73, 78), (79, 81), (81, 82), (83, 92), (93, 99), (100, 101), (102, 104), (104, 106), (107, 108), (109, 110), (0, 0)], [(0, 0), (0, 4), (5, 7), (8, 11), (11, 13), (14, 15), (15, 16), (17, 25), (26, 30), (31, 35), (36, 38), (39, 43), (43, 49), (50, 54), (55, 60), (61, 62), (0, 0)], [(0, 0), (0, 2), (3, 11), (12, 16), (17, 29), (30, 32), (33, 34), (34, 35), (36, 39), (40, 47), (48, 53), (54, 57), (58, 63), (63, 64), (65, 69), (70, 71), (71, 72), (73, 74), (75, 80), (81, 84), (85, 87), (88, 96), (97, 105), (106, 112), (113, 116), (117, 122), (123, 124), (0, 0)], [(0, 0), (0, 1), (1, 2), (3, 8), (9, 11), (12, 13), (13, 14), (15, 20), (21, 25), (26, 29), (30, 39), (40, 47), (48, 49), (50, 51), (51, 52), (53, 55), (55, 57), (57, 60), (61, 65), (65, 69), (69, 70), (71, 72), (0, 0)], [(0, 0), (0, 3), (4, 12), (13, 16), (17, 25), (26, 28), (29, 36), (37, 43), (44, 46), (47, 52), (53, 56), (57, 66), (67, 71), (72, 75), (76, 81), (82, 87), (88, 98), (99, 100), (101, 104), (105, 108), (109, 115), (116, 126), (127, 140), (140, 141), (142, 144), (145, 147), (148, 157), (158, 165), (166, 176), (177, 178), (179, 186), (187, 197), (198, 199), (200, 203), (204, 208), (208, 211), (211, 212), (213, 223), (224, 225), (0, 0)]]} with type BatchEncoding: did not recognize Python value type when inferring an Arrow data type
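The ArrowInvalid at the bottom says pyarrow was handed a transformers BatchEncoding and could not infer an Arrow type for it, which points at the function that ProcessorBase.get_tokenized_datasets passes to raw_datasets.map. A minimal sketch of the commonly suggested workaround, returning a plain dict from the map function, follows; the checkpoint, text column, and toy data are illustrative assumptions, not the actual SpanProto code.

# Sketch of the usual workaround for "Could not convert ... with type
# BatchEncoding": make the function given to datasets.map() return a plain
# dict. Checkpoint, column name, and data are assumptions for illustration.
from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint

def tokenize_fn(examples):
    encoded = tokenizer(
        examples["text"],             # assumed text column name
        truncation=True,
        return_offsets_mapping=True,  # matches the offset_mapping key in the error
    )
    # BatchEncoding is not a plain dict; convert it so pyarrow can infer a schema.
    return {key: value for key, value in encoded.items()}

raw_datasets = Dataset.from_dict({"text": ["a toy example", "another sentence"]})  # toy data
tokenized_datasets = raw_datasets.map(tokenize_fn, batched=True)
print(tokenized_datasets.column_names)

The same dict conversion can be tried inside the actual tokenize function in processor/ProcessorBase.py; this is a sketch under the stated assumptions, not a confirmed fix for SpanProto.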