Message: 'Error occurs in lazy tokenize:' How do I fix this error?
Please post the full stack trace.
Traceback (most recent call last):
  File "/home/junhang/swift/swift/llm/utils/utils.py", line 266, in _try_fetch
    res = self.template.encode(data)
  File "/home/junhang/swift/swift/llm/utils/template.py", line 447, in encode
    return _encode(example) if not streaming else _encode(example)[0]
  File "/home/junhang/swift/swift/llm/utils/template.py", line 1714, in _encode
    pixel_values = [transform_image(image, max_num=1 if has_video else 12) for image in images]
  File "/home/junhang/swift/swift/llm/utils/template.py", line 1714, in <listcomp>
    pixel_values = [transform_image(image, max_num=1 if has_video else 12) for image in images]
  File "/home/junhang/swift/swift/llm/utils/vision_utils.py", line 167, in transform_image
    images = _dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
  File "/home/junhang/swift/swift/llm/utils/vision_utils.py", line 63, in _dynamic_preprocess
    resized_img = image.resize((target_width, target_height))
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/PIL/Image.py", line 2293, in resize
    self.load()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/PIL/ImageFile.py", line 312, in load
    raise _get_oserror(err_code, encoder=False)
OSError: broken data stream when reading image file

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/junhang/swift/swift/cli/sft.py", line 5, in <module>
    sft_main()
  File "/home/junhang/swift/swift/utils/run_utils.py", line 32, in x_main
    result = llm_x(args, **kwargs)
  File "/home/junhang/swift/swift/llm/sft.py", line 405, in llm_sft
    trainer.train(training_args.resume_from_checkpoint)
  File "/home/junhang/swift/swift/trainers/mixin.py", line 538, in train
    res = super().train(resume_from_checkpoint, *args, **kwargs)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/transformers/trainer.py", line 1948, in train
    return inner_training_loop(
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/transformers/trainer.py", line 2246, in _inner_training_loop
    for step, inputs in enumerate(epoch_iterator):
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/accelerate/data_loader.py", line 451, in __iter__
    dataloader_iter = super().__iter__()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 440, in __iter__
    return self._get_iterator()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 388, in _get_iterator
    return _MultiProcessingDataLoaderIter(self)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1038, in __init__
    w.start()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/process.py", line 121, in start
    self._popen = self._Popen(self)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/context.py", line 281, in _Popen
    return Popen(process_obj)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
    self._launch(process_obj)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/popen_fork.py", line 71, in _launch
    code = process_obj._bootstrap(parent_sentinel=child_r)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/junhang/anaconda3/envs/VL/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/junhang/swift/swift/llm/utils/utils.py", line 255, in __getitem__
    res = self._try_fetch(idx)
  File "/home/junhang/swift/swift/llm/utils/utils.py", line 268, in _try_fetch
    logger.error('Error occurs in lazy tokenize:', e)
Message: 'Error occurs in lazy tokenize:'
Arguments: (OSError('broken data stream when reading image file'),)

Is it that one of the images can't be read?
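The underlying OSError ('broken data stream when reading image file') is what PIL raises when it cannot fully decode an image, so a corrupt or truncated image file in the dataset is the likely cause. A minimal sketch for pre-scanning the image files before training; the image_paths list below is a placeholder, collect the paths however your dataset stores them:

# Minimal sketch: find broken image files before training.
# 'image_paths' is a placeholder -- gather the paths from your dataset.
from PIL import Image

image_paths = ['data/images/0001.jpg', 'data/images/0002.jpg']  # placeholder

for path in image_paths:
    try:
        with Image.open(path) as img:
            img.load()  # force a full decode; truncated or corrupt files raise OSError here
    except OSError as e:
        print(f'broken image: {path} ({e})')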
Try pulling the latest code from the main branch.
If that doesn't work, please post your sh script so we can take a look.
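For reference, the secondary TypeError in the log is not a data problem: logger.error('Error occurs in lazy tokenize:', e) passes the exception as an extra %-format argument without a matching placeholder in the message, so the logging call itself fails while reporting the real OSError. A minimal sketch of the difference with the standard logging module (illustrative only, not the swift code):

import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

err = OSError('broken data stream when reading image file')

# Failing pattern: no '%s' in the message, one extra argument, so
# msg % args raises "not all arguments converted during string formatting".
# logger.error('Error occurs in lazy tokenize:', err)

# Correct: interpolate the exception into the message...
logger.error('Error occurs in lazy tokenize: %s', err)
# ...or attach the full traceback via exc_info.
logger.error('Error occurs in lazy tokenize:', exc_info=err)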