Issue with tokenizer
Describe the bug
ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (input_ids in this case) have excessive nesting (inputs type list where type int is expected)
I see None at the end of list output from the tokenization process of the text 'a photo of sks person'. I checked this holds true for any text.
Reproduction
accelerate launch train_dreambooth.py --pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 --instance_data_dir My_pics_png/ --output_dir outputs/ --train_text_encoder --instance_prompt "a photo of sks person" --resolution 512 --train_batch_size 1 --gradient_checkpointing --learning_rate 2e-6 --lr_scheduler constant --lr_warmup_steps 0 --num_class_images 200 --max_train_steps 800
Logs
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Steps: 0%| | 0/800 [00:00<?, ?it/s]╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data_SIR/Dreambooth-Stable-Diffusion/train_dreambooth.py:674 in <module> │
│ │
│ 671 │
│ 672 if __name__ == "__main__": │
│ 673 │ args = parse_args() │
│ ❱ 674 │ main(args) │
│ │
│ /data_SIR/Dreambooth-Stable-Diffusion/train_dreambooth.py:591 in main │
│ │
│ 588 │ │ unet.train() │
│ 589 │ │ if args.train_text_encoder: │
│ 590 │ │ │ text_encoder.train() │
│ ❱ 591 │ │ for step, batch in enumerate(train_dataloader): │
│ 592 │ │ │ with accelerator.accumulate(unet): │
│ 593 │ │ │ │ # Convert images to latent space │
│ 594 │ │ │ │ latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).laten │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/accelerate/data_loader.py:376 in │
│ __iter__ │
│ │
│ 373 │ │ dataloader_iter = super().__iter__() │
│ 374 │ │ # We iterate one batch ahead to check when we are at the end │
│ 375 │ │ try: │
│ ❱ 376 │ │ │ current_batch = next(dataloader_iter) │
│ 377 │ │ except StopIteration: │
│ 378 │ │ │ yield │
│ 379 │ │ while True: │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/utils/data/dataloader.py:530 │
│ in __next__ │
│ │
│ 527 │ │ with torch.autograd.profiler.record_function(self._profile_name): │
│ 528 │ │ │ if self._sampler_iter is None: │
│ 529 │ │ │ │ self._reset() │
│ ❱ 530 │ │ │ data = self._next_data() │
│ 531 │ │ │ self._num_yielded += 1 │
│ 532 │ │ │ if self._dataset_kind == _DatasetKind.Iterable and \ │
│ 533 │ │ │ │ │ self._IterableDataset_len_called is not None and \ │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/utils/data/dataloader.py:1224 │
│ in _next_data │
│ │
│ 1221 │ │ │ │ self._task_info[idx] += (data,) │
│ 1222 │ │ │ else: │
│ 1223 │ │ │ │ del self._task_info[idx] │
│ ❱ 1224 │ │ │ │ return self._process_data(data) │
│ 1225 │ │
│ 1226 │ def _try_put_index(self): │
│ 1227 │ │ assert self._tasks_outstanding < self._prefetch_factor * self._num_workers │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/utils/data/dataloader.py:1250 │
│ in _process_data │
│ │
│ 1247 │ │ self._rcvd_idx += 1 │
│ 1248 │ │ self._try_put_index() │
│ 1249 │ │ if isinstance(data, ExceptionWrapper): │
│ ❱ 1250 │ │ │ data.reraise() │
│ 1251 │ │ return data │
│ 1252 │ │
│ 1253 │ def _mark_worker_as_unavailable(self, worker_id, shutdown=False): │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/_utils.py:457 in reraise │
│ │
│ 454 │ │ │ # If the exception takes multiple arguments, don't try to │
│ 455 │ │ │ # instantiate since we don't know how to │
│ 456 │ │ │ raise RuntimeError(msg) from None │
│ ❱ 457 │ │ raise exception │
│ 458 │
│ 459 │
│ 460 def _get_available_device_type(): │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 715, in convert_to_tensors
tensor = as_tensor(value)
RuntimeError: Could not infer dtype of NoneType
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
return self.collate_fn(data)
File "train_dreambooth.py", line 506, in collate_fn
input_ids = tokenizer.pad(
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 2993, in pad
return BatchEncoding(batch_outputs, tensor_type=return_tensors)
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 210, in __init__
self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
File "/home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 731, in convert_to_tensors
raise ValueError(
ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list`
where type `int` is expected).
Steps: 0%| | 0/800 [00:00<?, ?it/s]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/ubuntu/anaconda3/envs/ldm/bin/accelerate:8 in <module> │
│ │
│ 5 from accelerate.commands.accelerate_cli import main │
│ 6 if __name__ == '__main__': │
│ 7 │ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) │
│ ❱ 8 │ sys.exit(main()) │
│ 9 │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/accelerate/commands/accelerate_cli.p │
│ y:45 in main │
│ │
│ 42 │ │ exit(1) │
│ 43 │ │
│ 44 │ # Run │
│ ❱ 45 │ args.func(args) │
│ 46 │
│ 47 │
│ 48 if __name__ == "__main__": │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/accelerate/commands/launch.py:1069 │
│ in launch_command │
│ │
│ 1066 │ elif defaults is not None and defaults.compute_environment == ComputeEnvironment.AMA │
│ 1067 │ │ sagemaker_launcher(defaults, args) │
│ 1068 │ else: │
│ ❱ 1069 │ │ simple_launcher(args) │
│ 1070 │
│ 1071 │
│ 1072 def main(): │
│ │
│ /home/ubuntu/anaconda3/envs/ldm/lib/python3.8/site-packages/accelerate/commands/launch.py:551 in │
│ simple_launcher │
│ │
│ 548 │ process = subprocess.Popen(cmd, env=current_env) │
│ 549 │ process.wait() │
│ 550 │ if process.returncode != 0: │
│ ❱ 551 │ │ raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) │
│ 552 │
│ 553 │
│ 554 def multi_gpu_launcher(args): │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
CalledProcessError: Command '['/home/ubuntu/anaconda3/envs/ldm/bin/python', 'train_dreambooth.py', '--pretrained_model_name_or_path', 'CompVis/stable-diffusion-v1-4', '--instance_data_dir', '/data_SIR/Dreambooth-Stable-Diffusion/My_pics_png', '--output_dir',
'/data_SIR/Koutilya_SD', '--train_text_encoder', '--instance_prompt', 'a photo of sks person', '--resolution', '512', '--train_batch_size', '1', '--gradient_checkpointing', '--learning_rate', '2e-6', '--lr_scheduler', 'constant', '--lr_warmup_steps', '0',
'--num_class_images', '200', '--max_train_steps', '800']' returned non-zero exit status 1.
(ldm)
System Info
- diffusers version: 0.8.1
- Platform: Linux-5.4.0-1084-aws-x86_64-with-glibc2.10
- Python version: 3.8.5
- PyTorch version (GPU?): 1.11.0 (True)
- Huggingface_hub version: 0.11.0
- Transformers version: 4.24.0
- Using GPU in script?: Yes
- Using distributed or parallel set-up in script?: No
I also have this when trying to run convert_stable_diffusion_checkpoint_to_onnx.py. On Windows 11, Python 3.9.13, diffusers 0.9.0.
@patil-suraj could you take a look here?
Also @koutilya-pnvr could you please attach a Google Colab that directly reproduces this error? We cannot run the above command because we don't have a My_pics_png/ folder.
This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.
Please note that issues that do not follow the contributing guidelines are likely to be ignored.