ComfyUI
Fix on-load VRAM OOM
Slow down the CPU on model load so it doesn't run ahead of the GPU. This fixes a VRAM OOM on Flux 2 load.
I went to debug this with the memory trace pickles, which require --disable-cuda-malloc, but that flag made the bug go away. So I tried this synchronize instead, and it worked.
This has some very complex interactions with cuda malloc async, and I don't have a solid theory on this one yet.
Still debugging, but this gets us over the OOM for the moment. A sketch of the idea follows.
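The change amounts to inserting a synchronize into the weight-load loop so the CPU can't queue allocations arbitrarily far ahead of the GPU. A minimal sketch of the idea, assuming the load path visible in the traceback below — `patch_weight_to_device` is the real ModelPatcher method, but the wrapper and the exact sync placement here are illustrative, not the committed patch:

```python
import torch

def load_weights_throttled(model_patcher, keys, device_to):
    # Hypothetical helper sketching the fix. Each patch queues async
    # host-to-device copies and quantize kernels; with cudaMallocAsync the
    # CPU can request the next allocation long before the GPU has executed
    # (and freed) earlier work, so the allocator sees transient pressure
    # and raises OutOfMemoryError.
    for key in keys:
        model_patcher.patch_weight_to_device(key, device_to=device_to)
        if device_to is not None and device_to.type == "cuda":
            # Throttle: wait for the GPU to drain queued work before
            # issuing the next allocation, so the CPU cannot run ahead.
            torch.cuda.synchronize(device_to)
```

A per-key synchronize is the heavy-handed version; syncing every N keys, or only when free VRAM drops below a threshold, would cut the throughput cost while keeping the CPU from outrunning the allocator.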
Example test conditions:
RTX5090, Flux2 with Lora workflow.json
Error:
model_type FLUX
Requested to load Flux2
ERROR lora diffusion_model.single_blocks.19.linear1.weight Allocation on device
ERROR lora diffusion_model.single_blocks.18.linear1.weight Allocation on device
ERROR lora diffusion_model.single_blocks.17.linear1.weight Allocation on device
!!! Exception during processing !!! Allocation on device
Traceback (most recent call last):
File "/home/rattus/ComfyUI/execution.py", line 515, in execute
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/execution.py", line 329, in get_output_data
return_values = await _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/execution.py", line 303, in _async_map_node_over_list
await process_inputs(input_dict, i)
File "/home/rattus/ComfyUI/execution.py", line 291, in process_inputs
result = f(**inputs)
^^^^^^^^^^^
File "/home/rattus/ComfyUI/nodes.py", line 1538, in sample
return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/nodes.py", line 1505, in common_ksampler
samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/sample.py", line 60, in sample
samples = sampler.sample(noise, positive, negative, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/comfy-base/custom_nodes/ComfyUI-TiledDiffusion/utils.py", line 51, in KSampler_sample
return orig_fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/samplers.py", line 1163, in sample
return sample(self.model, noise, positive, negative, cfg, self.device, sampler, sigmas, self.model_options, latent_image=latent_image, denoise_mask=denoise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/samplers.py", line 1053, in sample
return cfg_guider.sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/samplers.py", line 1035, in sample
output = executor.execute(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/patcher_extension.py", line 112, in execute
return self.original(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/samplers.py", line 984, in outer_sample
self.inner_model, self.conds, self.loaded_models = comfy.sampler_helpers.prepare_sampling(self.model_patcher, noise.shape, self.conds, self.model_options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/sampler_helpers.py", line 130, in prepare_sampling
return executor.execute(model, noise_shape, conds, model_options=model_options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/patcher_extension.py", line 112, in execute
return self.original(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/sampler_helpers.py", line 138, in _prepare_sampling
comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory)
File "/home/rattus/ComfyUI/comfy/model_management.py", line 701, in load_models_gpu
loaded_model.model_load(lowvram_model_memory, force_patch_weights=force_patch_weights)
File "/home/rattus/ComfyUI/comfy/model_management.py", line 506, in model_load
self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
File "/home/rattus/ComfyUI/comfy/model_management.py", line 536, in model_use_more_vram
return self.model.partially_load(self.device, extra_memory, force_patch_weights=force_patch_weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/model_patcher.py", line 965, in partially_load
raise e
File "/home/rattus/ComfyUI/comfy/model_patcher.py", line 962, in partially_load
self.load(device_to, lowvram_model_memory=current_used + extra_memory, force_patch_weights=force_patch_weights, full_load=full_load)
File "/home/rattus/ComfyUI/comfy/model_patcher.py", line 763, in load
self.patch_weight_to_device(key, device_to=device_to)
File "/home/rattus/ComfyUI/comfy/model_patcher.py", line 632, in patch_weight_to_device
set_func(out_weight, inplace_update=inplace_update, seed=string_to_seed(key))
File "/home/rattus/ComfyUI/comfy/ops.py", line 622, in set_weight
weight = QuantizedTensor.from_float(weight, self.layout_type, scale="recalculate", dtype=self.weight.dtype, stochastic_rounding=seed, inplace_ops=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/quant_ops.py", line 193, in from_float
qdata, layout_params = LAYOUTS[layout_type].quantize(tensor, **quantize_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rattus/ComfyUI/comfy/quant_ops.py", line 402, in quantize
scale = torch.amax(tensor.abs()) / torch.finfo(dtype).max
^^^^^^^^^^^^
torch.OutOfMemoryError: Allocation on device
Got an OOM, unloading all loaded models.
Prompt executed in 21.62 seconds