Hi All
I am pruning the Llama 3.2 1B model and getting the following error. Any idea how to resolve it?
ault0]:[rank0]: Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/nemo_run/core/runners/fdl_runner.py", line 66, in
fdl_runner_app()
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/typer/main.py", line 340, in call
raise e
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/typer/main.py", line 323, in call
return get_command(self)(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/click/core.py", line 1161, in call
return self.main(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/typer/core.py", line 677, in main
return _main(
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/typer/core.py", line 195, in _main
rv = self.invoke(ctx)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/click/core.py", line 1443, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/click/core.py", line 788, in invoke
return __callback(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/typer/main.py", line 698, in wrapper
return callback(**use_params)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/nemo_run/core/runners/fdl_runner.py", line 62, in fdl_direct_run
fdl_fn()
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/nemo/collections/llm/api.py", line 360, in prune
prune_gpt_model(model, pruning_config, data, trainer)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/nemo/collections/llm/modelopt/prune/pruner.py", line 112, in prune_gpt_model
mtp.prune(
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/prune/pruning.py", line 203, in prune
model = apply_mode(model, mode, registry=PruneModeRegistry)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/conversion.py", line 417, in apply_mode
model, metadata = get_mode(m).convert(model, config, **kwargs) # type: ignore [call-arg]
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/prune/fastnas.py", line 57, in convert_fastnas_searchspace
return convert_searchspace(model, config, FastNASPatchManager)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/autonas.py", line 515, in convert_searchspace
search_space = generate_search_space(model, rules=config.model_dump())
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/search_space.py", line 244, in generate_search_space
search_space.generate(rules=rules)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/search_space.py", line 68, in generate
mods_converted = self.convert_to_dynamic(rules, DMRegistry)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 1175, in convert_to_dynamic
dm_registry.convert(mod)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 1132, in convert
return self[type(nn_mod)].convert(nn_mod)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 676, in convert
module._setup()
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/plugins/megatron.py", line 758, in _setup
self.decoder.layers[i] = DMRegistry.convert(self.decoder.layers[i])
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 1132, in convert
return self[type(nn_mod)].convert(nn_mod)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 676, in convert
module._setup()
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/plugins/megatron.py", line 651, in _setup
self.self_attention = DMRegistry.convert(self.self_attention)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 1132, in convert
return self[type(nn_mod)].convert(nn_mod)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 676, in convert
module._setup()
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/plugins/megatron.py", line 570, in _setup
self.linear_qkv = _DynamicQKVColumnParallelLinear.convert(
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/nas/plugins/megatron.py", line 309, in convert
mod._register_hparam("input_size", TracedHp(list(range(1, mod.input_size + 1))))
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/modelopt/torch/opt/dynamic.py", line 811, in getattr
attr = super().getattr(name)
File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1940, in getattr
raise AttributeError(
AttributeError: '_DynamicQKVColumnParallelLinear' object has no attribute 'input_size'
ning-width/0 E0515 20:22:00.101000 992736 torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 992759) of binary: /nemo_code/envs/nemo_1/bin/python
ning-width/0 I0515 20:22:00.107000 992736 torch/distributed/elastic/multiprocessing/errors/init.py:368] ('local_rank %s FAILED with no error file. Decorate your entrypoint fn with @record for traceback info. See: https://pytorch.org/docs/stable/elastic/errors.html', 0)
ning-width/0 Traceback (most recent call last):
ning-width/0 File "/nemo_code/envs/nemo_1/bin/torchrun", line 8, in
ning-width/0 sys.exit(main())
ning-width/0 File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/init.py", line 355, in wrapper
ning-width/0 return f(*args, **kwargs)
ning-width/0 File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/distributed/run.py", line 892, in main
ning-width/0 run(args)
ning-width/0 File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/distributed/run.py", line 883, in run
ning-width/0 elastic_launch(
ning-width/0 File "/nemo_code/envs/nemo_1/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 139, in call
Steps to reproduce
Step 1:
I fine-tune Llama 3.2 1B on personal data using PEFT, following the code in this notebook: https://github.com/NVIDIA/NeMo/blob/main/tutorials/llm/llama/nemo2-sft-peft/nemo2-peft.ipynb
Step 2:
I then pass the model from Step 1 (the PEFT fine-tuned Llama 3.2 1B) to the pruning code from this notebook:
https://github.com/NVIDIA/NeMo/blob/main/tutorials/llm/llama/pruning-distillation/03_pruning.ipynb
While pruning the PEFT fine-tuned model, I get the error shown above.
Can you first check whether a non-PEFT model can be pruned? We have not tried pruning a PEFT model before, but if the issue occurs only for the PEFT model and not for the non-PEFT one, we can add support for PEFT.
Pruning works for the non-PEFT model, but fails for the PEFT fine-tuned one — it hits the check on `input_size` in the NAS plugins library (`modelopt/torch/nas/plugins/megatron.py`) when the model is PEFT fine-tuned.