DeepSpeed
DeepSpeed copied to clipboard
[BUG] AttributeError: 'UnembedParameter' object has no attribute 'dtype'
My code to reproduce the issue:
model_name_or_path = "meta/llama2-3.1B"
mii_engine = mii.pipeline(model_name_or_path = args.model_type, tensor_parallel = 1)
mii.engine(prompt="hello, paris")
The error (full traceback) is:
[rank0]: deepseedfastgen_engine = get_engine(args, arrival_rate)
[rank0]: File "/root/xxxx/DeepSpeed-MII/hybridserve/serve.py", line 60, in get_engine
[rank0]: mii_engine = mii.pipeline(model_name_or_path = args.model_type, tensor_parallel = args.tensor_parallel)
[rank0]: File "/root/xxxx/DeepSpeed-MII/mii/api.py", line 231, in pipeline
[rank0]: inference_engine = load_model(model_config)
[rank0]: File "/root/xxxx/DeepSpeed-MII/mii/modeling/models.py", line 17, in load_model
[rank0]: inference_engine = build_hf_engine(
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/engine_factory.py", line 135, in build_hf_engine
[rank0]: return InferenceEngineV2(policy, engine_config)
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/engine_v2.py", line 83, in __init__
[rank0]: self._model = self._policy.build_model(self._config, self._base_mp_group)
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/inference_policy_base.py", line 157, in build_model
[rank0]: self.populate_model_parameters()
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/inference_policy_base.py", line 201, in populate_model_parameters
[rank0]: buffer, metadata = flatten_inference_model(container_map.transformer_params,
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/flat_model_helpers.py", line 191, in flatten_inference_model
[rank0]: total_size = process_layer(non_transformer_container, l_name, total_size)
[rank0]: File "/root/xxxx/.python/vllm/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/flat_model_helpers.py", line 166, in process_layer
[rank0]: param_metadata.core_param = TensorMetadata(dtype=str(param.dtype),
[rank0]: AttributeError: 'UnembedParameter' object has no attribute 'dtype'
[rank0]:[W429 09:31:28.021939950 ProcessGroupNCCL.cpp:1496] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())