Error installing and running Falcon Models
Dear community,
When trying to install and run the Falcon model, I'm getting the following error:
┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:1395 │
│ in download_models │
│ │
│ 1392 │ ).for_model(model_name, model_id=model_id, llm_config=config) │
│ 1393 │ │
│ 1394 │ try: │
│ > 1395 │ │ ref = bentoml.transformers.get(model.tag) │
│ 1396 │ │ if machine: │
│ 1397 │ │ │ # NOTE: When debug is enabled, │
│ 1398 │ │ │ # We will prefix the tag with tag and we can use reg │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\f │
│ rameworks\transformers.py:292 in get │
│ │
│ 289 │ # target model must be from the BentoML model store │
│ 290 │ model = bentoml.transformers.get("my_pipeline:latest") │
│ 291 │ """ │
│ > 292 │ model = bentoml.models.get(tag_like) │
│ 293 │ if model.info.module not in (MODULE_NAME, __name__): │
│ 294 │ │ raise NotFound( │
│ 295 │ │ │ f"Model {model.tag} was saved with module {model.info.mod │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\simple_di\__init__. │
│ py:139 in _ │
│ │
│ 136 │ │ bind = sig.bind_partial(*filtered_args, **filtered_kwargs) │
│ 137 │ │ bind.apply_defaults() │
│ 138 │ │ │
│ > 139 │ │ return func(*_inject_args(bind.args), **_inject_kwargs(bind.k │
│ 140 │ │ │
│ 141 │ setattr(_, "_is_injected", True) │
│ 142 │ return cast(WrappedCallable, _) │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\models.py:4 │
│ 2 in get │
│ │
│ 39 │ *, │
│ 40 │ _model_store: "ModelStore" = Provide[BentoMLContainer.model_store │
│ 41 ) -> "Model": │
│ > 42 │ return _model_store.get(tag) │
│ 43 │
│ 44 │
│ 45 @inject │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\bentoml\_internal\s │
│ tore.py:146 in get │
│ │
│ 143 │ │ matches = self._fs.glob(f"{path}/") │
│ 144 │ │ counts = matches.count().directories │
│ 145 │ │ if counts == 0: │
│ > 146 │ │ │ raise NotFound( │
│ 147 │ │ │ │ f"{self._item_type.get_typename()} '{tag}' is not fou │
│ 148 │ │ │ ) │
│ 149 │ │ elif counts == 1: │
└─────────────────────────────────────────────────────────────────────────────┘
NotFound: Model 'pt-tiiuae-falcon-7b:2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5' is not found in BentoML store <osfs 'C:\Users\pedro\bentoml\models'>
During handling of the above exception, another exception occurred:
┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ in _run_module_as_main:198 │
│ in run_code:88 │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\__main__.py │
│ :26 in main.""" │
│ > 1130 │ │ return self.main(*args, **kwargs) │
│ 1131 │
│ 1132 │
│ 1133 class Command(BaseCommand): │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1055 │
│ in main │
│ │
│ 1052 │ │ try: │
│ 1053 │ │ │ try: │
│ 1054 │ │ │ │ with self.make_context(prog_name, args, **extra) as │
│ > 1055 │ │ │ │ │ rv = self.invoke(ctx) │
│ 1056 │ │ │ │ │ if not standalone_mode: │
│ 1057 │ │ │ │ │ │ return rv │
│ 1058 │ │ │ │ │ # it's not safe to ctx.exit(rv) here! │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1657 │
│ in invoke │
│ │
│ 1654 │ │ │ │ super().invoke(ctx) │
│ 1655 │ │ │ │ sub_ctx = cmd.make_context(cmd_name, args, parent=ct │
│ 1656 │ │ │ │ with sub_ctx: │
│ > 1657 │ │ │ │ │ return _process_result(sub_ctx.command.invoke(su │
│ 1658 │ │ │
│ 1659 │ │ # In chain mode we create the contexts step by step, but aft │
│ 1660 │ │ # base command has been invoked. Because at that point we d │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:1404 │
│ in invoke │
│ │
│ 1401 │ │ │ echo(style(message, fg="red"), err=True) │
│ 1402 │ │ │
│ 1403 │ │ if self.callback is not None: │
│ > 1404 │ │ │ return ctx.invoke(self.callback, **ctx.params) │
│ 1405 │ │
│ 1406 │ def shell_complete(self, ctx: Context, incomplete: str) -> t.Lis │
│ 1407 │ │ """Return a list of completions for the incomplete value. Lo │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\click\core.py:760 │
│ in invoke │
│ │
│ 757 │ │ │
│ 758 │ │ with augment_usage_errors(__self): │
│ 759 │ │ │ with ctx: │
│ > 760 │ │ │ │ return __callback(*args, **kwargs) │
│ 761 │ │
│ 762 │ def forward( │
│ 763 │ │ __self, __cmd: "Command", *args: t.Any, **kwargs: t.Any # n │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:380 │
│ in wrapper │
│ │
│ 377 │ │ @functools.wraps(func) │
│ 378 │ │ def wrapper(*args: P.args, **attrs: P.kwargs) -> t.Any: │
│ 379 │ │ │ try: │
│ > 380 │ │ │ │ return func(*args, **attrs) │
│ 381 │ │ │ except OpenLLMException as err: │
│ 382 │ │ │ │ raise click.ClickException( │
│ 383 │ │ │ │ │ click.style(f"[{group.name}] '{command_name}' fa │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:353 │
│ in wrapper │
│ │
│ 350 │ │ │ │ assert group.name is not None, "group.name should no │
│ 351 │ │ │ │ event = analytics.OpenllmCliEvent(cmd_group=group.na │
│ 352 │ │ │ │ try: │
│ > 353 │ │ │ │ │ return_value = func(*args, **attrs) │
│ 354 │ │ │ │ │ duration_in_ms = (time.time_ns() - start_time) / │
│ 355 │ │ │ │ │ event.duration_in_ms = duration_in_ms │
│ 356 │ │ │ │ │ analytics.track(event) │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:328 │
│ in wrapper │
│ │
│ 325 │ │ │ │
│ 326 │ │ │ configure_logging() │
│ 327 │ │ │ │
│ > 328 │ │ │ return f(*args, **attrs) │
│ 329 │ │ │
│ 330 │ │ return t.cast("ClickFunctionWrapper[..., t.Any]", wrapper) │
│ 331 │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\cli.py:1422 │
│ in download_models │
│ │
│ 1419 │ │ │ ) │
│ 1420 │ │ │
│ 1421 │ │ (model_args, model_attrs), tokenizer_attrs = model.llm_param │
│ > 1422 │ │ ref = model.import_model( │
│ 1423 │ │ │ model.model_id, │
│ 1424 │ │ │ model.tag, │
│ 1425 │ │ │ *model_args, │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\openllm\models\falc │
│ on\modeling_falcon.py:56 in import_model │
│ │
│ 53 │ │ device_map = attrs.pop("device_map", "auto") │
│ 54 │ │ │
│ 55 │ │ tokenizer = transformers.AutoTokenizer.from_pretrained(model │
│ > 56 │ │ model = transformers.AutoModelForCausalLM.from_pretrained( │
│ 57 │ │ │ model_id, │
│ 58 │ │ │ trust_remote_code=trust_remote_code, │
│ 59 │ │ │ torch_dtype=torch_dtype, │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\models │
│ \auto\auto_factory.py:479 in from_pretrained │
│ │
│ 476 │ │ │ │ class_ref, pretrained_model_name_or_path, **hub_kwarg │
│ 477 │ │ │ ) │
│ 478 │ │ │ _ = hub_kwargs.pop("code_revision", None) │
│ > 479 │ │ │ return model_class.from_pretrained( │
│ 480 │ │ │ │ pretrained_model_name_or_path, *model_args, config=co │
│ 481 │ │ │ ) │
│ 482 │ │ elif type(config) in cls._model_mapping.keys(): │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeli │
│ ng_utils.py:2881 in from_pretrained │
│ │
│ 2878 │ │ │ │ mismatched_keys, │
│ 2879 │ │ │ │ offload_index, │
│ 2880 │ │ │ │ error_msgs, │
│ > 2881 │ │ │ ) = cls._load_pretrained_model( │
│ 2882 │ │ │ │ model, │
│ 2883 │ │ │ │ state_dict, │
│ 2884 │ │ │ │ loaded_state_dict_keys, # XXX: rename? │
│ │
│ C:\Users\pedro\anaconda3\envs\powerai\Lib\site-packages\transformers\modeli │
│ ng_utils.py:2980 in _load_pretrained_model │
│ │
│ 2977 │ │ │ ) │
│ 2978 │ │ │ is_safetensors = archive_file.endswith(".safetensors") │
│ 2979 │ │ │ if offload_folder is None and not is_safetensors: │
│ > 2980 │ │ │ │ raise ValueError( │
│ 2981 │ │ │ │ │ "The current device_map had weights offloaded │
│ 2982 │ │ │ │ │ " for them. Alternatively, make sure you have s │
│ 2983 │ │ │ │ │ " offers the weights in this format." │
└─────────────────────────────────────────────────────────────────────────────┘
ValueError: The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder` for them. Alternatively, make sure you have `safetensors` installed if the model you are using offers the weights in this format.
Traceback (most recent call last):
File "
Can someone help me with this topic?
Thank you.
I got the same issue trying to use Falcon with openllm==0.1.17.
It seems like your machine doesn't have enough resources, so the weights are being offloaded to disk. I will need more bandwidth to investigate how to run Falcon on smaller machines.
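In the meantime, a possible workaround is to give transformers somewhere to put the offloaded weights, which is what the ValueError above asks for. A rough sketch only (this is not the OpenLLM code path, and the offload directory name is just a placeholder):

import transformers

model_id = "tiiuae/falcon-7b"

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,     # Falcon ships custom modelling code on the Hub
    device_map="auto",          # let accelerate place layers on GPU/CPU/disk
    offload_folder="offload",   # placeholder dir for layers that spill to disk
)

The other option the error message suggests is installing safetensors (pip install safetensors), which only helps if the model repo you are using actually publishes weights in that format.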
I have the same issue on a server with massive resources:
CalledProcessError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/langchain/llms/openllm.py in __init__(self, model_name, model_id, server_url, server_type, embedded, **llm_kwargs)
    168         # in-process. Wrt to BentoML users, setting embedded=False is the expected
    169         # behaviour to invoke the runners remotely
--> 170         runner = openllm.Runner(
    171             model_name=model_name,
    172             model_id=model_id,

~/.local/lib/python3.8/site-packages/openllm/_llm.py in Runner(model_name, ensure_available, init_local, implementation, **attrs)
   1404     behaviour
   1405     """
--> 1406     runner = t.cast(
   1407         "_BaseAutoLLMClass",
   1408         openllm[implementation if implementation is not None else EnvVarMixin(model_name)["framework_value"]],  # type: ignore (internal API)

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in create_runner(cls, model_name, model_id, **attrs)
    155         A LLM instance.
    156         """
--> 157         llm, runner_attrs = cls.for_model(model_name, model_id, return_runner_kwargs=True, **attrs)
    158         return llm.to_runner(**runner_attrs)
    159

~/.local/lib/python3.8/site-packages/openllm/models/auto/factory.py in for_model(cls, model_name, model_id, return_runner_kwargs, llm_config, ensure_available, **attrs)
    133             llm.model_id,
    134         )
--> 135         llm.ensure_model_id_exists()
    136         if not return_runner_kwargs:
    137             return llm

~/.local/lib/python3.8/site-packages/openllm/_llm.py in ensure_model_id_exists(self)
    898         Auto LLM initialisation.
    899         """
--> 900         output = subprocess.check_output(
    901             [
    902                 sys.executable,

/usr/lib/python3.8/subprocess.py in check_output(timeout, *popenargs, **kwargs)
    413         kwargs['input'] = empty
    414
--> 415     return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
    416                **kwargs).stdout
    417

/usr/lib/python3.8/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    514     retcode = process.poll()
    515     if check and retcode:
--> 516         raise CalledProcessError(retcode, process.args,
    517                                  output=stdout, stderr=stderr)
    518     return CompletedProcess(process.args, retcode, stdout, stderr)
CalledProcessError: Command '['/usr/bin/python3', '-m', 'openllm', 'download', 'falcon', '--model-id', 'tiiuae/falcon-40b-instruct', '--machine', '--implementation', 'pt']' returned non-zero exit status 1.
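The CalledProcessError only says that the background download step exited non-zero; the real cause is whatever that child process printed. One way to surface it is to re-run the same download command directly, a sketch based on the command shown in the error above (the --machine flag is dropped here on the assumption that it only makes the output machine-parseable):

import subprocess
import sys

# Re-run the download step that openllm.Runner launches internally so that
# the child process output (and the underlying error) is printed to the terminal.
subprocess.run(
    [sys.executable, "-m", "openllm", "download", "falcon",
     "--model-id", "tiiuae/falcon-40b-instruct", "--implementation", "pt"],
    check=True,
)

If the underlying error turns out to be the same offload_folder / safetensors ValueError as in the first traceback, the same workaround applies.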
Same issue as the one in this ticket: https://github.com/bentoml/OpenLLM/issues/121
Please reopen if you still see this error on 0.3.0