Issue when running ingest.py "Unable to load weights from pytorch checkpoint"," If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."
C:\Users\sanke\Downloads\localGPT>python ingest.py Loading documents from C:\Users\sanke\Downloads\localGPT/SOURCE_DOCUMENTS Loaded 1 documents from C:\Users\sanke\Downloads\localGPT/SOURCE_DOCUMENTS Split into 72 chunks of text load INSTRUCTOR_Transformer ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │ │ tils.py:446 in load_state_dict │ │ │ │ 443 │ │ │ ) │ │ 444 │ │ return safe_load_file(checkpoint_file) │ │ 445 │ try: │ │ ❱ 446 │ │ return torch.load(checkpoint_file, map_location="cpu") │ │ 447 │ except Exception as e: │ │ 448 │ │ try: │ │ 449 │ │ │ with open(checkpoint_file) as f: │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\serialization.py: │ │ 797 in load │ │ │ │ 794 │ │ │ # If we want to actually tail call to torch.jit.load, we need to │ │ 795 │ │ │ # reset back to the original position. │ │ 796 │ │ │ orig_position = opened_file.tell() │ │ ❱ 797 │ │ │ with _open_zipfile_reader(opened_file) as opened_zipfile: │ │ 798 │ │ │ │ if _is_torchscript_zip(opened_zipfile): │ │ 799 │ │ │ │ │ warnings.warn("'torch.load' received a zip file that looks like a To │ │ 800 │ │ │ │ │ │ │ │ " dispatching to 'torch.jit.load' (call 'torch.jit.loa │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\serialization.py: │ │ 283 in init │ │ │ │ 280 │ │ 281 class _open_zipfile_reader(_opener): │ │ 282 │ def init(self, name_or_buffer) -> None: │ │ ❱ 283 │ │ super().init(torch._C.PyTorchFileReader(name_or_buffer)) │ │ 284 │ │ 285 │ │ 286 class _open_zipfile_writer_file(_opener): │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory
During handling of the above exception, another exception occurred:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │
│ tils.py:450 in load_state_dict │
│ │
│ 447 │ except Exception as e: │
│ 448 │ │ try: │
│ 449 │ │ │ with open(checkpoint_file) as f: │
│ ❱ 450 │ │ │ │ if f.read(7) == "version": │
│ 451 │ │ │ │ │ raise OSError( │
│ 452 │ │ │ │ │ │ "You seem to have cloned a repository without having git-lfs ins │
│ 453 │ │ │ │ │ │ "git-lfs and run git lfs install followed by git lfs pull in │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\encodings\cp1252.py:23 in decode │
│ │
│ 20 │
│ 21 class IncrementalDecoder(codecs.IncrementalDecoder): │
│ 22 │ def decode(self, input, final=False): │
│ ❱ 23 │ │ return codecs.charmap_decode(input,self.errors,decoding_table)[0] │
│ 24 │
│ 25 class StreamWriter(Codec,codecs.StreamWriter): │
│ 26 │ pass │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 1821: character maps to &lt;undefined&gt;
During handling of the above exception, another exception occurred:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\sanke\Downloads\localGPT\ingest.py:57 in main.""" │
│ ❱ 1130 │ │ return self.main(*args, **kwargs) │
│ 1131 │
│ 1132 │
│ 1133 class Command(BaseCommand): │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:1055 in │
│ main │
│ │
│ 1052 │ │ try: │
│ 1053 │ │ │ try: │
│ 1054 │ │ │ │ with self.make_context(prog_name, args, **extra) as ctx: │
│ ❱ 1055 │ │ │ │ │ rv = self.invoke(ctx) │
│ 1056 │ │ │ │ │ if not standalone_mode: │
│ 1057 │ │ │ │ │ │ return rv │
│ 1058 │ │ │ │ │ # it's not safe to ctx.exit(rv) here! │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:1404 in │
│ invoke │
│ │
│ 1401 │ │ │ echo(style(message, fg="red"), err=True) │
│ 1402 │ │ │
│ 1403 │ │ if self.callback is not None: │
│ ❱ 1404 │ │ │ return ctx.invoke(self.callback, **ctx.params) │
│ 1405 │ │
│ 1406 │ def shell_complete(self, ctx: Context, incomplete: str) -> t.List["CompletionItem"]: │
│ 1407 │ │ """Return a list of completions for the incomplete value. Looks │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:760 in │
│ invoke │
│ │
│ 757 │ │ │
│ 758 │ │ with augment_usage_errors(__self): │
│ 759 │ │ │ with ctx: │
│ ❱ 760 │ │ │ │ return __callback(*args, **kwargs) │
│ 761 │ │
│ 762 │ def forward( │
│ 763 │ │ __self, __cmd: "Command", args: t.Any, **kwargs: t.Any # noqa: B902 │
│ │
│ C:\Users\sanke\Downloads\localGPT\ingest.py:48 in main │
│ │
│ 45 │ print(f"Split into {len(texts)} chunks of text") │
│ 46 │ │
│ 47 │ # Create embeddings │
│ ❱ 48 │ embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", │
│ 49 │ │ │ │ │ │ │ │ │ │ │ │ model_kwargs={"device": device}) │
│ 50 │ │
│ 51 │ db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY, c │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\embeddings\hu │
│ ggingface.py:127 in init │
│ │
│ 124 │ │ try: │
│ 125 │ │ │ from InstructorEmbedding import INSTRUCTOR │
│ 126 │ │ │ │
│ ❱ 127 │ │ │ self.client = INSTRUCTOR( │
│ 128 │ │ │ │ self.model_name, cache_folder=self.cache_folder, **self.model_kwargs │
│ 129 │ │ │ ) │
│ 130 │ │ except ImportError as e: │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\S │
│ entenceTransformer.py:95 in init │
│ │
│ 92 │ │ │ │ │ │ │ │ │ │ use_auth_token=use_auth_token) │
│ 93 │ │ │ │
│ 94 │ │ │ if os.path.exists(os.path.join(model_path, 'modules.json')): #Load as Sen │
│ ❱ 95 │ │ │ │ modules = self._load_sbert_model(model_path) │
│ 96 │ │ │ else: #Load with AutoModel │
│ 97 │ │ │ │ modules = self._load_auto_model(model_path) │
│ 98 │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │
│ tructor.py:474 in load_sbert_model │
│ │
│ 471 │ │ │ │ module_class = INSTRUCTOR_Pooling │
│ 472 │ │ │ else: │
│ 473 │ │ │ │ module_class = import_from_string(module_config['type']) │
│ ❱ 474 │ │ │ module = module_class.load(os.path.join(model_path, module_config['path'])) │
│ 475 │ │ │ modules[module_config['name']] = module │
│ 476 │ │ │
│ 477 │ │ return modules │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │
│ tructor.py:306 in load │
│ │
│ 303 │ │ │
│ 304 │ │ with open(sbert_config_path) as fIn: │
│ 305 │ │ │ config = json.load(fIn) │
│ ❱ 306 │ │ return INSTRUCTOR_Transformer(model_name_or_path=input_path, **config) │
│ 307 │ │
│ 308 │ def tokenize(self, texts): │
│ 309 │ │ """ │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │
│ tructor.py:240 in init │
│ │
│ 237 │ │ │ config = AutoConfig.from_pretrained(os.path.join(model_name_or_path,'with_pr │
│ 238 │ │ else: │
│ 239 │ │ │ config = AutoConfig.from_pretrained(model_name_or_path, **model_args, cache │
│ ❱ 240 │ │ self._load_model(self.model_name_or_path, config, cache_dir, **model_args) │
│ 241 │ │ │
│ 242 │ │ self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path if tokeniz │
│ 243 │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\m │
│ odels\Transformer.py:47 in _load_model │
│ │
│ 44 │ def _load_model(self, model_name_or_path, config, cache_dir): │
│ 45 │ │ """Loads the transformer model""" │
│ 46 │ │ if isinstance(config, T5Config): │
│ ❱ 47 │ │ │ self._load_t5_model(model_name_or_path, config, cache_dir) │
│ 48 │ │ else: │
│ 49 │ │ │ self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=confi │
│ 50 │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\m │
│ odels\Transformer.py:55 in _load_t5_model │
│ │
│ 52 │ │ """Loads the encoder model from T5""" │
│ 53 │ │ from transformers import T5EncoderModel │
│ 54 │ │ T5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder."] │
│ ❱ 55 │ │ self.auto_model = T5EncoderModel.from_pretrained(model_name_or_path, config=conf │
│ 56 │ │
│ 57 │ def repr(self): │
│ 58 │ │ return "Transformer({}) with Transformer model: {} ".format(self.get_config_dict │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │
│ tils.py:2568 in from_pretrained │
│ │
│ 2565 │ │ if from_pt: │
│ 2566 │ │ │ if not is_sharded and state_dict is None: │
│ 2567 │ │ │ │ # Time to load the checkpoint │
│ ❱ 2568 │ │ │ │ state_dict = load_state_dict(resolved_archive_file) │
│ 2569 │ │ │ │
│ 2570 │ │ │ # set dtype to instantiate the model under: │
│ 2571 │ │ │ # 1. If torch_dtype is not None, we use that dtype │
│ │
│ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │
│ tils.py:462 in load_state_dict │
│ │
│ 459 │ │ │ │ │ │ "model. Make sure you have saved the model properly." │
│ 460 │ │ │ │ │ ) from e │
│ 461 │ │ except (UnicodeDecodeError, ValueError): │
│ ❱ 462 │ │ │ raise OSError( │
│ 463 │ │ │ │ f"Unable to load weights from pytorch checkpoint file for '{checkpoint_f │
│ 464 │ │ │ │ f"at '{checkpoint_file}'. " │
│ 465 │ │ │ │ "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please s │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
OSError: Unable to load weights from pytorch checkpoint file for
'C:\Users\sanke/.cache\torch\sentence_transformers\hkunlp_instructor-xl\pytorch_model.bin' at
'C:\Users\sanke/.cache\torch\sentence_transformers\hkunlp_instructor-xl\pytorch_model.bin'. If you tried to load a
PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.
C:\Users\sanke\Downloads\localGPT>
I have Python 3.10 installed. Not sure what's causing this.
I was getting the same error. Sometimes the model is not downloaded completely. I had the same error; I deleted the model and redownloaded it, and it started working. Try it — maybe it works for you.
> I was getting the same error. Sometimes the model is not downloaded completely. I had the same error; I deleted the model and redownloaded it, and it started working. Try it — maybe it works for you.

Hi Kashif-Raza6, I have the same problem with pytorch_model.bin... I'm not sure how I can redownload it?