localGPT icon indicating copy to clipboard operation
localGPT copied to clipboard

Issue when running ingest.py "Unable to load weights from pytorch checkpoint"," If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."

Open sankeer28 opened this issue 2 years ago • 1 comments

C:\Users\sanke\Downloads\localGPT>python ingest.py Loading documents from C:\Users\sanke\Downloads\localGPT/SOURCE_DOCUMENTS Loaded 1 documents from C:\Users\sanke\Downloads\localGPT/SOURCE_DOCUMENTS Split into 72 chunks of text load INSTRUCTOR_Transformer ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │ │ tils.py:446 in load_state_dict │ │ │ │ 443 │ │ │ ) │ │ 444 │ │ return safe_load_file(checkpoint_file) │ │ 445 │ try: │ │ ❱ 446 │ │ return torch.load(checkpoint_file, map_location="cpu") │ │ 447 │ except Exception as e: │ │ 448 │ │ try: │ │ 449 │ │ │ with open(checkpoint_file) as f: │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\serialization.py: │ │ 797 in load │ │ │ │ 794 │ │ │ # If we want to actually tail call to torch.jit.load, we need to │ │ 795 │ │ │ # reset back to the original position. │ │ 796 │ │ │ orig_position = opened_file.tell() │ │ ❱ 797 │ │ │ with _open_zipfile_reader(opened_file) as opened_zipfile: │ │ 798 │ │ │ │ if _is_torchscript_zip(opened_zipfile): │ │ 799 │ │ │ │ │ warnings.warn("'torch.load' received a zip file that looks like a To │ │ 800 │ │ │ │ │ │ │ │ " dispatching to 'torch.jit.load' (call 'torch.jit.loa │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\serialization.py: │ │ 283 in init │ │ │ │ 280 │ │ 281 class _open_zipfile_reader(_opener): │ │ 282 │ def init(self, name_or_buffer) -> None: │ │ ❱ 283 │ │ super().init(torch._C.PyTorchFileReader(name_or_buffer)) │ │ 284 │ │ 285 │ │ 286 class _open_zipfile_writer_file(_opener): │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory

During handling of the above exception, another exception occurred:

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │ │ tils.py:450 in load_state_dict │ │ │ │ 447 │ except Exception as e: │ │ 448 │ │ try: │ │ 449 │ │ │ with open(checkpoint_file) as f: │ │ ❱ 450 │ │ │ │ if f.read(7) == "version": │ │ 451 │ │ │ │ │ raise OSError( │ │ 452 │ │ │ │ │ │ "You seem to have cloned a repository without having git-lfs ins │ │ 453 │ │ │ │ │ │ "git-lfs and run git lfs install followed by git lfs pull in │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\encodings\cp1252.py:23 in decode │ │ │ │ 20 │ │ 21 class IncrementalDecoder(codecs.IncrementalDecoder): │ │ 22 │ def decode(self, input, final=False): │ │ ❱ 23 │ │ return codecs.charmap_decode(input,self.errors,decoding_table)[0] │ │ 24 │ │ 25 class StreamWriter(Codec,codecs.StreamWriter): │ │ 26 │ pass │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 1821: character maps to

During handling of the above exception, another exception occurred:

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮ │ C:\Users\sanke\Downloads\localGPT\ingest.py:57 in │ │ │ │ 54 │ │ 55 │ │ 56 if name == "main": │ │ ❱ 57 │ main() │ │ 58 │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:1130 in │ │ call │ │ │ │ 1127 │ │ │ 1128 │ def call(self, *args: t.Any, **kwargs: t.Any) -> t.Any: │ │ 1129 │ │ """Alias for :meth:main.""" │ │ ❱ 1130 │ │ return self.main(*args, **kwargs) │ │ 1131 │ │ 1132 │ │ 1133 class Command(BaseCommand): │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:1055 in │ │ main │ │ │ │ 1052 │ │ try: │ │ 1053 │ │ │ try: │ │ 1054 │ │ │ │ with self.make_context(prog_name, args, **extra) as ctx: │ │ ❱ 1055 │ │ │ │ │ rv = self.invoke(ctx) │ │ 1056 │ │ │ │ │ if not standalone_mode: │ │ 1057 │ │ │ │ │ │ return rv │ │ 1058 │ │ │ │ │ # it's not safe to ctx.exit(rv) here! │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:1404 in │ │ invoke │ │ │ │ 1401 │ │ │ echo(style(message, fg="red"), err=True) │ │ 1402 │ │ │ │ 1403 │ │ if self.callback is not None: │ │ ❱ 1404 │ │ │ return ctx.invoke(self.callback, **ctx.params) │ │ 1405 │ │ │ 1406 │ def shell_complete(self, ctx: Context, incomplete: str) -> t.List["CompletionItem"]: │ │ 1407 │ │ """Return a list of completions for the incomplete value. 
Looks │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\click\core.py:760 in │ │ invoke │ │ │ │ 757 │ │ │ │ 758 │ │ with augment_usage_errors(__self): │ │ 759 │ │ │ with ctx: │ │ ❱ 760 │ │ │ │ return __callback(*args, **kwargs) │ │ 761 │ │ │ 762 │ def forward( │ │ 763 │ │ __self, __cmd: "Command", args: t.Any, **kwargs: t.Any # noqa: B902 │ │ │ │ C:\Users\sanke\Downloads\localGPT\ingest.py:48 in main │ │ │ │ 45 │ print(f"Split into {len(texts)} chunks of text") │ │ 46 │ │ │ 47 │ # Create embeddings │ │ ❱ 48 │ embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", │ │ 49 │ │ │ │ │ │ │ │ │ │ │ │ model_kwargs={"device": device}) │ │ 50 │ │ │ 51 │ db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY, c │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\embeddings\hu │ │ ggingface.py:127 in init │ │ │ │ 124 │ │ try: │ │ 125 │ │ │ from InstructorEmbedding import INSTRUCTOR │ │ 126 │ │ │ │ │ ❱ 127 │ │ │ self.client = INSTRUCTOR( │ │ 128 │ │ │ │ self.model_name, cache_folder=self.cache_folder, **self.model_kwargs │ │ 129 │ │ │ ) │ │ 130 │ │ except ImportError as e: │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\S │ │ entenceTransformer.py:95 in init │ │ │ │ 92 │ │ │ │ │ │ │ │ │ │ use_auth_token=use_auth_token) │ │ 93 │ │ │ │ │ 94 │ │ │ if os.path.exists(os.path.join(model_path, 'modules.json')): #Load as Sen │ │ ❱ 95 │ │ │ │ modules = self._load_sbert_model(model_path) │ │ 96 │ │ │ else: #Load with AutoModel │ │ 97 │ │ │ │ modules = self._load_auto_model(model_path) │ │ 98 │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │ │ tructor.py:474 in load_sbert_model │ │ │ │ 471 │ │ │ │ module_class = INSTRUCTOR_Pooling │ │ 472 │ │ │ else: │ │ 473 │ │ │ │ module_class = import_from_string(module_config['type']) │ │ ❱ 474 │ │ │ module = 
module_class.load(os.path.join(model_path, module_config['path'])) │ │ 475 │ │ │ modules[module_config['name']] = module │ │ 476 │ │ │ │ 477 │ │ return modules │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │ │ tructor.py:306 in load │ │ │ │ 303 │ │ │ │ 304 │ │ with open(sbert_config_path) as fIn: │ │ 305 │ │ │ config = json.load(fIn) │ │ ❱ 306 │ │ return INSTRUCTOR_Transformer(model_name_or_path=input_path, **config) │ │ 307 │ │ │ 308 │ def tokenize(self, texts): │ │ 309 │ │ """ │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\InstructorEmbedding\ins │ │ tructor.py:240 in init │ │ │ │ 237 │ │ │ config = AutoConfig.from_pretrained(os.path.join(model_name_or_path,'with_pr │ │ 238 │ │ else: │ │ 239 │ │ │ config = AutoConfig.from_pretrained(model_name_or_path, **model_args, cache │ │ ❱ 240 │ │ self._load_model(self.model_name_or_path, config, cache_dir, **model_args) │ │ 241 │ │ │ │ 242 │ │ self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path if tokeniz │ │ 243 │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\m │ │ odels\Transformer.py:47 in _load_model │ │ │ │ 44 │ def _load_model(self, model_name_or_path, config, cache_dir): │ │ 45 │ │ """Loads the transformer model""" │ │ 46 │ │ if isinstance(config, T5Config): │ │ ❱ 47 │ │ │ self._load_t5_model(model_name_or_path, config, cache_dir) │ │ 48 │ │ else: │ │ 49 │ │ │ self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=confi │ │ 50 │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\sentence_transformers\m │ │ odels\Transformer.py:55 in _load_t5_model │ │ │ │ 52 │ │ """Loads the encoder model from T5""" │ │ 53 │ │ from transformers import T5EncoderModel │ │ 54 │ │ T5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder."] │ │ ❱ 55 │ │ self.auto_model = T5EncoderModel.from_pretrained(model_name_or_path, 
config=conf │ │ 56 │ │ │ 57 │ def repr(self): │ │ 58 │ │ return "Transformer({}) with Transformer model: {} ".format(self.get_config_dict │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │ │ tils.py:2568 in from_pretrained │ │ │ │ 2565 │ │ if from_pt: │ │ 2566 │ │ │ if not is_sharded and state_dict is None: │ │ 2567 │ │ │ │ # Time to load the checkpoint │ │ ❱ 2568 │ │ │ │ state_dict = load_state_dict(resolved_archive_file) │ │ 2569 │ │ │ │ │ 2570 │ │ │ # set dtype to instantiate the model under: │ │ 2571 │ │ │ # 1. If torch_dtype is not None, we use that dtype │ │ │ │ C:\Users\sanke\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_u │ │ tils.py:462 in load_state_dict │ │ │ │ 459 │ │ │ │ │ │ "model. Make sure you have saved the model properly." │ │ 460 │ │ │ │ │ ) from e │ │ 461 │ │ except (UnicodeDecodeError, ValueError): │ │ ❱ 462 │ │ │ raise OSError( │ │ 463 │ │ │ │ f"Unable to load weights from pytorch checkpoint file for '{checkpoint_f │ │ 464 │ │ │ │ f"at '{checkpoint_file}'. " │ │ 465 │ │ │ │ "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please s │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ OSError: Unable to load weights from pytorch checkpoint file for 'C:\Users\sanke/.cache\torch\sentence_transformers\hkunlp_instructor-xl\pytorch_model.bin' at 'C:\Users\sanke/.cache\torch\sentence_transformers\hkunlp_instructor-xl\pytorch_model.bin'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.

C:\Users\sanke\Downloads\localGPT>

I have Python 3.10 installed. I'm not sure what's causing this.

sankeer28 avatar Jun 01 '23 02:06 sankeer28

I was getting the same error. Sometimes the model is not downloaded completely. I had the same error; I deleted the model and redownloaded it, and it started working. Try that — maybe it works for you too.

Kashif-Raza6 avatar Jun 01 '23 08:06 Kashif-Raza6

> I was getting the same error. Sometimes the model is not downloaded completely. I had the same error; I deleted the model and redownloaded it, and it started working. Try that — maybe it works for you too.

Hi Kashif-Raza6, I have the same problem with pytorch_model.bin. I'm not sure how I can redownload it — could you explain?

borkoRi avatar Aug 16 '23 10:08 borkoRi