from_pretrained() Error. Cannot use a path to import local model?
My code looks like this:
model = ESM3.from_pretrained(model_name = "./huggingface/hub/models--EvolutionaryScale--esm3-sm-open-v1/snapshots/66ecd636588d3100e13598a5720678db6583d01c/", device=torch.device("cpu"))
and give the error: `--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [27], line 1 ----> 1 model = ESM3.from_pretrained(model_name = "./huggingface/hub/models--EvolutionaryScale--esm3-sm-open-v1/snapshots/66ecd636588d3100e13598a5720678db6583d01c/", device=torch.device("cpu"))
File ~/users/wtt/anaconda3/envs/esm3/lib/python3.11/site-packages/esm/models/esm3.py:251, in ESM3.from_pretrained(cls, model_name, device) 249 if device is None: 250 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") --> 251 model = load_local_model(model_name, device=device) 252 if device.type != "cpu": 253 model = model.to(torch.bfloat16)
File ~/users/wtt/anaconda3/envs/esm3/lib/python3.11/site-packages/esm/pretrained.py:87, in load_local_model(model_name, device) 83 def load_local_model( 84 model_name: str, device: torch.device = torch.device("cpu") 85 ) -> nn.Module: 86 if model_name not in LOCAL_MODEL_REGISTRY: ---> 87 raise ValueError(f"Model {model_name} not found in local model registry.") 88 return LOCAL_MODEL_REGISTRY[model_name](device)
ValueError: Model ./huggingface/hub/models--EvolutionaryScale--esm3-sm-open-v1/snapshots/66ecd636588d3100e13598a5720678db6583d01c/ not found in local model registry.`
You may need to point one level further down, to .../66ecd636588d3100e13598a5720678db6583d01c/data
I have tried and have same error: `--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In [2], line 1 ----> 1 model = ESM3.from_pretrained(model_name = "./huggingface/hub/models--EvolutionaryScale--esm3-sm-open-v1/snapshots/66ecd636588d3100e13598a5720678db6583d01c/data", device=torch.device("cpu"))
File ~/users/wtt/anaconda3/envs/esm3/lib/python3.11/site-packages/esm/models/esm3.py:251, in ESM3.from_pretrained(cls, model_name, device) 249 if device is None: 250 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") --> 251 model = load_local_model(model_name, device=device) 252 if device.type != "cpu": 253 model = model.to(torch.bfloat16)
File ~/users/wtt/anaconda3/envs/esm3/lib/python3.11/site-packages/esm/pretrained.py:87, in load_local_model(model_name, device) 83 def load_local_model( 84 model_name: str, device: torch.device = torch.device("cpu") 85 ) -> nn.Module: 86 if model_name not in LOCAL_MODEL_REGISTRY: ---> 87 raise ValueError(f"Model {model_name} not found in local model registry.") 88 return LOCAL_MODEL_REGISTRY[model_name](device)
ValueError: Model ./huggingface/hub/models--EvolutionaryScale--esm3-sm-open-v1/snapshots/66ecd636588d3100e13598a5720678db6583d01c/data not found in local model registry.`
I have the same question. My remote server cannot connect to the Internet, so I must load the model from a local copy.
I have the same question. My remote server cannot connect to the Internet, so I must load the model from a local copy.
https://blog.csdn.net/m0_61474277/article/details/140348032 for reference
This is the set of model names that the function supports: https://github.com/evolutionaryscale/esm/blob/main/esm/utils/constants/models.py#L2-L8
If you're looking to download it without internet, I think you should refer to huggingface for details. I would try to just put the right files in the right locations, or to modify this file: https://github.com/evolutionaryscale/esm/blob/39a3a6cb1e722347947dc375e3f8e2ba80ed8b59/esm/utils/constants/esm3.py#L100-L105
Has anybody succeeded? I tried:
# Attempt to build an ESMC client from a manually downloaded checkpoint
# directory instead of letting the library fetch it from the Hugging Face Hub.
model_path = "/disk2/044/scprint/temp/data/esmc-600m-2024-12"
# Check if the model path exists
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model path not found: {model_path}")
# Load the model configuration
config_path = os.path.join(model_path, "config.json")
config = AutoConfig.from_pretrained(config_path)
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Initialize the model
client = ESMC(
    n_heads=config.num_attention_heads,
    n_layers=config.num_hidden_layers,
    tokenizer=tokenizer,
).to("cuda" if cuda else "cpu")
# Load the model weights
# NOTE: the sub-path must be relative. os.path.join discards every earlier
# component when a later one is absolute, so a leading "/" here would drop
# model_path and point at "/data/weights/..." on the filesystem root.
weights_path = os.path.join(model_path, "data/weights/esmc_600m_2024_12_v0.pth")
But it raised `OSError: Can't load tokenizer for '/disk2/044/scprint/temp/data/esmc-600m-2024-12'`. I looked through https://huggingface.co/EvolutionaryScale/esmc-600m-2024-12/tree/main but could not find tokenizer.json, vocab.json, etc.
I solved the problem by modifying both data_root() and the model = ESM3.from_pretrained() call, like this:
def data_root(model: str):
    """Return the directory that holds the data files for ``model``.

    Args:
        model: Model name; only its prefix (``esm3``, ``esmc-300``,
            ``esmc-600``) is inspected.

    Returns:
        An empty ``Path`` when the ``INFRA_PROVIDER`` environment variable is
        set; otherwise the snapshot directory reported by
        ``snapshot_download`` for ESM3 / ESMC-300M, or the manually
        downloaded local directory for ESMC-600M.

    Raises:
        ValueError: If ``model`` matches none of the known prefixes.
    """
    if "INFRA_PROVIDER" in os.environ:
        return Path("")
    # Try to download from Hugging Face if it doesn't exist
    if model.startswith("esm3"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esm3-sm-open-v1"))
    elif model.startswith("esmc-300"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esmc-300m-2024-12"))
    elif model.startswith("esmc-600"):
        # Manually downloaded copy: skips snapshot_download so no network
        # access is needed for this model.
        path = Path("/disk2/044/scprint/temp/data/esmc-600m-2024-12")
    else:
        # Without this branch and the return below, the function would fall
        # off the end and hand callers None instead of a path.
        raise ValueError(f"{model=} is an invalid model name.")
    return path
# Location of the manually downloaded ESMC-600M checkpoint.
local_weights_path = "/disk2/044/scprint/temp/data/esmc-600m-2024-12/data/weights/esmc_600m_2024_12_v0.pth"
# Choose the device once and reuse it for both the model and the checkpoint.
device = "cuda" if cuda else "cpu"
# Instantiate the model by its registry name, then load the local
# checkpoint into it.
client = load_local_model("esmc_600m", device=device)
state_dict = torch.load(local_weights_path, map_location=device)
client.load_state_dict(state_dict)
Hope this helps.
I solved the problem by both modifying the data_root() and model = ESM3.from_pretrained() like this:
def data_root(model: str):
    if "INFRA_PROVIDER" in os.environ:
        return Path("")
    # Try to download from hugginface if it doesn't exist
    if model.startswith("esm3"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esm3-sm-open-v1"))
    elif model.startswith("esmc-300"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esmc-300m-2024-12"))
    elif model.startswith("esmc-600"):
        path = Path("/disk2/044/scprint/temp/data/esmc-600m-2024-12") ###manually
local_weights_path = "/disk2/044/scprint/temp/data/esmc-600m-2024-12/data/weights/esmc_600m_2024_12_v0.pth"
client = load_local_model( "esmc_600m", device="cuda" if cuda else "cpu" )
state_dict = torch.load(local_weights_path, map_location="cuda" if cuda else "cpu")
client.load_state_dict(state_dict)
Hope this helps.
requests.exceptions.SSLError: (MaxRetryError('HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/EvolutionaryScale/esmc-600m-2024-12/revision/main (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'huggingface.co'. (_ssl.c:1007)")))'), '(Request ID: 119e2e20-ed26-4b45-a398-2e1266a498d0)')
您好,感谢您分享的解决办法,请问您有遇到这个问题吗?
First, clone all the weights from Hugging Face to your local machine, for example: git clone https://user:token@huggingface.co/EvolutionaryScale/esmc-300m-2024-12 All three model repositories should be downloaded.
Then, change the paths in the function data_root() to your local paths.
# original_code
def data_root(model: str):
    """Return the directory that holds the data files for ``model``.

    Args:
        model: Model name; only its prefix (``esm3``, ``esmc-300``,
            ``esmc-600``) is inspected.

    Returns:
        An empty ``Path`` when the ``INFRA_PROVIDER`` environment variable is
        set; otherwise the directory reported by ``snapshot_download`` for
        the matching EvolutionaryScale repository.

    Raises:
        ValueError: If ``model`` matches none of the known prefixes.
    """
    if "INFRA_PROVIDER" in os.environ:
        return Path("")
    # Try to download from Hugging Face if it doesn't exist
    if model.startswith("esm3"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esm3-sm-open-v1"))
    elif model.startswith("esmc-300"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esmc-300m-2024-12"))
    elif model.startswith("esmc-600"):
        path = Path(snapshot_download(repo_id="EvolutionaryScale/esmc-600m-2024-12"))
    else:
        raise ValueError(f"{model=} is an invalid model name.")
    return path
# change to
def data_root(model: str):
    """Return the local directory that holds the data files for ``model``.

    Offline variant: every known model prefix maps to a directory that was
    cloned to disk beforehand, so nothing is downloaded. Replace the
    ``/your/path/...`` placeholders with the real clone locations.

    Args:
        model: Model name; only its prefix (``esm3``, ``esmc-300``,
            ``esmc-600``) is inspected.

    Returns:
        An empty ``Path`` when the ``INFRA_PROVIDER`` environment variable is
        set; otherwise the configured local directory for the model.

    Raises:
        ValueError: If ``model`` matches none of the known prefixes.
    """
    if "INFRA_PROVIDER" in os.environ:
        return Path("")
    # Use the manually cloned repositories instead of downloading.
    if model.startswith("esm3"):
        path = Path("/your/path/esm3-sm-open-v1")
    elif model.startswith("esmc-300"):
        path = Path("/your/path/esmc-300m-2024-12")
    elif model.startswith("esmc-600"):
        path = Path("/your/path/esmc-600m-2024-12")
    else:
        raise ValueError(f"{model=} is an invalid model name.")
    return path
And then test the code:
# from huggingface_hub import login
from esm.models.esm3 import ESM3
from esm.sdk.api import ESM3InferenceClient, ESMProtein, GenerationConfig
# Load the open ESM3 model by its registry name and move it to the GPU.
model: ESM3InferenceClient = ESM3.from_pretrained("esm3-open").to("cuda")  # or "cpu"
prompt = '''MDEQSQGMQGPPVTQFQPQKALRPDMGYNTLANFRIEKKIGRGQFSEVYRASCLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLNHPNVIKYYASFIEDNELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALDHMHSRRVMHRDIKPANVFITATGVVKLGDLGLGRF'''
protein = ESMProtein(sequence=prompt)
# Step 1: generate on the sequence track (8 steps, temperature 0.7).
# This will iteratively unmask the sequence track.
sequence_config = GenerationConfig(track="sequence", num_steps=8, temperature=0.7)
protein = model.generate(protein, sequence_config)
# Step 2: generate on the structure track for the sequence from step 1,
# then write the result out as a PDB file.
structure_config = GenerationConfig(track="structure", num_steps=8)
protein = model.generate(protein, structure_config)
protein.to_pdb("test.pdb")
# # Then we can do a round trip design by inverse folding the sequence and recomputing the structure
# protein.sequence = None
# protein = model.generate(protein, GenerationConfig(track="sequence", num_steps=8))
# protein.coordinates = None
# protein = model.generate(protein, GenerationConfig(track="structure", num_steps=8))
# protein.to_pdb("./round_tripped.pdb")