When crewai memory=True, and many agents, os error: memory path too long
OSError: [Errno 63] File name too long: '/Users/darrenlynch/Library/Application Support/[project name]/short_term/[some agent name] _ [some agent name] _ [some agent name] _ [some agent name] _ [some agent name] _ [some agent name] _ [some agent name]'
If you haven't already, can you try the latest version, crewai-0.35.8?
I also get this on 0.36.0. Please advise.
This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.
This issue was closed because it has been stalled for 5 days with no activity.
# Suggested modification: update RAGStorage.__init__ in rag_storage.py so the
# memory key is an MD5 signature instead of the concatenated agent role names.
def __init__(self, type, allow_reset=True, embedder_config=None, crew=None):
    """Build the storage config with a short, hashed per-crew memory directory.

    Args:
        type: Memory kind; used as the app name, the collection name and a
            component of the storage directory.
        allow_reset: Forwarded as the vector DB's ``allow_reset`` setting.
        embedder_config: Accepted for interface compatibility; not read in
            the lines shown here.
        crew: Optional crew whose agent roles determine the memory key. When
            it is missing or has no agents the key is ``"default"``.
    """
    super().__init__()
    # Install a placeholder key when none is set and the base URL is not the
    # official OpenAI endpoint (same truth value as `not a and not b == c`).
    if (
        not os.getenv("OPENAI_API_KEY")
        and os.getenv("OPENAI_BASE_URL") != "https://api.openai.com/v1"
    ):
        os.environ["OPENAI_API_KEY"] = "fake"

    # Create a hash of agent roles for memory path
    if crew and crew.agents:
        # Sort roles for consistent hashing across runs
        agent_roles = sorted(self._sanitize_role(agent.role) for agent in crew.agents)
        roles_str = "_".join(agent_roles)
        # Create a hash of the roles string; only the first 12 hex characters
        # are kept so the directory name stays short.
        memory_key = md5(roles_str.encode(), usedforsecurity=False).hexdigest()[:12]
    else:
        memory_key = "default"

    config = {
        "app": {
            "config": {"name": type, "collect_metrics": False, "log_level": "ERROR"}
        },
        "chunker": {
            "chunk_size": 5000,
            "chunk_overlap": 100,
            "length_function": "len",
            "min_chunk_size": 150,
        },
        "vectordb": {
            "provider": "chroma",
            "config": {
                "collection_name": type,
                "dir": f"{db_storage_path()}/{type}/{memory_key}",  # Use hashed memory key
                "allow_reset": allow_reset,
            },
        },
    }
    # NOTE(review): the snippet stops after building `config`; the code that
    # consumes it is not shown here.
@dchevenement You are correct but the rag_storage.py code has been changed now.
Current code is something like this
def __init__(self, type, allow_reset=True, embedder_config=None, crew=None, path=None):
    """Record the storage settings and start the backing client.

    The sanitized roles of every agent in ``crew`` are joined with ``"_"``
    and stored on ``self.agents``; with no crew the result is an empty
    string. ``_initialize_app`` is invoked last, once all attributes are set.
    """
    super().__init__(type, allow_reset, embedder_config, crew)

    crew_members = crew.agents if crew else []
    # One "_"-separated string holding every agent role, in crew order.
    self.agents = "_".join(
        self._sanitize_role(member.role) for member in crew_members
    )

    self.type = type
    self.allow_reset = allow_reset
    self.path = path
    self._initialize_app()
def _set_embedder_config(self):
    """Replace ``self.embedder_config`` with the configurator's output for it."""
    self.embedder_config = EmbeddingConfigurator().configure_embedder(
        self.embedder_config
    )
def _initialize_app(self):
    """Configure the embedder, then open a persistent Chroma client.

    An explicit ``self.path`` wins; otherwise the directory is derived from
    the storage root, the memory type and ``self.agents``.
    """
    import chromadb
    from chromadb.config import Settings

    self._set_embedder_config()

    # Directory in which Chroma persists this memory.
    storage_dir = (
        self.path if self.path else f"{db_storage_path()}/{self.type}/{self.agents}"
    )
    client_settings = Settings(allow_reset=self.allow_reset)
    chroma_client = chromadb.PersistentClient(
        path=storage_dir,
        settings=client_settings,
    )
So the path f"{db_storage_path()}/{self.type}/{self.agents}" is causing the issue, as self.agents can be very long. A workaround I have implemented is to use the current time instead of self.agents. It works, but I am not sure if this is correct.
The new line would be something like this
# Workaround: use the current timestamp as the last path component instead of
# self.agents.
# NOTE(review): time.time() returns a different value on every launch, so the
# derived directory will presumably differ between runs — confirm this is
# acceptable for persisted memory.
import time
curr_time= time.time()
# Replacement for the `path=` argument passed to chromadb.PersistentClient(...);
# an explicit self.path still takes precedence over the derived directory.
path=self.path if self.path else f"{db_storage_path()}/{self.type}/{curr_time}"
I would not recommend using a time-based key for the memory, as it would change the memory path for your crew depending on when you launch it. Use an MD5 hash instead: it stays constant for a given crew of agents, because the string of agent roles remains constant across launches, and hence so does the hash. Do not forget to add from hashlib import md5
def __init__(self, type, allow_reset=True, embedder_config=None, crew=None, path=None):
    """Record the storage settings using a short hashed key for the crew.

    ``self.agents`` becomes the first 12 hex characters of the MD5 digest of
    the sorted, ``"_"``-joined sanitized agent roles, or ``"default"`` when
    there is no crew or the crew has no agents. ``_initialize_app`` is
    invoked last, once all attributes are set.
    """
    super().__init__(type, allow_reset, embedder_config, crew)

    crew_members = crew.agents if crew else []
    if not crew_members:
        crew_key = "default"
    else:
        roles = [self._sanitize_role(member.role) for member in crew_members]
        # Sorting makes the key independent of the order agents are listed in.
        roles.sort()
        digest = md5("_".join(roles).encode(), usedforsecurity=False)
        crew_key = digest.hexdigest()[:12]

    self.agents = crew_key
    self.type = type
    self.allow_reset = allow_reset
    self.path = path
    self._initialize_app()
Oh understood. Makes sense. I have integrated md5 hash now. Thanks a lot for the suggestion.