# AIRoadMapGenerator / knowledge_base.py
# Manisankarrr's picture
# added module files
# be196ae verified
import chromadb
import chromadb.utils.embedding_functions
# Expert knowledge for the RAG pipeline: maps a job-role key to a short
# summary of the skills employers look for in that role.
JOB_ROLES_KNOWLEDGE = {
    "backend": (
        "For backend roles, employers seek skills in cloud services (AWS, GCP), "
        "containerization (Docker, Kubernetes), database management (SQL, NoSQL), "
        "and building scalable RESTful APIs."
    ),
    "frontend": (
        "For frontend roles, demand is high for modern JavaScript frameworks "
        "(React, Vue), state management tools (Redux), and experience with "
        "build tools like Vite or Webpack."
    ),
    "ai_ml": (
        "For AI/ML roles, key skills include Python, frameworks like PyTorch or "
        "TensorFlow, understanding of MLOps, and deploying models as APIs."
    ),
    "data_science": (
        "For Data Science roles, skills in data analysis, statistical modeling, "
        "data visualization, and libraries like Pandas and Scikit-learn are "
        "essential."
    ),
    "devops": (
        "For DevOps, skills in CI/CD (Jenkins, GitHub Actions), infrastructure "
        "as code (Terraform), and container orchestration (Kubernetes) are "
        "critical."
    ),
    "fullstack": (
        "For Fullstack roles, a mix of frontend and backend skills is required, "
        "including a primary web framework, database skills, and deployment "
        "knowledge."
    ),
}
# Set up the vector database client (created with Chroma's default settings).
client = chromadb.Client()

# NOTE: despite its name, this holds Chroma's *default* embedding function;
# the variable name is kept so existing references to it keep working.
sentence_transformer_ef = chromadb.utils.embedding_functions.DefaultEmbeddingFunction()

# get_or_create_collection is used instead of create_collection so that
# running this module-level setup again against a client that already holds
# a "job_roles_knowledge" collection reuses it instead of raising.
collection = client.get_or_create_collection(
    name="job_roles_knowledge",
    embedding_function=sentence_transformer_ef,
)
def setup_knowledge_base() -> None:
    """Load the expert knowledge into the ChromaDB collection.

    Every key of ``JOB_ROLES_KNOWLEDGE`` is used as a record id and its value
    as the record's document. Records are written with ``upsert`` so the
    function is safe to call more than once: ids that are already present
    are overwritten with the current text instead of being added again.

    Progress messages are printed to stdout before and after the write.
    """
    print("Setting up the knowledge base with ChromaDB...")
    ids = list(JOB_ROLES_KNOWLEDGE)
    # Build documents from the same id list so ids[i] always pairs with
    # documents[i].
    documents = [JOB_ROLES_KNOWLEDGE[role_id] for role_id in ids]
    collection.upsert(ids=ids, documents=documents)
    print("Knowledge base setup complete.")
def get_market_context(career_goal: str) -> str:
    """Return the knowledge-base entry most relevant to *career_goal*.

    Runs a single-result semantic query against the module-level
    ``collection`` and returns the text of the top match.

    Args:
        career_goal: Free-text description of the desired career path.

    Returns:
        The best-matching document, or a generic fallback sentence when
        ``career_goal`` is empty/whitespace-only or the query yields no
        documents.
    """
    fallback = "General software engineering principles are always in demand."

    # A blank goal carries no signal to search on; skip the query entirely.
    if not career_goal or not career_goal.strip():
        return fallback

    results = collection.query(query_texts=[career_goal], n_results=1)

    # "documents" holds one list of hits per query text; guard against it
    # being None/empty before indexing the hits for our single query.
    documents = results["documents"] or [[]]
    top_matches = documents[0]
    if not top_matches:
        # Fallback if no results are found
        return fallback
    return top_matches[0]