Spaces:

Manisankarrr
/

AIRoadMapGenerator

Sleeping

App Files Files Community

Manisankarrr committed on Sep 25

Commit

be196ae

verified ·

1 Parent(s): 4ce1ab5

added module files

Browse files

Files changed (2) hide show

github_analyzer.py +56 -0
knowledge_base.py +37 -0

github_analyzer.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import os
+import requests
+from dotenv import load_dotenv
+# Load key=value pairs from a .env file (when one is found) into the process
+# environment, so that os.getenv("GITHUB_TOKEN") below can pick up the token.
+load_dotenv()
+def analyze_github_profile(github_url: str) -> str:
+    """
+    Fetches and summarizes public repository data from a GitHub user's profile
+    using an authenticated API request to increase the rate limit.
+    """
+    try:
+        username = github_url.strip().rstrip('/').split('/')[-1]
+        if not username:
+            raise ValueError("Could not extract a valid username from the provided URL.")
+        api_url = f"https://api.github.com/users/{username}/repos"
+        params = {'sort': 'pushed', 'per_page': 7}
+        # --- THIS IS THE KEY UPGRADE ---
+        # We now create an authentication header to send our token.
+        github_token = os.getenv("GITHUB_TOKEN")
+        headers = {
+            'Authorization': f'token {github_token}'
+        } if github_token else {}
+        # We pass the headers with our request.
+        response = requests.get(api_url, params=params, headers=headers)
+        response.raise_for_status()
+        repos = response.json()
+        if not repos:
+            return "No public repositories found for this user."
+        languages = {repo["language"] for repo in repos if repo.get("language")}
+        summary = f"Key Languages Used:\n- {', '.join(languages) if languages else 'N/A'}\n\n"
+        summary += "Recent Projects Summary:\n"
+        for repo in repos:
+            summary += (f"- Project: {repo.get('name', 'N/A')} "
+                        f"(Language: {repo.get('language', 'N/A')})\n  "
+                        f"Description: {repo.get('description', 'No description.')}\n")
+        return summary
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 404:
+            raise ValueError(f"GitHub user '{username}' not found. Please check the URL.")
+        else:
+            raise ConnectionError(f"GitHub API error: {e}")
+    except Exception as e:
+        raise RuntimeError(f"An unexpected error occurred: {e}")

knowledge_base.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import chromadb
+import chromadb.utils.embedding_functions
+# Hand-written expert knowledge: one short summary of in-demand skills per job role.
+# This is the text that gets stored in, and retrieved from, the vector collection
+# (the retrieval source for the RAG flow). Keys are short role identifiers.
+JOB_ROLES_KNOWLEDGE = {
+    "backend": "For backend roles, employers seek skills in cloud services (AWS, GCP), containerization (Docker, Kubernetes), database management (SQL, NoSQL), and building scalable RESTful APIs.",
+    "frontend": "For frontend roles, demand is high for modern JavaScript frameworks (React, Vue), state management tools (Redux), and experience with build tools like Vite or Webpack.",
+    "ai_ml": "For AI/ML roles, key skills include Python, frameworks like PyTorch or TensorFlow, understanding of MLOps, and deploying models as APIs.",
+    "data_science": "For Data Science roles, skills in data analysis, statistical modeling, data visualization, and libraries like Pandas and Scikit-learn are essential.",
+    "devops": "For DevOps, skills in CI/CD (Jenkins, GitHub Actions), infrastructure as code (Terraform), and container orchestration (Kubernetes) are critical.",
+    "fullstack": "For Fullstack roles, a mix of frontend and backend skills is required, including a primary web framework, database skills, and deployment knowledge."
+}
+# Setup the vector database client
+client = chromadb.Client()
+sentence_transformer_ef = chromadb.utils.embedding_functions.DefaultEmbeddingFunction()
+collection = client.create_collection(
+    name="job_roles_knowledge",
+    embedding_function=sentence_transformer_ef
+)
+def setup_knowledge_base():
+    """Loads our expert knowledge into the ChromaDB collection."""
+    print("Setting up the knowledge base with ChromaDB...")
+    ids = list(JOB_ROLES_KNOWLEDGE.keys())
+    documents = list(JOB_ROLES_KNOWLEDGE.values())
+    collection.add(documents=documents, ids=ids)
+    print("Knowledge base setup complete.")
+def get_market_context(career_goal: str) -> str:
+    """Performs a semantic search to find the most relevant job role info."""
+    results = collection.query(query_texts=[career_goal], n_results=1)
+    if not results['documents'][0]:
+        # Fallback if no results are found
+        return "General software engineering principles are always in demand."
+    return results['documents'][0][0]