Manisankarrr committed on
Commit
be196ae
·
verified ·
1 Parent(s): 4ce1ab5

added module files

Browse files
Files changed (2) hide show
  1. github_analyzer.py +56 -0
  2. knowledge_base.py +37 -0
github_analyzer.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+
5
+ # Load environment variables to get access to the new GITHUB_TOKEN
6
+ load_dotenv()
7
+
8
# Seconds to wait for the GitHub API before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_GITHUB_REQUEST_TIMEOUT = 10


def _extract_github_username(github_url: str) -> str:
    """Return the trailing path segment of a GitHub profile URL.

    Raises:
        ValueError: If no non-empty segment can be extracted.
    """
    # A pasted profile link often carries a query string or fragment
    # (e.g. "?tab=repositories"); drop it so it is not taken as part of
    # the username.
    without_suffix = github_url.strip().split('?', 1)[0].split('#', 1)[0]
    username = without_suffix.rstrip('/').split('/')[-1]
    if not username:
        raise ValueError("Could not extract a valid username from the provided URL.")
    return username


def _summarize_repos(repos: list) -> str:
    """Build the plain-text language/project summary for a list of repo dicts."""
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # language list is stable between runs (a set's order is not).
    languages = list(dict.fromkeys(
        repo["language"] for repo in repos if repo.get("language")
    ))

    parts = [
        f"Key Languages Used:\n- {', '.join(languages) if languages else 'N/A'}\n\n",
        "Recent Projects Summary:\n",
    ]
    for repo in repos:
        # The API sends "language"/"description" keys with a null value when
        # unset, so a .get() default would never apply; use `or` instead.
        parts.append(
            f"- Project: {repo.get('name') or 'N/A'} "
            f"(Language: {repo.get('language') or 'N/A'})\n "
            f"Description: {repo.get('description') or 'No description.'}\n"
        )
    return "".join(parts)


def analyze_github_profile(github_url: str) -> str:
    """
    Fetches and summarizes public repository data from a GitHub user's profile.

    The request is authenticated with the GITHUB_TOKEN environment variable
    when it is set; otherwise it is sent without an Authorization header.

    Args:
        github_url: Profile URL (or bare username); the last path segment is
            used as the username.

    Returns:
        A text summary of the languages and the most recently pushed
        repositories, or a fixed message when the user has no public repos.

    Raises:
        ValueError: If the GitHub API answers 404 for the username.
        ConnectionError: If the GitHub API answers with any other HTTP error.
        RuntimeError: For any other failure, including a URL from which no
            username can be extracted.
    """
    # Validate the input before touching the network. Failures here are
    # reported as RuntimeError, matching how every non-HTTP failure of this
    # function is reported.
    try:
        username = _extract_github_username(github_url)
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred: {e}") from e

    api_url = f"https://api.github.com/users/{username}/repos"
    params = {'sort': 'pushed', 'per_page': 7}

    # Send the token only when one is configured.
    github_token = os.getenv("GITHUB_TOKEN")
    headers = {'Authorization': f'token {github_token}'} if github_token else {}

    try:
        response = requests.get(
            api_url,
            params=params,
            headers=headers,
            timeout=_GITHUB_REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        repos = response.json()
        if not repos:
            return "No public repositories found for this user."

        return _summarize_repos(repos)

    except requests.exceptions.HTTPError as e:
        if e.response is not None and e.response.status_code == 404:
            raise ValueError(
                f"GitHub user '{username}' not found. Please check the URL."
            ) from e
        raise ConnectionError(f"GitHub API error: {e}") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred: {e}") from e
56
+
knowledge_base.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ import chromadb.utils.embedding_functions
3
+
4
# Expert knowledge base: one short description of in-demand skills per job
# role, keyed by a role identifier.
JOB_ROLES_KNOWLEDGE = {
    "backend": (
        "For backend roles, employers seek skills in cloud services (AWS, GCP), "
        "containerization (Docker, Kubernetes), database management (SQL, NoSQL), "
        "and building scalable RESTful APIs."
    ),
    "frontend": (
        "For frontend roles, demand is high for modern JavaScript frameworks "
        "(React, Vue), state management tools (Redux), and experience with "
        "build tools like Vite or Webpack."
    ),
    "ai_ml": (
        "For AI/ML roles, key skills include Python, frameworks like PyTorch or "
        "TensorFlow, understanding of MLOps, and deploying models as APIs."
    ),
    "data_science": (
        "For Data Science roles, skills in data analysis, statistical modeling, "
        "data visualization, and libraries like Pandas and Scikit-learn are "
        "essential."
    ),
    "devops": (
        "For DevOps, skills in CI/CD (Jenkins, GitHub Actions), infrastructure "
        "as code (Terraform), and container orchestration (Kubernetes) are "
        "critical."
    ),
    "fullstack": (
        "For Fullstack roles, a mix of frontend and backend skills is required, "
        "including a primary web framework, database skills, and deployment "
        "knowledge."
    ),
}
13
+
14
# Set up the vector database client and the embedding function used to
# index and query the job-role documents.
client = chromadb.Client()
sentence_transformer_ef = chromadb.utils.embedding_functions.DefaultEmbeddingFunction()

# get_or_create_collection reuses the collection when one with this name
# already exists on the client; create_collection would fail in that case
# (e.g. if this module's top-level code is executed more than once).
collection = client.get_or_create_collection(
    name="job_roles_knowledge",
    embedding_function=sentence_transformer_ef,
)
21
+
22
def setup_knowledge_base():
    """Load every JOB_ROLES_KNOWLEDGE entry into the ChromaDB collection.

    Each dictionary key becomes the document id and each value the document
    text. Progress is reported on stdout.
    """
    print("Setting up the knowledge base with ChromaDB...")
    ids = list(JOB_ROLES_KNOWLEDGE)
    documents = list(JOB_ROLES_KNOWLEDGE.values())
    # upsert (rather than add) keeps repeated calls idempotent regardless of
    # how the installed Chroma version treats ids that already exist.
    collection.upsert(documents=documents, ids=ids)
    print("Knowledge base setup complete.")
29
+
30
def get_market_context(career_goal: str) -> str:
    """Return the job-role description that best matches a career goal.

    Queries the collection for the single closest document to
    ``career_goal``.

    Args:
        career_goal: Free-text description of the desired role.

    Returns:
        The best-matching document text, or a generic fallback sentence when
        the goal is blank or the query yields no document.
    """
    fallback = "General software engineering principles are always in demand."

    # A blank goal carries no signal; querying with it would just return an
    # arbitrary nearest document, so answer with the fallback directly.
    if not career_goal or not career_goal.strip():
        return fallback

    results = collection.query(query_texts=[career_goal], n_results=1)

    # One inner list per query text; guard against a missing/empty outer
    # value as well as an empty match list.
    documents = results['documents']
    if not documents or not documents[0]:
        return fallback
    return documents[0][0]
37
+