Spaces:
Sleeping
Sleeping
added module files
Browse files
- github_analyzer.py +56 -0
- knowledge_base.py +37 -0
github_analyzer.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Load variables from a local .env file so GITHUB_TOKEN is readable via os.getenv
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
def analyze_github_profile(
    github_url: str,
    *,
    max_repos: int = 7,
    timeout: float = 10.0,
) -> str:
    """Summarize a GitHub user's most recently pushed public repositories.

    The username is taken as the last path segment of ``github_url``. If the
    ``GITHUB_TOKEN`` environment variable is set, it is sent as an
    ``Authorization`` header with the request.

    Args:
        github_url: Profile URL (or bare username) of the GitHub user.
        max_repos: Value sent as the ``per_page`` query parameter, i.e. the
            maximum number of repositories requested.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        A plain-text summary listing the languages used and, for each
        repository, its name, language and description; or a fixed message
        when the user has no public repositories.

    Raises:
        ValueError: If no username can be extracted from ``github_url`` or
            the API answers 404 for that user.
        ConnectionError: If the API answers with any other HTTP error status.
        RuntimeError: For any other failure while fetching or formatting.
    """
    # Validate before entering the try block so this ValueError reaches the
    # caller unchanged instead of being rewrapped by the broad handler below.
    username = (github_url or "").strip().rstrip('/').split('/')[-1]
    if not username:
        raise ValueError("Could not extract a valid username from the provided URL.")

    api_url = f"https://api.github.com/users/{username}/repos"
    params = {'sort': 'pushed', 'per_page': max_repos}

    # Only send an Authorization header when a token is actually configured.
    github_token = os.getenv("GITHUB_TOKEN")
    headers = {'Authorization': f'token {github_token}'} if github_token else {}

    try:
        # Without a timeout, requests would wait indefinitely on a stalled
        # connection.
        response = requests.get(
            api_url, params=params, headers=headers, timeout=timeout
        )
        response.raise_for_status()

        repos = response.json()
        if not repos:
            return "No public repositories found for this user."

        # Sorted so the summary is deterministic across runs.
        languages = sorted(
            {repo["language"] for repo in repos if repo.get("language")}
        )

        parts = [
            f"Key Languages Used:\n- {', '.join(languages) if languages else 'N/A'}\n\n",
            "Recent Projects Summary:\n",
        ]
        for repo in repos:
            # The API sends explicit nulls for a missing language/description,
            # so `.get(key, default)` would yield None; `or` covers both cases.
            parts.append(
                f"- Project: {repo.get('name') or 'N/A'} "
                f"(Language: {repo.get('language') or 'N/A'})\n "
                f"Description: {repo.get('description') or 'No description.'}\n"
            )
        return "".join(parts)

    except requests.exceptions.HTTPError as e:
        if e.response is not None and e.response.status_code == 404:
            raise ValueError(
                f"GitHub user '{username}' not found. Please check the URL."
            ) from e
        raise ConnectionError(f"GitHub API error: {e}") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred: {e}") from e
|
| 56 |
+
|
knowledge_base.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
import chromadb.utils.embedding_functions
|
| 3 |
+
|
| 4 |
+
# Curated job-market knowledge, keyed by role. These texts are the documents
# that get embedded and retrieved to ground the model's answers (the
# retrieval corpus of the RAG pipeline).
JOB_ROLES_KNOWLEDGE = {
    "backend": "For backend roles, employers seek skills in cloud services (AWS, GCP), containerization (Docker, Kubernetes), database management (SQL, NoSQL), and building scalable RESTful APIs.",
    "frontend": "For frontend roles, demand is high for modern JavaScript frameworks (React, Vue), state management tools (Redux), and experience with build tools like Vite or Webpack.",
    "ai_ml": "For AI/ML roles, key skills include Python, frameworks like PyTorch or TensorFlow, understanding of MLOps, and deploying models as APIs.",
    "data_science": "For Data Science roles, skills in data analysis, statistical modeling, data visualization, and libraries like Pandas and Scikit-learn are essential.",
    "devops": "For DevOps, skills in CI/CD (Jenkins, GitHub Actions), infrastructure as code (Terraform), and container orchestration (Kubernetes) are critical.",
    "fullstack": "For Fullstack roles, a mix of frontend and backend skills is required, including a primary web framework, database skills, and deployment knowledge.",
}

# Set up the vector database client and the embedding function used for both
# indexing and querying.
client = chromadb.Client()
sentence_transformer_ef = chromadb.utils.embedding_functions.DefaultEmbeddingFunction()

# get_or_create_collection instead of create_collection: the latter raises
# when a collection with this name already exists, which would break
# re-execution of this module against the same client.
collection = client.get_or_create_collection(
    name="job_roles_knowledge",
    embedding_function=sentence_transformer_ef,
)
|
| 21 |
+
|
| 22 |
+
def setup_knowledge_base() -> None:
    """Load the job-role knowledge into the ChromaDB collection.

    Each key of ``JOB_ROLES_KNOWLEDGE`` becomes a document id and each value
    the document text. Progress messages are printed before and after.
    """
    print("Setting up the knowledge base with ChromaDB...")
    ids = list(JOB_ROLES_KNOWLEDGE)
    documents = list(JOB_ROLES_KNOWLEDGE.values())
    # upsert rather than add, so calling this function more than once
    # overwrites the existing entries instead of colliding on duplicate ids.
    collection.upsert(documents=documents, ids=ids)
    print("Knowledge base setup complete.")
|
| 29 |
+
|
| 30 |
+
def get_market_context(career_goal: str) -> str:
    """Return the job-role description that best matches ``career_goal``.

    Runs a single-result query against the ChromaDB collection using the
    goal text as the query.

    Args:
        career_goal: Free-text description of the desired career direction.

    Returns:
        The text of the top-matching document, or a generic fallback sentence
        when the goal is blank or the query yields no documents.
    """
    fallback = "General software engineering principles are always in demand."

    # A blank goal carries no meaning to search on; skip the query entirely
    # rather than returning whichever role happens to embed closest to "".
    if not career_goal or not career_goal.strip():
        return fallback

    results = collection.query(query_texts=[career_goal], n_results=1)

    # Guard against a missing or empty "documents" entry before indexing.
    documents = results.get('documents') or [[]]
    top_matches = documents[0]
    if not top_matches:
        return fallback
    return top_matches[0]
|
| 37 |
+
|