Spaces:
Build error
Build error
| from docx.opc.exceptions import PackageNotFoundError | |
def read_file(file_path):
    """
    Return the textual content of the file at ``file_path``.

    - ``.pdf``  files: text is extracted page by page with PyPDF2.
    - ``.docx`` files: paragraph texts are extracted with python-docx, each
      followed by a newline.
    - anything else: the file is read as text, trying 'utf-8' first and
      falling back to other encodings if decoding fails.

    Args:
        file_path (str): Path of the file to read. The extension check is
            case-insensitive.

    Returns:
        str: The extracted or decoded text.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        PackageNotFoundError: If a ``.docx`` path cannot be opened as a docx
            package.
    """
    # Check if the file exists before proceeding
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    lowered_path = file_path.lower()

    if lowered_path.endswith('.pdf'):
        with open(file_path, 'rb') as file:  # Open in binary read mode for PDFs
            reader = PyPDF2.PdfReader(file)
            # Defensive `or ""`: a page that yields no text must not break
            # the concatenation.
            return "".join(page.extract_text() or "" for page in reader.pages)

    if lowered_path.endswith('.docx'):
        # Only opening the package can raise PackageNotFoundError, so keep
        # the try body minimal.
        try:
            doc = Document(file_path)
        except PackageNotFoundError as err:
            # Provide a more informative error message if the file is not a valid docx
            raise PackageNotFoundError(
                f"The file {file_path} is not a valid docx file. It may be corrupted or of a different format."
            ) from err
        # Add newline for paragraph separation
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    # Plain-text fallback promised by the docstring: try strict encodings
    # first, then latin-1, which maps every byte and therefore never fails.
    for encoding in ("utf-8", "cp1252"):
        try:
            with open(file_path, "r", encoding=encoding) as file:
                return file.read()
        except UnicodeDecodeError:
            continue
    with open(file_path, "r", encoding="latin-1") as file:
        return file.read()
| import os | |
| # from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| # from langchain_community.vectorstores.faiss import FAISS | |
| from docx import Document | |
| import google.generativeai as genai | |
| import PyPDF2 | |
# Configure the Gemini client with the key taken from the GOOGLE_GEMINI_KEY
# environment variable (None when the variable is unset), then create the
# module-level model handle used by the functions below.
api_key_google = os.getenv('GOOGLE_GEMINI_KEY')
genai.configure(api_key=api_key_google)
model = genai.GenerativeModel('gemini-pro')
# Prompt sent to the model; `{resume_text}` and `{job_description}` are filled
# in by similarity_main via str.format.
_RELEVANCE_PROMPT_TEMPLATE = """
You are a recruitment expert evaluating how well a tailored resume aligns with a job description. Provide a realistic and concise evaluation based on the following criteria:
1. Relevance of skills and experience: Do the candidate’s skills, accomplishments, and experience meet the job's core requirements?
2. Domain Match: Are the candidate's experiences and achievements relevant to the industry or role?
3. Clarity and Conciseness: Is the resume focused on the job requirements?
4. Highlight any gaps or mismatched qualifications realistically.
Provide your response in this exact format and make sure that score is a floating point number.
Score: [Score between 0 and 1]
Reason: [One or two sentences explaining the score]
Here is the tailored resume:
[Resume Start]
{resume_text}
[Resume End]
And the job description below:
[Job Description Start]
{job_description}
[Job Description End]
"""


def _parse_relevance_response(content_text):
    """Extract the score (scaled to 0-100) and the reason from the model reply."""
    score = None
    reason = None
    for raw_line in content_text.split("\n"):
        # Strip Markdown bold markers and surrounding whitespace BEFORE
        # matching the label, so replies such as "**Score:** 0.8" or an
        # indented "Score: 0.8" are recognised.
        line = raw_line.replace("**", "").strip()
        lowered = line.lower()
        if lowered.startswith("score:"):
            value = line.split(":", 1)[1].strip()
            try:
                score = round(float(value) * 100, 2)
            except ValueError as err:
                raise ValueError(f"Invalid score format: {raw_line}") from err
        elif lowered.startswith("reason:"):
            reason = line.split(":", 1)[1].strip()

    # A score is mandatory; a missing reason gets a placeholder.
    if score is None:
        raise ValueError("Failed to extract score from the response.")
    if not reason:
        reason = "No reason provided."
    return {"score": score, "reason": reason}


def similarity_main(tailored_resume_path, job_description_path):
    """
    Use Gemini Pro to evaluate the relevance score between a tailored resume and job description.

    Args:
    - tailored_resume_path (str): Path of the tailored resume file.
    - job_description_path (str): Path of the job description file.

    Returns:
    - dict: A dictionary containing the 'score' (scaled to 0–100) and 'reason',
      or None if the model call or the parsing of its reply fails (the error
      is printed).

    Errors raised while reading the two input files are not caught here and
    propagate to the caller.
    """
    resume_text = read_file(tailored_resume_path)
    job_description = read_file(job_description_path)
    prompt = _RELEVANCE_PROMPT_TEMPLATE.format(
        resume_text=resume_text,
        job_description=job_description,
    )
    try:
        # Get the response from Gemini Pro
        response = model.generate_content(prompt, generation_config={"temperature": 0.0})
        candidates = response.candidates
        if not candidates:
            raise ValueError("No candidates found in the response.")
        # Extract content text
        content_text = candidates[0].content.parts[0].text
        return _parse_relevance_response(content_text)
    except Exception as e:
        # Deliberate best-effort boundary: any failure of the model call or
        # of the parsing is reported and signalled to the caller as None.
        print(f"Error in relevance checking: {e}")
        return None