Spaces:

resumebuild
/

Anushka

Build error

App Files Files Community

Anushka / similarity_score_refined.py

Anushkabhat9

Update similarity_score_refined.py

80bcaed verified 11 months ago

raw

history blame contribute delete

4.71 kB

	from docx.opc.exceptions import PackageNotFoundError

	def read_file(file_path):
	    """
	    Return the text content of a PDF, docx, or plain-text file.

	    The format is chosen from the case-insensitive file extension:
	    ``.pdf`` is read with PyPDF2, ``.docx`` with python-docx, and every other
	    extension is read as plain text, trying fallback encodings if 'utf-8' fails.

	    Args:
	    - file_path (str): Path of the file to read.
	    Returns:
	    - str: The extracted text.
	    Raises:
	    - FileNotFoundError: If ``file_path`` does not exist.
	    - PackageNotFoundError: If a ``.docx`` file cannot be opened by python-docx.
	    """
	    # Check if the file exists before proceeding
	    if not os.path.exists(file_path):
	        raise FileNotFoundError(f"File not found: {file_path}")

	    lowered_path = file_path.lower()

	    if lowered_path.endswith('.pdf'):
	        with open(file_path, 'rb') as file:  # Open in binary read mode for PDFs
	            reader = PyPDF2.PdfReader(file)
	            # Defensive: treat a falsy result from extract_text() as empty so
	            # the concatenation cannot fail on a page that yields no text.
	            return "".join(page.extract_text() or "" for page in reader.pages)

	    if lowered_path.endswith('.docx'):
	        try:
	            doc = Document(file_path)
	        except PackageNotFoundError as err:
	            # Re-raise with a more informative message, keeping the original cause
	            raise PackageNotFoundError(f"The file {file_path} is not a valid docx file. It may be corrupted or of a different format.") from err
	        # One trailing newline per paragraph keeps paragraphs separated
	        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

	    # Any other extension: plain text. Try the strict encodings first.
	    for encoding in ('utf-8', 'cp1252'):
	        try:
	            with open(file_path, 'r', encoding=encoding) as file:
	                return file.read()
	        except UnicodeDecodeError:
	            continue

	    # latin-1 maps every byte to a character, so this last attempt cannot
	    # raise UnicodeDecodeError.
	    with open(file_path, 'r', encoding='latin-1') as file:
	        return file.read()

	import os
	# from langchain.text_splitter import RecursiveCharacterTextSplitter
	# from langchain_community.vectorstores.faiss import FAISS

	from docx import Document
	import google.generativeai as genai
	import PyPDF2

	# Read the Gemini API key from the GOOGLE_GEMINI_KEY environment variable.
	# os.environ.get returns None when the variable is unset; no check is made
	# here, so a missing key is passed straight to genai.configure.
	api_key_google = os.environ.get('GOOGLE_GEMINI_KEY')
	genai.configure(api_key=api_key_google)

	# NOTE(review): this spot used to be labelled "Mount Google Drive", but no
	# Drive-mounting code is present in the visible file.

	# Module-level model handle; similarity_main calls model.generate_content on it.
	model = genai.GenerativeModel('gemini-pro')

	def _parse_score_and_reason(content_text):
	    """
	    Extract the score (scaled to 0-100) and the reason from the model's reply.

	    Returns a ``(score, reason)`` tuple; either element is None when the
	    corresponding "Score:" / "Reason:" line is absent. Raises ValueError when
	    a "Score:" line is present but its value is not a float.
	    """
	    score = None
	    reason = None

	    for raw_line in content_text.splitlines():
	        # Normalise BEFORE matching the prefix: a reply such as
	        # "**Score:** 0.8" or an indented "Score: 0.8" must still be recognised.
	        cleaned = raw_line.replace("**", "").strip()
	        prefix = cleaned.lower()

	        if prefix.startswith("score:"):
	            value = cleaned.split(":", 1)[1].strip()
	            try:
	                # The model is asked for a 0-1 value; report it on a 0-100 scale
	                score = round(float(value) * 100, 2)
	            except ValueError as err:
	                raise ValueError(f"Invalid score format: {raw_line}") from err
	        elif prefix.startswith("reason:"):
	            reason = cleaned.split(":", 1)[1].strip()

	    return score, reason


	def similarity_main(tailored_resume_path, job_description_path):
	    """
	    Use Gemini Pro to evaluate the relevance score between a tailored resume and job description.

	    Both files are loaded with read_file, so errors raised while reading them
	    propagate to the caller. Errors raised while calling the model or parsing
	    its reply are printed and reported as a None result instead.

	    Args:
	    - tailored_resume_path (str): Path to the tailored resume file.
	    - job_description_path (str): Path to the job description file.
	    Returns:
	    - dict | None: A dictionary containing the 'score' (model value multiplied
	      by 100, rounded to 2 decimals) and 'reason', or None if the model call
	      or the parsing of its reply failed.
	    """

	    resume_text = read_file(tailored_resume_path)
	    job_description = read_file(job_description_path)
	    prompt = f"""
	You are a recruitment expert evaluating how well a tailored resume aligns with a job description. Provide a realistic and concise evaluation based on the following criteria:
	1. Relevance of skills and experience: Do the candidate’s skills, accomplishments, and experience meet the job's core requirements?
	2. Domain Match: Are the candidate's experiences and achievements relevant to the industry or role?
	3. Clarity and Conciseness: Is the resume focused on the job requirements?
	4. Highlight any gaps or mismatched qualifications realistically.

	Provide your response in this exact format and make sure that score is a floating point number.
	Score: [Score between 0 and 1]
	Reason: [One or two sentences explaining the score]

	Here is the tailored resume:
	[Resume Start]
	{resume_text}
	[Resume End]

	And the job description below:
	[Job Description Start]
	{job_description}
	[Job Description End]
	"""

	    try:
	        # Get the response from Gemini Pro; temperature 0.0 is requested for
	        # every call.
	        response = model.generate_content(prompt, generation_config={"temperature": 0.0})
	        candidates = response.candidates
	        if not candidates:
	            raise ValueError("No candidates found in the response.")

	        # Only the first part of the first candidate is inspected
	        content_text = candidates[0].content.parts[0].text

	        score, reason = _parse_score_and_reason(content_text)

	        # A score is mandatory; a missing reason gets a placeholder
	        if score is None:
	            raise ValueError("Failed to extract score from the response.")
	        if not reason:
	            reason = "No reason provided."

	        return {"score": score, "reason": reason}

	    except Exception as e:
	        # Deliberate best-effort boundary: callers receive None on any failure
	        print(f"Error in relevance checking: {e}")
	        return None