{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Education classifier demo\n",
    "\n",
    "Loads the sample LinkedIn profile from `tests/test_data/sample_profiles/hansae_catlett.json` and runs `EducationClassifier.classify_education` on each of its education entries, printing every classification as JSON.\n",
    "\n",
    "**Before committing:** clear the cell outputs. A run prints the classifier's log lines (including AWS role details) and its reasoning about a person's education history."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import json\n",
    "import os\n",
    "import subprocess\n",
    "\n",
    "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
    "\n",
    "\n",
    "def get_git_root() -> str:\n",
    "    \"\"\"Return the top-level directory of the enclosing git repository.\n",
    "\n",
    "    Runs ``git rev-parse --show-toplevel`` and returns its decoded, stripped output.\n",
    "\n",
    "    Raises:\n",
    "        subprocess.CalledProcessError: if the git command exits with a non-zero status.\n",
    "    \"\"\"\n",
    "    return subprocess.check_output([\"git\", \"rev-parse\", \"--show-toplevel\"]).decode().strip()\n",
    "\n",
    "\n",
    "# Resolve the repository root once and reuse it; every lookup spawns a git subprocess.\n",
    "git_root = get_git_root()\n",
    "os.chdir(os.path.join(git_root, \"src\"))\n",
    "\n",
    "# Print the working directory relative to the repo root so the saved output\n",
    "# does not embed a machine-specific absolute path.\n",
    "print(os.path.relpath(os.getcwd(), git_root))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the sample profile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Anchor the fixture path to the repo root so this cell does not depend on the current directory.\n",
    "SAMPLE_PROFILE_PATH = os.path.join(\n",
    "    git_root, \"tests\", \"test_data\", \"sample_profiles\", \"hansae_catlett.json\"\n",
    ")\n",
    "\n",
    "# Explicit encoding so the fixture is decoded the same way on every platform.\n",
    "with open(SAMPLE_PROFILE_PATH, encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# Convert the raw JSON payload into a LinkedinProfile.\n",
    "profile = LinkedinProfile.profile_from_json(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classify each education entry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imported only after the chdir into src/ in the setup cell.\n",
    "# NOTE(review): kept here in case this module resolves anything relative to the\n",
    "# working directory - confirm, then move it to the top import cell.\n",
    "from vsp.app.classifiers.education_classifier import EducationClassifier\n",
    "\n",
    "education_classifier = EducationClassifier()\n",
    "\n",
    "# asyncio.gather returns its results in the order the awaitables were passed, so the\n",
    "# classifications line up with profile.educations no matter which call finishes first.\n",
    "all_educations_classified = await asyncio.gather(\n",
    "    *(education_classifier.classify_education(profile, e) for e in profile.educations)\n",
    ")\n",
    "\n",
    "for classification in all_educations_classified:\n",
    "    print(classification.model_dump_json(indent=2))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}