"""
main.py

This module is the main executable entry point for the VSP Data Enrichment project.
It provides functionality to process LinkedIn profiles and classify various aspects of a person's
educational and professional background.

The main class, VspDataEnrichment, encapsulates all the necessary classifiers and methods
to perform a comprehensive analysis of a LinkedIn profile.

Usage:
    from vsp.app.main import VspDataEnrichment

    vsp_enrichment = VspDataEnrichment()
    # Must be awaited from inside a coroutine (for example one started with asyncio.run).
    results = await vsp_enrichment.process_linkedin_profile(linkedin_profile)

"""

import asyncio
import calendar
from collections import defaultdict
from datetime import date
from typing import List, Mapping, Sequence

from pydantic import BaseModel, Field

from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
    PrimaryJobType,
    SecondaryJobType,
    WorkExperienceClassification,
    WorkExperienceClassifier,
)
from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
    InvestingFocusAssetClassClassification,
    InvestingFocusAssetClassClassifier,
)
from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
    InvestingFocusSectorClassification,
    InvestingFocusSectorClassifier,
)
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
    InvestmentBankingGroupClassification,
    InvestmentBankingGroupClassifier,
)
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile, Position


class ClassifiedEducation(BaseModel):
    """
    Pairs one education item from a LinkedIn profile with its classification result.

    Both fields are required; the model adds no behavior of its own.

    Attributes:
        education (Education): The original education item from the LinkedIn profile.
        classification (EducationClassification): The classification results for the education item.
    """

    education: Education  # source entry, stored as given
    classification: EducationClassification  # classifier output for that entry


class ClassifiedWorkExperience(BaseModel):
    """
    Pairs one position from a LinkedIn profile with its classification results.

    The general work experience classification is required. The three specialised
    classifications are optional and default to ``None``, which means the
    corresponding classifier result was not attached to this position.

    Attributes:
        position (Position): The original position item from the LinkedIn profile.
        work_experience_classification (WorkExperienceClassification): The general classification results
        for the work experience.
        investment_banking_classification (
            InvestmentBankingGroupClassification | None
        ): The investment banking classification results, if applicable.
        investing_focus_asset_class_classification (
            InvestingFocusAssetClassClassification | None
        ): The investing focus asset class classification results, if applicable.
        investing_focus_sector_classification (
            InvestingFocusSectorClassification | None
        ): The investing focus sector classification results, if applicable.
    """

    position: Position
    work_experience_classification: WorkExperienceClassification
    # Optional follow-up results; None until a caller assigns them.
    investment_banking_classification: InvestmentBankingGroupClassification | None = None
    investing_focus_asset_class_classification: InvestingFocusAssetClassClassification | None = None
    investing_focus_sector_classification: InvestingFocusSectorClassification | None = None


class LinkedinProfileClassificationResults(BaseModel):
    """
    Represents the complete classification results for a LinkedIn profile.

    Every field has a default, so an empty result can be built with no arguments.

    Attributes:
        classified_educations (Sequence[ClassifiedEducation]): A sequence of classified education items.
        Defaults to an empty list.
        classified_work_experiences (Sequence[ClassifiedWorkExperience]): A sequence of classified work
        experience items. Defaults to an empty list.
        full_time_work_experience_years (float): Total full-time work experience in years. The enrichment
        pipeline in this module derives it by summing the values of
        ``full_time_work_experience_by_secondary``. Defaults to 0.0.
        full_time_work_experience_by_secondary (Mapping[SecondaryJobType, float]): Full-time work
        experience in years, keyed by secondary job type. Defaults to an empty dict.
    """

    classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
    classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
    full_time_work_experience_years: float = Field(default=0.0)
    full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)


class VspDataEnrichment:
    """
    Main class for the VSP Data Enrichment project.

    This class encapsulates all the necessary classifiers and methods to process
    and enrich LinkedIn profile data with various classifications.

    Attributes:
        _education_classifier (EducationClassifier): Classifier for education items.
        _work_experience_classifier (WorkExperienceClassifier): Classifier for general work experiences.
        _investment_banking_classifier (InvestmentBankingGroupClassifier): Classifier for investment banking groups.
        _investing_focus_asset_class_classifier (
            InvestingFocusAssetClassClassifier
        ): Classifier for investing focus asset classes.
        _investing_focus_sector_classifier (InvestingFocusSectorClassifier): Classifier for investing focus sectors.
    """

    def __init__(self) -> None:
        """Initialize the VspDataEnrichment class with all required classifiers."""
        self._education_classifier = EducationClassifier()
        self._work_experience_classifier = WorkExperienceClassifier()
        self._investment_banking_classifier = InvestmentBankingGroupClassifier()
        self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
        self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()

    def estimate_full_time_experience_by_secondary_job_type(
        self,
        classified_work_experiences: List[ClassifiedWorkExperience],
        current_date: date | None = None,
    ) -> Mapping[SecondaryJobType, float]:
        """
        Estimate the years of full-time experience for each secondary job type.

        Only positions whose primary job type is ``PrimaryJobType.FULL_TIME`` and that
        have a secondary job type are counted. Partial dates are widened to the largest
        plausible interval: a missing start month or day becomes 1, a missing end month
        becomes 12 and a missing end day becomes the last day of the end month. A position
        without an end date is treated as ongoing and ends at ``current_date``.

        A position is skipped when its start has no year, when its end is present but has
        no year, or when the resulting start date is later than the end date.

        Time covered by several positions of the same secondary job type is counted once
        for that type. Time covered by positions of different types is credited in full
        to each of those types.

        Args:
            classified_work_experiences (List[ClassifiedWorkExperience]): The classified positions to measure.
            current_date (date | None): The date at which ongoing positions are cut off.
                Defaults to today's date; pass a fixed date for reproducible results.

        Returns:
            Mapping[SecondaryJobType, float]: Years of experience per secondary job type, computed as
            days / 365.25 and rounded to two decimals. Types that accumulate no days are omitted.
        """
        as_of = current_date if current_date is not None else date.today()

        # Sweep-line events: (date, +1 when an interval opens / -1 when it closes, job type).
        events = []
        for cwe in classified_work_experiences:
            classification = cwe.work_experience_classification
            secondary_job_type = classification.secondary_job_type
            is_full_time = classification.primary_job_type == PrimaryJobType.FULL_TIME
            if not is_full_time or not secondary_job_type:
                continue

            start = cwe.position.start
            if not start or not start.year:
                continue
            start_date = date(start.year, start.month or 1, start.day or 1)

            end = cwe.position.end
            if end is None:
                end_date = as_of
            elif not end.year:
                continue
            else:
                end_month = end.month or 12
                # monthrange returns (weekday of the first day, number of days in the month).
                end_day = end.day or calendar.monthrange(end.year, end_month)[1]
                end_date = date(end.year, end_month, end_day)

            if start_date > end_date:
                continue  # Skip invalid intervals

            events.append((start_date, 1, secondary_job_type))
            events.append((end_date, -1, secondary_job_type))

        # The sort is stable and every interval's opening event is appended before its
        # closing event, so the open count of a type never drops below zero.
        events.sort(key=lambda event: event[0])

        # Count open intervals per type instead of keeping a set of types: with a set, the
        # first closing event of two overlapping same-type positions would deactivate the
        # type while the other position is still running.
        open_intervals = defaultdict(int)
        durations_in_days = defaultdict(int)
        previous_date = None

        for event_date, delta, secondary_job_type in events:
            if previous_date is not None and event_date > previous_date:
                elapsed_days = (event_date - previous_date).days
                for active_type, open_count in open_intervals.items():
                    if open_count > 0:
                        durations_in_days[active_type] += elapsed_days

            open_intervals[secondary_job_type] += delta
            previous_date = event_date

        return {job_type: round(days / 365.25, 2) for job_type, days in durations_in_days.items()}

    async def _classify_work_experience_details(
        self,
        profile: LinkedinProfile,
        position: Position,
        work_classification: WorkExperienceClassification,
    ) -> ClassifiedWorkExperience:
        """Build the classified work experience for one position, adding follow-up classifications."""
        classified_work_experience = ClassifiedWorkExperience(
            position=position, work_experience_classification=work_classification
        )
        primary_job_type = work_classification.primary_job_type
        secondary_job_type = work_classification.secondary_job_type

        # Compare against the enum classes rather than reaching members through the
        # instance, which fails with AttributeError when a job type is None.
        if primary_job_type in {PrimaryJobType.INTERNSHIP, PrimaryJobType.EXTRACURRICULAR}:
            return classified_work_experience

        if secondary_job_type == SecondaryJobType.INVESTMENT_BANKING:
            classified_work_experience.investment_banking_classification = (
                await self._investment_banking_classifier.classify_investment_banking_group(profile, position)
            )

        if (
            secondary_job_type == SecondaryJobType.INVESTING
            and not primary_job_type == PrimaryJobType.ADVISORY_BOARD_INVESTOR
        ):
            # The two investing-focus classifiers are independent, so run them together.
            asset_class_result, sector_result = await asyncio.gather(
                self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(profile, position),
                self._investing_focus_sector_classifier.classify_investing_focus_sector(profile, position),
            )
            classified_work_experience.investing_focus_asset_class_classification = asset_class_result
            classified_work_experience.investing_focus_sector_classification = sector_result

        return classified_work_experience

    async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
        """
        Process a LinkedIn profile and classify its education and work experiences.

        This method maintains the original order of educations and work experiences
        from the input profile while performing asynchronous classification tasks.
        Education and work experience classification run concurrently; the follow-up
        classifications (investment banking group, investing focus) then run
        concurrently across positions.

        The reported total of full-time years is the sum of the per-secondary-job-type
        values, so time spent in overlapping positions of different types is counted
        once per type.

        Args:
            profile (LinkedinProfile): The LinkedIn profile to process.

        Returns:
            LinkedinProfileClassificationResults: The comprehensive classification results for the profile.
        """
        educations = list(profile.educations)
        positions = list(profile.positions)

        # Keep the coroutines in ordered sequences rather than dicts keyed by the models:
        # dict keys would require hashable models, and equal entries would collapse into
        # one key and misalign the results with the profile's items.
        education_results, work_experience_results = await asyncio.gather(
            asyncio.gather(
                *(self._education_classifier.classify_education(profile, education) for education in educations)
            ),
            asyncio.gather(
                *(
                    self._work_experience_classifier.classify_work_experience(profile, position)
                    for position in positions
                )
            ),
        )

        # asyncio.gather returns results in argument order, so zip restores the pairing.
        classified_educations = [
            ClassifiedEducation(education=education, classification=classification)
            for education, classification in zip(educations, education_results)
        ]

        classified_work_experiences = list(
            await asyncio.gather(
                *(
                    self._classify_work_experience_details(profile, position, work_classification)
                    for position, work_classification in zip(positions, work_experience_results)
                )
            )
        )

        experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
        # Each value is already rounded to two decimals; round again to drop float noise.
        total_work_experience = round(sum(experience_by_job_type.values()), 2)
        return LinkedinProfileClassificationResults(
            classified_educations=classified_educations,
            classified_work_experiences=classified_work_experiences,
            full_time_work_experience_years=total_work_experience,
            full_time_work_experience_by_secondary=experience_by_job_type,
        )


async def main() -> None:
    """
    Main function to demonstrate the usage of VspDataEnrichment.

    This function loads a sample LinkedIn profile from a JSON file,
    processes it using the VspDataEnrichment class, and prints the results
    as indented JSON on standard output.

    The sample path is relative, so it is resolved against the current
    working directory.
    """
    import json

    # Read the file as UTF-8 explicitly; without it, open() falls back to the
    # platform locale encoding and non-ASCII text may fail to decode.
    with open("tests/test_data/sample_profiles/eric_armagost.json", encoding="utf-8") as f:
        profile_data = json.load(f)

    # Validation does not need the file handle, so it runs after the file is closed.
    profile = LinkedinProfile.model_validate(profile_data)

    # Create an instance of VspDataEnrichment and process the profile
    vsp_enrichment = VspDataEnrichment()
    results = await vsp_enrichment.process_linkedin_profile(profile)

    # Print the results
    print(results.model_dump_json(indent=2))


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())