Spaces:
Running
Running
| from slotmatch.schema import SchemaValidator | |
| from slotmatch.utils import extract_value_by_regex, fuzzy_match_key, compute_confidence | |
class SlotExtractor:
    """Pull the slots named by a schema out of free-form text.

    For every schema key the extractor first looks the key up verbatim via
    ``extract_value_by_regex``; if that finds nothing it fuzzy-matches the
    schema key against the key-like tokens found in the text and retries
    with the best match. Keys that cannot be resolved are still reported,
    with a ``None`` value and a confidence of ``0.0``.
    """

    def __init__(self, schema: dict):
        """Validate ``schema`` and cache its keys.

        Args:
            schema: Mapping handed to ``SchemaValidator``. The values of the
                validated schema are used as callables to coerce extracted
                strings (see ``_coerce_type``).
        """
        self.validator = SchemaValidator(schema)
        self.schema = self.validator.get_schema()
        self.schema_keys = list(self.schema.keys())

    def extract(self, text: str) -> dict:
        """Extract a value and confidence for every schema key.

        Args:
            text: The text to search.

        Returns:
            A dict with one entry per schema key, each of the form
            ``{"value": ..., "confidence": ...}``. Exact matches carry
            ``compute_confidence("regex")``; fuzzy matches carry
            ``compute_confidence("fuzzy")`` scaled by the fuzzy score;
            unresolved keys carry ``None`` / ``0.0``.
        """
        result = {}
        # The candidate keys depend only on ``text``, so scan for them at
        # most once per call, and only if some key actually needs the
        # fuzzy path.
        candidate_keys = None

        for expected_key in self.schema_keys:
            expected_type = self.schema[expected_key]

            # 1. Try the schema key verbatim.
            raw_value = extract_value_by_regex(text, expected_key)
            if raw_value is not None:
                result[expected_key] = {
                    "value": self._coerce_type(raw_value, expected_type),
                    "confidence": compute_confidence("regex"),
                }
                continue

            # 2. Try the closest key that actually appears in the text.
            if candidate_keys is None:
                candidate_keys = self._get_all_keys_from_text(text)
            fuzzy_key, score = fuzzy_match_key(expected_key, candidate_keys)
            if fuzzy_key:
                raw_value = extract_value_by_regex(text, fuzzy_key)
                if raw_value is not None:
                    result[expected_key] = {
                        "value": self._coerce_type(raw_value, expected_type),
                        "confidence": compute_confidence("fuzzy") * score,
                    }
                    continue

            # 3. Fallback: always report the key so callers can rely on
            #    every schema key being present in the result.
            result[expected_key] = {
                "value": None,
                "confidence": 0.0,
            }

        return result

    def _get_all_keys_from_text(self, text: str) -> list:
        """Return the distinct key-like tokens found in ``text``.

        A token counts as a key when it consists of word characters or
        hyphens, is optionally quoted, and is followed (after optional
        whitespace) by ``:`` or ``=``.

        Returns:
            The unique tokens in order of first appearance. The order is
            stable across runs, unlike a ``set``-based de-duplication whose
            order depends on per-process string hashing.
        """
        import re

        pattern = r'["\']?([\w-]+)["\']?\s*[:=]'
        # dict.fromkeys de-duplicates while keeping insertion order.
        return list(dict.fromkeys(re.findall(pattern, text)))

    def _coerce_type(self, value, expected_type):
        """Convert ``value`` with ``expected_type``, best effort.

        Args:
            value: The raw extracted value (normally a string).
            expected_type: Callable used for the conversion. ``bool`` is
                special-cased because ``bool("false")`` would be ``True``.

        Returns:
            The converted value, or ``value`` unchanged when the conversion
            raises.
        """
        try:
            if expected_type is bool:
                if isinstance(value, bool):
                    return value
                # Normalise so " True " or a numeric 1 are recognised too.
                return str(value).strip().lower() in ("true", "yes", "1")
            return expected_type(value)
        except Exception:
            # Deliberate best-effort fallback: keep the raw value rather
            # than fail the whole extraction. ``Exception`` (not a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            return value