Spaces:
Sleeping
Sleeping
# modules/knowledge_base.py
| import json | |
| from pathlib import Path | |
| from utils.logger import log | |
class KnowledgeBase:
    """In-memory travel knowledge base loaded from a JSON file.

    The JSON document is expected to be an object whose ``clean_knowledge``
    key holds a list of records.  Each record is read through the path
    ``record['knowledge']['travel_knowledge']``, whose ``destination_info``
    entry may carry ``primary_destinations`` (list of city names),
    ``countries`` (list of country names) and ``region_type`` (a string).

    Three inverted indexes (city / country / region -> list of record
    positions in ``self.knowledge``) are built at construction time and are
    used by :meth:`search`, :meth:`get_similar_destinations` and
    :meth:`get_statistics`.
    """

    # Static city -> country lookup used to widen a city query to its country.
    # NOTE(review): this table and _CITY_REGION_MAPPING do not cover the same
    # set of cities (e.g. some cities listed here have no region entry, and
    # the Eastern Europe cities have no country entry) - confirm whether that
    # is intentional.
    _CITY_COUNTRY_MAPPING = {
        # Central Europe
        "布拉格": "捷克", "布尔诺": "捷克", "库特纳霍拉": "捷克",
        "维也纳": "奥地利", "萨尔茨堡": "奥地利", "哈尔施塔特": "奥地利", "巴德伊舍": "奥地利",
        "布达佩斯": "匈牙利", "德布勒森": "匈牙利", "圣安德烈": "匈牙利",
        "布拉迪斯拉发": "斯洛伐克",
        # Western Europe
        "巴黎": "法国", "里昂": "法国", "尼斯": "法国", "马赛": "法国",
        "柏林": "德国", "慕尼黑": "德国", "汉堡": "德国", "科隆": "德国", "法兰克福": "德国",
        "阿姆斯特丹": "荷兰", "鹿特丹": "荷兰", "海牙": "荷兰",
        "布鲁塞尔": "比利时", "安特卫普": "比利时", "布吕赫": "比利时",
        "卢森堡市": "卢森堡",
        "苏黎世": "瑞士", "日内瓦": "瑞士", "因特拉肯": "瑞士",
        # Southern Europe
        "罗马": "意大利", "米兰": "意大利", "威尼斯": "意大利", "佛罗伦萨": "意大利",
        "马德里": "西班牙", "巴塞罗那": "西班牙", "塞维利亚": "西班牙",
        "里斯本": "葡萄牙", "波尔图": "葡萄牙",
        "雅典": "希腊", "圣托里尼": "希腊", "米科诺斯": "希腊",
        # Northern Europe
        "斯德哥尔摩": "瑞典", "哥德堡": "瑞典",
        "奥斯陆": "挪威", "卑尔根": "挪威",
        "哥本哈根": "丹麦", "奥胡斯": "丹麦",
        "赫尔辛基": "芬兰", "坦佩雷": "芬兰",
        "雷克雅未克": "冰岛",
        # United Kingdom
        "伦敦": "英国", "爱丁堡": "英国", "曼彻斯特": "英国",
    }

    # Static city -> region lookup used to widen a city query to its region.
    _CITY_REGION_MAPPING = {
        # Central Europe
        "布拉格": "中欧", "布尔诺": "中欧", "库特纳霍拉": "中欧",
        "维也纳": "中欧", "萨尔茨堡": "中欧", "哈尔施塔特": "中欧", "巴德伊舍": "中欧",
        "布达佩斯": "中欧", "德布勒森": "中欧", "圣安德烈": "中欧",
        "布拉迪斯拉发": "中欧",
        # Western Europe
        "巴黎": "西欧", "里昂": "西欧", "尼斯": "西欧",
        "柏林": "西欧", "慕尼黑": "西欧", "汉堡": "西欧",
        "阿姆斯特丹": "西欧", "鹿特丹": "西欧",
        "布鲁塞尔": "西欧", "安特卫普": "西欧",
        "苏黎世": "西欧", "日内瓦": "西欧",
        # Eastern Europe (following the knowledge base's own classification)
        "华沙": "东欧", "克拉科夫": "东欧",
        "莫斯科": "东欧", "圣彼得堡": "东欧",
        # Southern Europe
        "罗马": "南欧", "米兰": "南欧", "威尼斯": "南欧",
        "马德里": "南欧", "巴塞罗那": "南欧",
        "里斯本": "南欧", "波尔图": "南欧",
        "雅典": "南欧", "圣托里尼": "南欧",
        # Northern Europe
        "斯德哥尔摩": "北欧", "哥德堡": "北欧",
        "奥斯陆": "北欧", "卑尔根": "北欧",
        "哥本哈根": "北欧", "赫尔辛基": "北欧",
        "雷克雅未克": "北欧",
    }

    def __init__(self, file_path: Path = Path("./config/general_travelplan.json")):
        """Load the knowledge records from *file_path* and index them.

        Args:
            file_path: JSON file whose top-level object contains the
                ``clean_knowledge`` list.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        self.knowledge = []
        self.city_index = {}     # city name    -> [record positions]
        self.country_index = {}  # country name -> [record positions]
        self.region_index = {}   # region type  -> [record positions]
        with open(file_path, 'r', encoding='utf-8') as f:
            self.knowledge = json.load(f).get('clean_knowledge', [])
        # Without this call every index stays empty and only the fuzzy
        # fallback of search() can ever return anything.
        self._build_indexes()
        log.info("✅ 知识库加载完成")

    @staticmethod
    def _destination_info(item: dict) -> dict:
        """Return the ``destination_info`` dict of one record ({} if absent)."""
        # ``or {}`` also covers JSON ``null`` at any level of the path.
        travel = (item.get('knowledge') or {}).get('travel_knowledge') or {}
        return travel.get('destination_info') or {}

    @staticmethod
    def _merge_unique(first: list, second: list) -> list:
        """Concatenate two lists, dropping duplicates but keeping first-seen order.

        Uses equality rather than hashing so that unhashable elements
        (e.g. dicts coming from JSON) are supported.
        """
        merged = []
        for value in [*first, *second]:
            if value not in merged:
                merged.append(value)
        return merged

    def _extend_unique(self, results: list, indices: list) -> None:
        """Append the records at *indices* to *results*, skipping equal ones."""
        for idx in indices:
            record = self.knowledge[idx]
            if record not in results:
                results.append(record)

    def _build_indexes(self):
        """Build the city / country / region lookup indexes.

        The indexes are rebuilt from scratch, so calling this more than once
        does not produce duplicate entries.
        """
        self.city_index = {}
        self.country_index = {}
        self.region_index = {}
        for idx, item in enumerate(self.knowledge):
            dest_info = self._destination_info(item)
            if not dest_info:
                continue
            for city in dest_info.get('primary_destinations', []):
                self.city_index.setdefault(city, []).append(idx)
            for country in dest_info.get('countries', []):
                self.country_index.setdefault(country, []).append(idx)
            region_type = dest_info.get('region_type', '')
            if region_type:
                self.region_index.setdefault(region_type, []).append(idx)

    def search(self, query: str) -> list:
        """Return the knowledge records relevant to *query*.

        Matching is attempted in this order, accumulating results without
        duplicates:

        1. exact city name in the city index;
        2. records of the country the city belongs to (static table);
        3. records of the region the city belongs to (static table);
        4. only if nothing was found so far: case-insensitive substring
           match between the query and every primary destination.

        Surrounding whitespace in *query* is ignored.  An empty query
        returns an empty list.
        """
        log.info(f"🔍 在知识库中搜索: '{query}'")
        term = query.strip() if isinstance(query, str) else ""
        relevant_knowledge = []

        if not term:
            # An empty needle is a substring of everything; without this
            # guard the fuzzy stage would return the whole knowledge base.
            log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
            return relevant_knowledge

        # 1. Direct city match.
        if term in self.city_index:
            city_hits = self.city_index[term]
            self._extend_unique(relevant_knowledge, city_hits)
            log.info(f"✅ 通过城市直接匹配找到 {len(city_hits)} 条记录")

        # 2. Country match.
        matching_country = self._find_country_for_city(term)
        if matching_country and matching_country in self.country_index:
            self._extend_unique(relevant_knowledge, self.country_index[matching_country])
            log.info(f"✅ 通过国家匹配({matching_country})找到额外记录")

        # 3. Region match.
        matching_region = self._find_region_for_city(term)
        if matching_region and matching_region in self.region_index:
            self._extend_unique(relevant_knowledge, self.region_index[matching_region])
            log.info(f"✅ 通过地区匹配({matching_region})找到额外记录")

        # 4. Fuzzy fallback.
        if not relevant_knowledge:
            log.info("🔍 尝试模糊匹配...")
            needle = term.lower()
            for item in self.knowledge:
                for dest in self._destination_info(item).get('primary_destinations', []):
                    # Skip blanks/non-strings: "" would match every query.
                    if not isinstance(dest, str) or not dest:
                        continue
                    dest_lower = dest.lower()
                    if needle in dest_lower or dest_lower in needle:
                        if item not in relevant_knowledge:
                            relevant_knowledge.append(item)
                            log.info(f"✅ 模糊匹配找到: {dest}")
                        # One matching destination is enough for this record.
                        break

        log.info(f"📊 搜索完成,共找到 {len(relevant_knowledge)} 条相关记录")
        return relevant_knowledge

    def _find_country_for_city(self, city_name: str) -> str:
        """Return the country of *city_name* from the static table, or ""."""
        return self._CITY_COUNTRY_MAPPING.get(city_name, "")

    def _find_region_for_city(self, city_name: str) -> str:
        """Return the region of *city_name* from the static table, or ""."""
        return self._CITY_REGION_MAPPING.get(city_name, "")

    def get_knowledge_by_destination(self, destination: str) -> dict:
        """Return one merged knowledge dict for *destination*.

        All records returned by :meth:`search` are folded together:

        * ``destination_info``: dict-updated record by record.
        * ``budget_analysis``: the entry with the most keys wins.
        * ``detailed_itinerary``: concatenated, then de-duplicated on
          ``(day_number, location)`` keeping the first occurrence.
        * ``professional_insights``: first value per key wins; when both the
          stored and the new value are lists they are merged without
          duplicates, preserving order.

        Returns an empty dict when nothing matches.
        """
        relevant_items = self.search(destination)
        if not relevant_items:
            log.warning(f"⚠️ 未找到关于 '{destination}' 的知识")
            return {}

        merged_knowledge = {
            "destination_info": {},
            "budget_analysis": {},
            "detailed_itinerary": [],
            "professional_insights": {}
        }

        for item in relevant_items:
            knowledge = (item.get('knowledge') or {}).get('travel_knowledge') or {}

            # NOTE(review): update() lets later (country/region-level) records
            # overwrite keys coming from the direct city match - confirm this
            # precedence is intended.
            if 'destination_info' in knowledge:
                merged_knowledge['destination_info'].update(knowledge['destination_info'])

            # Keep the most detailed budget analysis (largest number of keys).
            if 'budget_analysis' in knowledge:
                candidate = knowledge['budget_analysis']
                current = merged_knowledge['budget_analysis']
                if not current or len(candidate) > len(current):
                    merged_knowledge['budget_analysis'] = candidate

            if 'detailed_itinerary' in knowledge:
                merged_knowledge['detailed_itinerary'].extend(knowledge['detailed_itinerary'])

            if 'professional_insights' in knowledge:
                insights = merged_knowledge['professional_insights']
                for key, value in knowledge['professional_insights'].items():
                    if key not in insights:
                        insights[key] = value
                    elif isinstance(value, list) and isinstance(insights[key], list):
                        # Order-preserving, works for unhashable elements too.
                        insights[key] = self._merge_unique(insights[key], value)

        # De-duplicate itinerary days on (day_number, location).
        if merged_knowledge['detailed_itinerary']:
            seen_days = set()
            unique_itinerary = []
            for day_plan in merged_knowledge['detailed_itinerary']:
                day_key = (day_plan.get('day_number', 0), day_plan.get('location', ''))
                if day_key not in seen_days:
                    seen_days.add(day_key)
                    unique_itinerary.append(day_plan)
            merged_knowledge['detailed_itinerary'] = unique_itinerary

        log.info(f"📚 为 '{destination}' 合并了 {len(relevant_items)} 条知识记录")
        return merged_knowledge

    def get_similar_destinations(self, destination: str, limit: int = 5) -> list:
        """Return up to *limit* destinations related to *destination*.

        Destinations found in records of the same country come first,
        followed by destinations from records of the same region.  The
        queried destination itself is never included.  A non-positive
        *limit* yields an empty list.
        """
        if limit <= 0:
            return []

        similar_destinations = []
        target_country = self._find_country_for_city(destination)
        target_region = self._find_region_for_city(destination)

        # Same-country records take priority over same-region records.
        index_groups = []
        if target_country and target_country in self.country_index:
            index_groups.append(self.country_index[target_country])
        if target_region and target_region in self.region_index:
            index_groups.append(self.region_index[target_region])

        for indices in index_groups:
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                for dest in dest_info.get('primary_destinations', []):
                    if dest != destination and dest not in similar_destinations:
                        similar_destinations.append(dest)
                        if len(similar_destinations) >= limit:
                            return similar_destinations
        return similar_destinations

    def get_statistics(self) -> dict:
        """Return summary counts about the knowledge base.

        Keys: ``total_records``, ``cities_covered``, ``countries_covered``,
        ``regions_covered``, ``cities_by_region`` (region -> number of
        distinct primary destinations) and ``popular_cities`` (up to ten
        ``(city, record_count)`` tuples, most frequent first).
        """
        stats = {
            "total_records": len(self.knowledge),
            "cities_covered": len(self.city_index),
            "countries_covered": len(self.country_index),
            "regions_covered": len(self.region_index),
            "cities_by_region": {},
            "popular_cities": []
        }

        # Count distinct cities per region.
        for region, indices in self.region_index.items():
            cities_in_region = set()
            for idx in indices:
                dest_info = self._destination_info(self.knowledge[idx])
                cities_in_region.update(dest_info.get('primary_destinations', []))
            stats["cities_by_region"][region] = len(cities_in_region)

        # Rank cities by the number of records that mention them; sorted()
        # is stable, so ties keep index insertion order.
        city_frequency = [(city, len(indices)) for city, indices in self.city_index.items()]
        city_frequency.sort(key=lambda pair: pair[1], reverse=True)
        stats["popular_cities"] = city_frequency[:10]
        return stats