Spaces:
Runtime error
Runtime error
| from dataclasses import dataclass | |
| from datetime import datetime | |
| from typing import List, Optional, Any, Dict | |
# Revised data classes (fields are Optional and have default values)
@dataclass
class Author:
    """Record for one author entry; every field defaults to ``None``.

    The ``@dataclass`` decorator generates the keyword ``__init__`` that
    ``parse_article`` relies on when it calls ``Author(_id=..., name=...,
    hidden=...)``.
    """

    _id: Optional[str] = None
    name: Optional[str] = None
    hidden: Optional[bool] = None
@dataclass
class Paper:
    """Record for the ``paper`` part of an article; all fields are optional.

    ``authors`` defaults to ``None`` (not an empty list), so its annotation
    is ``Optional``. The element type is a quoted forward reference so the
    class does not depend on definition order.
    """

    id: Optional[str] = None
    authors: Optional[List["Author"]] = None
    publishedAt: Optional[datetime] = None
    title: Optional[str] = None
    summary: Optional[str] = None
    upvotes: Optional[int] = None
    discussionId: Optional[str] = None
@dataclass
class SubmittedBy:
    """Record for the ``submittedBy`` part of an article; all fields optional.

    Field names mirror the keys that ``parse_article`` reads from the raw
    ``submittedBy`` mapping, which is why they are camelCase and why one of
    them is called ``type``.
    """

    _id: Optional[str] = None
    avatarUrl: Optional[str] = None
    fullname: Optional[str] = None
    name: Optional[str] = None
    type: Optional[str] = None
    isPro: Optional[bool] = None
    isHf: Optional[bool] = None
    isMod: Optional[bool] = None
    followerCount: Optional[int] = None
@dataclass
class Article:
    """Top-level record returned by ``parse_article``; all fields optional.

    The nested ``paper`` and ``submittedBy`` types are quoted forward
    references so the class does not depend on definition order.
    """

    paper: Optional["Paper"] = None
    publishedAt: Optional[datetime] = None
    title: Optional[str] = None
    thumbnail: Optional[str] = None
    numComments: Optional[int] = None
    submittedBy: Optional["SubmittedBy"] = None
    isAuthorParticipating: Optional[bool] = None
def safe_get(data: Dict, *keys: str) -> Any:
    """Safely fetch a value from nested dictionaries.

    Args:
        data: The outermost mapping to read from.
        *keys: Keys to follow, one nesting level per key.

    Returns:
        The value reached after following every key, or ``None`` when a key
        is missing, when an intermediate value is not a dict, or when the
        final value is an empty dict.
    """
    current: Any = data
    for key in keys:
        # Stop as soon as the path leaves dictionary territory.
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    # An empty dict is reported as "no value", matching a missing key.
    return None if current == {} else current
def parse_article(data: Dict[str, Any]) -> Article:
    """Build an ``Article`` from a raw dictionary, tolerating missing data.

    Args:
        data: Raw article mapping. The keys read are ``paper``,
            ``submittedBy``, ``publishedAt``, ``title``, ``thumbnail``,
            ``numComments`` and ``isAuthorParticipating``.

    Returns:
        An ``Article``. ``paper`` is ``None`` when the raw ``paper`` value is
        missing or empty; ``submittedBy`` is ``None`` when the raw value is
        missing, empty, or not a dict; timestamps that cannot be parsed
        become ``None``.
    """

    def parse_datetime(dt_str: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 string, returning ``None`` when it is unusable."""
        # Non-string values (numbers, dicts, ...) are treated like a missing
        # timestamp instead of raising AttributeError on ``.endswith``.
        if not dt_str or not isinstance(dt_str, str):
            return None
        # datetime.fromisoformat() only accepts a trailing "Z" from
        # Python 3.11 on, so rewrite it as an explicit UTC offset.
        if dt_str.endswith("Z"):
            dt_str = dt_str[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(dt_str)
        except ValueError:
            return None

    paper_data = safe_get(data, "paper")

    # Parse the author list, skipping anything that is not a dict entry so a
    # malformed list cannot abort the whole article.
    raw_authors = safe_get(paper_data, "authors")
    if not isinstance(raw_authors, list):
        raw_authors = []
    authors = [
        Author(
            _id=entry.get("_id"),
            name=entry.get("name"),
            hidden=entry.get("hidden"),
        )
        for entry in raw_authors
        if isinstance(entry, dict)
    ]

    # Parse the paper; a missing or empty ``paper`` value yields None.
    paper = None
    if paper_data:
        paper = Paper(
            id=safe_get(paper_data, "id"),
            authors=authors,
            publishedAt=parse_datetime(safe_get(paper_data, "publishedAt")),
            title=safe_get(paper_data, "title"),
            summary=safe_get(paper_data, "summary"),
            upvotes=safe_get(paper_data, "upvotes"),
            discussionId=safe_get(paper_data, "discussionId"),
        )

    # Parse the submitter; safe_get already maps an empty dict to None, and
    # the isinstance check also rejects non-dict values.
    submitter = safe_get(data, "submittedBy")
    submitted_by = None
    if isinstance(submitter, dict):
        submitted_by = SubmittedBy(
            _id=submitter.get("_id"),
            avatarUrl=submitter.get("avatarUrl"),
            fullname=submitter.get("fullname"),
            name=submitter.get("name"),
            type=submitter.get("type"),
            isPro=submitter.get("isPro"),
            isHf=submitter.get("isHf"),
            isMod=submitter.get("isMod"),
            followerCount=submitter.get("followerCount"),
        )

    # Assemble the final object.
    return Article(
        paper=paper,
        publishedAt=parse_datetime(data.get("publishedAt")),
        title=data.get("title"),
        thumbnail=data.get("thumbnail"),
        numComments=data.get("numComments"),
        submittedBy=submitted_by,
        isAuthorParticipating=data.get("isAuthorParticipating"),
    )
# Usage example
if __name__ == "__main__":
    import json

    from rich import print

    # The raw data is assumed to be stored in article.json; the encoding is
    # given explicitly so the read does not depend on the platform default.
    with open("article.json", encoding="utf-8") as f:
        raw_data = json.load(f)

    articles = [parse_article(raw_article) for raw_article in raw_data]

    # Guard the preview so an empty input file does not raise IndexError.
    if articles:
        print(articles[0])
    print(len(articles))