Commit
·
0e70948
1
Parent(s):
2856ca3
updated the input
Browse files
app.py
CHANGED
|
@@ -141,6 +141,20 @@ def webpage_to_json_wrapper(content: str, is_url: bool, schema_input: str) -> Di
|
|
| 141 |
return {"error": f"Schema parsing error: {str(e)}"}
|
| 142 |
|
| 143 |
def webpage_to_json(content: str, is_url: bool, schema: BaseModel) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
prompt_template = """Extract the following information from the provided content according to the specified schema.
|
| 145 |
|
| 146 |
Content to analyze:
|
|
|
|
| 141 |
return {"error": f"Schema parsing error: {str(e)}"}
|
| 142 |
|
| 143 |
def webpage_to_json(content: str, is_url: bool, schema: BaseModel) -> Dict[str, Any]:
|
| 144 |
+
"""
|
| 145 |
+
Extracts structured JSON information from the given content based on a specified schema.
|
| 146 |
+
This function sets up a processing pipeline that includes:
|
| 147 |
+
- Preprocessing the input content.
|
| 148 |
+
- Utilizing an AI language model to extract information according to the provided schema.
|
| 149 |
+
- Postprocessing the extracted output to match the exact schema requirements.
|
| 150 |
+
Parameters:
|
| 151 |
+
content (str): The input content to be analyzed. This can be either raw text or a URL.
|
| 152 |
+
is_url (bool): A flag indicating whether the provided content is a URL (True) or raw text (False).
|
| 153 |
+
schema (BaseModel): A Pydantic BaseModel defining the expected structure and data types for the output.
|
| 154 |
+
Returns:
|
| 155 |
+
Dict[str, Any]: A dictionary containing the extracted data matching the schema. In case of errors during initialization
|
| 156 |
+
or processing, the dictionary will include an "error" key with a descriptive message.
|
| 157 |
+
"""
|
| 158 |
prompt_template = """Extract the following information from the provided content according to the specified schema.
|
| 159 |
|
| 160 |
Content to analyze:
|