Commit
·
03d1834
1
Parent(s):
9eee872
bruh
Browse files
- web2json/pipeline.py +6 -6
web2json/pipeline.py
CHANGED
|
@@ -27,16 +27,16 @@ class Pipeline:
|
|
| 27 |
"""
|
| 28 |
# Step 1: Preprocess the content
|
| 29 |
preprocessed_content = self.preprocessor.preprocess(content, is_url)
|
| 30 |
-
|
| 31 |
-
|
| 32 |
# Step 2: Extract structured information using AI
|
| 33 |
extracted_data = self.ai_extractor.extract(preprocessed_content, schema)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
# Step 3: Post-process the extracted data
|
| 37 |
final_output = self.postprocessor.process(extracted_data)
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
return final_output
|
| 42 |
|
|
|
|
| 27 |
"""
|
| 28 |
# Step 1: Preprocess the content
|
| 29 |
preprocessed_content = self.preprocessor.preprocess(content, is_url)
|
| 30 |
+
print(f"Preprocessed content: {preprocessed_content[:100]}...")
|
| 31 |
+
print('+'*80)
|
| 32 |
# Step 2: Extract structured information using AI
|
| 33 |
extracted_data = self.ai_extractor.extract(preprocessed_content, schema)
|
| 34 |
+
print(f"Extracted data: {extracted_data[:100]}...")
|
| 35 |
+
print('+'*80)
|
| 36 |
# Step 3: Post-process the extracted data
|
| 37 |
final_output = self.postprocessor.process(extracted_data)
|
| 38 |
+
print(f"Final output: {final_output}")
|
| 39 |
+
print('+'*80)
|
| 40 |
|
| 41 |
return final_output
|
| 42 |
|