Spaces:

Skitzo-4152
/

vlsi

Runtime error

App Files Files Community

Skitzo-4152 committed on Sep 19

Commit

81d1046

verified ·

1 Parent(s): c3d23b3

Create preprocessor.py

Browse files

Files changed (1) hide show

data/preprocessor.py +40 -0

data/preprocessor.py ADDED Viewed

	@@ -0,0 +1,40 @@

#!/usr/bin/env python3
"""
Data Preprocessor for ChipVerifyAI
Preprocess datasets for ML training and inference
"""
import pandas as pd
import numpy as np


class DataPreprocessor:
    """Preprocess data for ML models.

    Holds the list of feature column names (``feature_columns``) and fills
    missing values in whichever of those columns are present in a DataFrame.
    """

    def __init__(self):
        # Columns that are imputed when present in the input DataFrame.
        # Columns missing from the input are skipped, not created.
        self.feature_columns = [
            'lines_of_code', 'module_count', 'signal_count', 'always_blocks',
            'assign_statements', 'if_statements', 'case_statements', 'for_loops',
            'function_count', 'task_count', 'clock_domains', 'reset_signals',
            'interface_signals', 'memory_instances', 'fsm_count', 'pipeline_stages',
            'arithmetic_units', 'complexity_score', 'has_memory', 'has_fsm',
            'has_pipeline', 'has_floating_point', 'is_complex', 'is_large'
        ]

    @staticmethod
    def _is_boolean_like(series: pd.Series) -> bool:
        """Return True if *series* holds only booleans (ignoring missing values).

        A NumPy ``bool`` column cannot store missing values, so a flag column
        with gaps shows up as nullable ``boolean`` or as ``object`` dtype.
        An object column counts as boolean-like only when it has at least one
        present value and every present value is a real bool.
        """
        if pd.api.types.is_bool_dtype(series):
            return True
        if series.dtype != object:
            return False
        present = series.dropna()
        if present.empty:
            return False
        return bool(
            present.map(lambda v: isinstance(v, (bool, np.bool_))).all()
        )

    def preprocess_for_ml(self, df: pd.DataFrame) -> pd.DataFrame:
        """Preprocess DataFrame for ML training.

        Works on a copy; the input DataFrame is not modified.

        For each name in ``self.feature_columns`` that exists in *df*:

        * boolean-like columns (NumPy ``bool``, nullable ``boolean``, or
          object columns holding only ``True``/``False`` plus missing values)
          have missing entries replaced with ``False`` and are normalised to
          NumPy ``bool``;
        * every other column has missing entries replaced with the column
          median (non-numeric columns raise from ``median()`` as before).

        Finally every ``bool`` column in the frame, feature or not, is cast
        to ``int``.

        Args:
            df: Input table; any subset of the feature columns may be present.

        Returns:
            A new DataFrame with imputed feature columns and integer-encoded
            booleans.
        """
        processed_df = df.copy()

        # Fill missing
        for col in self.feature_columns:
            if col not in processed_df.columns:
                continue
            series = processed_df[col]
            if self._is_boolean_like(series):
                # Map element-wise rather than fillna() so object columns are
                # converted without relying on pandas' implicit downcasting.
                processed_df[col] = series.map(
                    lambda v: False if pd.isna(v) else bool(v)
                ).astype(bool)
            else:
                processed_df[col] = series.fillna(series.median())

        # Convert booleans to int
        bool_cols = processed_df.select_dtypes(include=['bool']).columns
        if len(bool_cols):
            processed_df[bool_cols] = processed_df[bool_cols].astype(int)

        # Optional: Remove outliers here if needed
        return processed_df