Spaces:
Sleeping
Sleeping
Rename utils/indicator_classifier.py to utils/vulnerability_classifier.py
Browse files
utils/{indicator_classifier.py → vulnerability_classifier.py}
RENAMED
|
@@ -10,7 +10,7 @@ from transformers import pipeline
|
|
| 10 |
|
| 11 |
|
| 12 |
@st.cache_resource
|
| 13 |
-
def
|
| 14 |
"""
|
| 15 |
loads the document classifier using haystack, where the name/path of model
|
| 16 |
in HF-hub as string is used to fetch the model object.Either configfile or
|
|
@@ -30,9 +30,9 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
|
|
| 30 |
return
|
| 31 |
else:
|
| 32 |
config = getconfig(config_file)
|
| 33 |
-
classifier_name = config.get('
|
| 34 |
|
| 35 |
-
logging.info("Loading
|
| 36 |
# we are using the pipeline as the model is multilabel and DocumentClassifier
|
| 37 |
# from Haystack doesnt support multilabel
|
| 38 |
# in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
|
|
@@ -51,7 +51,7 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
|
|
| 51 |
|
| 52 |
|
| 53 |
@st.cache_data
|
| 54 |
-
def
|
| 55 |
threshold:float = 0.5,
|
| 56 |
classifier_model:pipeline= None
|
| 57 |
)->Tuple[DataFrame,Series]:
|
|
@@ -74,14 +74,14 @@ def indicator_classification(haystack_doc:pd.DataFrame,
|
|
| 74 |
x: Series object with the unique SDG covered in the document uploaded and
|
| 75 |
the number of times it is covered/discussed/count_of_paragraphs.
|
| 76 |
"""
|
| 77 |
-
logging.info("Working on
|
| 78 |
haystack_doc['Indicator Label'] = 'NA'
|
| 79 |
haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
|
| 80 |
|
| 81 |
df1 = haystack_doc[haystack_doc['PA_check'] == True]
|
| 82 |
df = haystack_doc[haystack_doc['PA_check'] == False]
|
| 83 |
if not classifier_model:
|
| 84 |
-
classifier_model = st.session_state['
|
| 85 |
|
| 86 |
predictions = classifier_model(list(df1.text))
|
| 87 |
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
@st.cache_resource
|
| 13 |
+
def load_vulnerabilityClassifier(config_file:str = None, classifier_name:str = None):
|
| 14 |
"""
|
| 15 |
loads the document classifier using haystack, where the name/path of model
|
| 16 |
in HF-hub as string is used to fetch the model object.Either configfile or
|
|
|
|
| 30 |
return
|
| 31 |
else:
|
| 32 |
config = getconfig(config_file)
|
| 33 |
+
classifier_name = config.get('vulnerability','MODEL')
|
| 34 |
|
| 35 |
+
logging.info("Loading vulnerability classifier")
|
| 36 |
# we are using the pipeline as the model is multilabel and DocumentClassifier
|
| 37 |
# from Haystack doesnt support multilabel
|
| 38 |
# in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
@st.cache_data
|
| 54 |
+
def vulnerability_classification(haystack_doc:pd.DataFrame,
|
| 55 |
threshold:float = 0.5,
|
| 56 |
classifier_model:pipeline= None
|
| 57 |
)->Tuple[DataFrame,Series]:
|
|
|
|
| 74 |
x: Series object with the unique SDG covered in the document uploaded and
|
| 75 |
the number of times it is covered/discussed/count_of_paragraphs.
|
| 76 |
"""
|
| 77 |
+
logging.info("Working on vulnerability Identification")
|
| 78 |
haystack_doc['Indicator Label'] = 'NA'
|
| 79 |
haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
|
| 80 |
|
| 81 |
df1 = haystack_doc[haystack_doc['PA_check'] == True]
|
| 82 |
df = haystack_doc[haystack_doc['PA_check'] == False]
|
| 83 |
if not classifier_model:
|
| 84 |
+
classifier_model = st.session_state['vulnerability_classifier']
|
| 85 |
|
| 86 |
predictions = classifier_model(list(df1.text))
|
| 87 |
|