vulnerability_2_1

Sleeping

App Files Community

leavoigt committed on Sep 25, 2023

Commit

9cbcff4

1 Parent(s): 48bf795

Update utils/sdg_classifier.py

Browse files

Files changed (1) hide show

utils/sdg_classifier.py +13 -13

utils/sdg_classifier.py CHANGED Viewed

@@ -95,7 +95,7 @@ def classification(haystack_doc:List[Document],
     the number of times it is covered/discussed/count_of_paragraphs.
     """
-    logging.info("Working on Vulnerability Classification")
     if not classifier_model:
         if check_streamlit():
             classifier_model = st.session_state['vulnerability_classifier']
@@ -109,27 +109,27 @@ def classification(haystack_doc:List[Document],
     labels_= [(l.meta['classification']['label'],
             l.meta['classification']['score'],l.content,) for l in results]
-    df = DataFrame(labels_, columns=["Vulnerability","Relevancy","text"])
     df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
     df.index += 1
     df =df[df['Relevancy']>threshold]
     # creating the dataframe for value counts of SDG, along with 'title' of SDGs
-    x = df['Vulnerability'].value_counts()
     x = x.rename('count')
-    x = x.rename_axis('Vulnerability').reset_index()
-    x["Vulnerability"] = pd.to_numeric(x["Vulnerability"])
     x = x.sort_values(by=['count'], ascending=False)
-    x['SDG_name'] = x['Vulnerability'].apply(lambda x: _lab_dict[x])
-    x['SDG_Num'] = x['Vulnerability'].apply(lambda x: "Vulnerability "+str(x))
-    df['Vulnerability'] = pd.to_numeric(df['Vulnerability'])
-    df = df.sort_values('Vulnerability')
     return df, x
-def runSDGPreprocessingPipeline(file_name:str, file_path:str,
             split_by: Literal["sentence", "word"] = 'sentence',
             split_length:int = 2, split_respect_sentence_boundary:bool = False,
             split_overlap:int = 0,remove_punc:bool = False)->List[Document]:
@@ -163,9 +163,9 @@ def runSDGPreprocessingPipeline(file_name:str, file_path:str,
     """
-    sdg_processing_pipeline = processingpipeline()
-    output_sdg_pre = sdg_processing_pipeline.run(file_paths = file_path,
                             params= {"FileConverter": {"file_path": file_path, \
                                         "file_name": file_name},
                                      "UdfPreProcessor": {"remove_punc": remove_punc, \
@@ -174,4 +174,4 @@ def runSDGPreprocessingPipeline(file_name:str, file_path:str,
                                             "split_overlap": split_overlap, \
         "split_respect_sentence_boundary":split_respect_sentence_boundary}})
-    return output_sdg_pre

     the number of times it is covered/discussed/count_of_paragraphs.
     """
+    logging.info("Working on vulnerability Classification")
     if not classifier_model:
         if check_streamlit():
             classifier_model = st.session_state['vulnerability_classifier']
     labels_= [(l.meta['classification']['label'],
             l.meta['classification']['score'],l.content,) for l in results]
+    df = DataFrame(labels_, columns=["vulnerability","Relevancy","text"])
     df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
     df.index += 1
     df =df[df['Relevancy']>threshold]
     # creating the dataframe for value counts of SDG, along with 'title' of SDGs
+    x = df['vulnerability'].value_counts()
     x = x.rename('count')
+    x = x.rename_axis('vulnerability').reset_index()
+    x["Vulnerability"] = pd.to_numeric(x["vulnerability"])
     x = x.sort_values(by=['count'], ascending=False)
+    x['vulnerability_name'] = x['vulnerability'].apply(lambda x: _lab_dict[x])
+    x['vulnerability_Num'] = x['vulnerability'].apply(lambda x: "vulnerability "+str(x))
+    df['vulnerability'] = pd.to_numeric(df['vulnerability'])
+    df = df.sort_values('vulnerability')
     return df, x
+def runPreprocessingPipeline(file_name:str, file_path:str,
             split_by: Literal["sentence", "word"] = 'sentence',
             split_length:int = 2, split_respect_sentence_boundary:bool = False,
             split_overlap:int = 0,remove_punc:bool = False)->List[Document]:
     """
+    processing_pipeline = processingpipeline()
+    output_pre = processing_pipeline.run(file_paths = file_path,
                             params= {"FileConverter": {"file_path": file_path, \
                                         "file_name": file_name},
                                      "UdfPreProcessor": {"remove_punc": remove_punc, \
                                             "split_overlap": split_overlap, \
         "split_respect_sentence_boundary":split_respect_sentence_boundary}})
+    return output_pre