vulnerability_2_1

Sleeping

App Files Community

leavoigt committed on Sep 22, 2023

Commit

c8b94cd

1 Parent(s): a29c372

Update utils/sdg_classifier.py

Browse files

Files changed (1) hide show

utils/sdg_classifier.py +31 -31

utils/sdg_classifier.py CHANGED Viewed

@@ -14,27 +14,27 @@ except ImportError:
     logging.info("Streamlit not installed")
 ## Labels dictionary ###
-_lab_dict = {0: 'no_cat',
-            1:'SDG 1 - No poverty',
-            2:'SDG 2 - Zero hunger',
-            3:'SDG 3 - Good health and well-being',
-            4:'SDG 4 - Quality education',
-            5:'SDG 5 - Gender equality',
-            6:'SDG 6 - Clean water and sanitation',
-            7:'SDG 7 - Affordable and clean energy',
-            8:'SDG 8 - Decent work and economic growth',
-            9:'SDG 9 - Industry, Innovation and Infrastructure',
-            10:'SDG 10 - Reduced inequality',
-            11:'SDG 11 - Sustainable cities and communities',
-            12:'SDG 12 - Responsible consumption and production',
-            13:'SDG 13 - Climate action',
-            14:'SDG 14 - Life below water',
-            15:'SDG 15 - Life on land',
-            16:'SDG 16 - Peace, justice and strong institutions',
-            17:'SDG 17 - Partnership for the goals',}
 @st.cache(allow_output_mutation=True)
-def load_sdgClassifier(config_file:str = None, classifier_name:str = None):
     """
     loads the document classifier using haystack, where the name/path of model
     in HF-hub as string is used to fetch the model object.Either configfile or
@@ -57,7 +57,7 @@ def load_sdgClassifier(config_file:str = None, classifier_name:str = None):
             return
         else:
             config = getconfig(config_file)
-            classifier_name = config.get('sdg','MODEL')
     logging.info("Loading classifier")
     doc_classifier = TransformersDocumentClassifier(
@@ -68,7 +68,7 @@ def load_sdgClassifier(config_file:str = None, classifier_name:str = None):
 @st.cache(allow_output_mutation=True)
-def sdg_classification(haystack_doc:List[Document],
                         threshold:float = 0.8,
                         classifier_model:TransformersDocumentClassifier= None
                         )->Tuple[DataFrame,Series]:
@@ -95,10 +95,10 @@ def sdg_classification(haystack_doc:List[Document],
     the number of times it is covered/discussed/count_of_paragraphs.
     """
-    logging.info("Working on SDG Classification")
     if not classifier_model:
         if check_streamlit():
-            classifier_model = st.session_state['sdg_classifier']
         else:
             logging.warning("No streamlit envinornment found, Pass the classifier")
             return
@@ -109,23 +109,23 @@ def sdg_classification(haystack_doc:List[Document],
     labels_= [(l.meta['classification']['label'],
             l.meta['classification']['score'],l.content,) for l in results]
-    df = DataFrame(labels_, columns=["SDG","Relevancy","text"])
     df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
     df.index += 1
     df =df[df['Relevancy']>threshold]
     # creating the dataframe for value counts of SDG, along with 'title' of SDGs
-    x = df['SDG'].value_counts()
     x = x.rename('count')
-    x = x.rename_axis('SDG').reset_index()
-    x["SDG"] = pd.to_numeric(x["SDG"])
     x = x.sort_values(by=['count'], ascending=False)
-    x['SDG_name'] = x['SDG'].apply(lambda x: _lab_dict[x])
-    x['SDG_Num'] = x['SDG'].apply(lambda x: "SDG "+str(x))
-    df['SDG'] = pd.to_numeric(df['SDG'])
-    df = df.sort_values('SDG')
     return df, x

     logging.info("Streamlit not installed")
 ## Labels dictionary ###
+_lab_dict = {0: 'Agricultural communities',
+             1: 'Children',
+             2: 'Coastal communities',
+             3: 'Ethnic, racial or other minorities',
+             4: 'Fishery communities',
+             5: 'Informal sector workers',
+             6: 'Members of indigenous and local communities',
+             7: 'Migrants and displaced persons',
+             8: 'Older persons',
+             9: 'Other',
+             10: 'Persons living in poverty',
+             11: 'Persons with disabilities',
+             12: 'Persons with pre-existing health conditions',
+             13: 'Residents of drought-prone regions',
+             14: 'Rural populations',
+             15: 'Sexual minorities (LGBTQI+)',
+             16: 'Urban populations',
+             17: 'Women and other genders'}
 @st.cache(allow_output_mutation=True)
+def load_Classifier(config_file:str = None, classifier_name:str = None):
     """
     loads the document classifier using haystack, where the name/path of model
     in HF-hub as string is used to fetch the model object.Either configfile or
             return
         else:
             config = getconfig(config_file)
+            classifier_name = config.get('vulnerability','MODEL')
     logging.info("Loading classifier")
     doc_classifier = TransformersDocumentClassifier(
 @st.cache(allow_output_mutation=True)
+def classification(haystack_doc:List[Document],
                         threshold:float = 0.8,
                         classifier_model:TransformersDocumentClassifier= None
                         )->Tuple[DataFrame,Series]:
     the number of times it is covered/discussed/count_of_paragraphs.
     """
+    logging.info("Working on Vulnerability Classification")
     if not classifier_model:
         if check_streamlit():
+            classifier_model = st.session_state['vulnerability_classifier']
         else:
             logging.warning("No streamlit envinornment found, Pass the classifier")
             return
     labels_= [(l.meta['classification']['label'],
             l.meta['classification']['score'],l.content,) for l in results]
+    df = DataFrame(labels_, columns=["Vulnerability","Relevancy","text"])
     df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
     df.index += 1
     df =df[df['Relevancy']>threshold]
     # creating the dataframe for value counts of SDG, along with 'title' of SDGs
+    x = df['Vulnerability'].value_counts()
     x = x.rename('count')
+    x = x.rename_axis('Vulnerability').reset_index()
+    x["Vulnerability"] = pd.to_numeric(x["Vulnerability"])
     x = x.sort_values(by=['count'], ascending=False)
+    x['SDG_name'] = x['Vulnerability'].apply(lambda x: _lab_dict[x])
+    x['SDG_Num'] = x['Vulnerability'].apply(lambda x: "Vulnerability "+str(x))
+    df['Vulnerability'] = pd.to_numeric(df['Vulnerability'])
+    df = df.sort_values('Vulnerability')
     return df, x