Update scorecard_templates/bias_stereotypes_representation.json
scorecard_templates/bias_stereotypes_representation.json
CHANGED
@@ -2,82 +2,48 @@
   "name": "Bias, Stereotypes, and Representational Harms Evaluation",
   "questions": [
     {
-      "question": "
+      "question": "1.1 Bias Detection Overview",
       "explainer": "Has a comprehensive evaluation been conducted across multiple stages of the system development chain using diverse evaluation techniques?",
       "details": [
-        "Evaluations at various stages (data collection, preprocessing,
-        "
-        "
-        "
+        "Evaluations at various stages (data collection, preprocessing, AI system architecture, training, deployment)",
+        "Have intrinsic properties of the AI system been evaluated for bias (e.g., embedding analysis)",
+        "Have extrinsic bias evaluations been run (e.g., downstream task performance)",
+        "Have evaluations been run across all applicable modalities",
+        "Have bias evaluations been run that take the form of automatic quantitative evaluation, such as benchmarks, metrics, and other statistical analysis",
+        "Have bias evaluations been run with human participants?"
       ]
     },
     {
-      "question": "
-      "explainer": "Does the evaluation include a
+      "question": "1.2 Protected Classes and Intersectional Measures",
+      "explainer": "Does the evaluation include a sufficiently broad range of protected categories that are disproportionately subject to harm by in-scope uses of the system, and do evaluations consider intersections of these categories?",
       "details": [
-        "
-        "
-        "
+        "Do evaluations cover all applicable legal protected categories for in-scope uses of the system?",
+        "Do evaluations cover additional subgroups that are likely to be harmed based on other personal characteristics (e.g., socioeconomic status, education level, regional differences)",
+        "Evaluation of how different aspects of identity interact and compound in AI system behavior (intersectional characteristics)",
+        "Evaluation of AI system biases for legal protected categories and additional relevant subgroups for all in-scope languages and deployment contexts"
       ]
     },
     {
-      "question": "
-      "explainer": "Has the
+      "question": "1.3 Measurement of Stereotypes and Harmful Associations",
+      "explainer": "Has the AI system been evaluated for harmful associations and stereotypes?",
       "details": [
-        "
-        "
-        "
-        "Diversity in evaluators/annotators and mitigation of evaluator bias"
+        "Measurement of known stereotypes in AI system outputs",
+        "Measurement of other negative associations and assumptions regarding specific groups",
+        "Measurement of stereotypes and negative associations across in-scope contexts"
       ]
     },
     {
-      "question": "
-      "explainer": "
-      "details": [
-        "
-        "
-        "
-        "
-
-
-
-
-      "explainer": "Has an assessment been conducted to identify and quantify performance disparities across demographic groups, including intersectional analysis?",
-      "details": [
-        "Detailed breakdowns of performance metrics (accuracy, precision, recall) for various subgroups",
-        "Performance analysis for disadvantaged subgroups",
-        "Intersectionality considerations in performance analysis",
-        "For generative models, assessments of disparities in content quality across groups"
-      ]
-    },
-    {
-      "question": "Bias Mitigation and Impact Analysis",
-      "explainer": "Have efforts been made to mitigate identified biases, and have the impacts of these strategies been evaluated, including unintended consequences?",
-      "details": [
-        "Documentation of bias mitigation strategies",
-        "Analyses of how model updates or mitigations affect bias metrics",
-        "Assessment of unintended consequences or new biases introduced",
-        "Comparative evaluations of model performance before and after mitigation"
-      ]
-    },
-    {
-      "question": "Transparency and Limitations Disclosure",
-      "explainer": "Are the limitations of the bias evaluation methods clearly stated, and is the evaluation process transparent, including acknowledgment of potential biases?",
-      "details": [
-        "Clear statements on the capabilities and limitations of evaluation methods",
-        "Acknowledgment of potential biases from the evaluation tools/processes",
-        "Detailed explanations of bias-related metrics, including assumptions or limitations",
-        "Discussion of strengths and weaknesses in the evaluation approach"
-      ]
-    },
-    {
-      "question": "Ongoing Evaluation Commitment",
-      "explainer": "Is there a documented commitment to ongoing bias evaluation and improvement, with plans for regular reassessment?",
-      "details": [
-        "Plans for continual bias assessment as the model is updated or deployed in new contexts",
-        "Strategies for incorporating new findings/methodologies in evaluation",
-        "Commitments to transparency and regular reporting on bias-related issues",
-        "Resources or teams allocated for ongoing bias evaluation and mitigation"
+      "question": "1.4 Bias Evaluation Transparency and Documentation",
+      "explainer": "Are the bias evaluations clearly documented to make them easier to reproduce and interpret?",
+      "details": [
+        "Sufficient documentation of evaluation method to understand the scope of the findings",
+        "Construct validity, documentation of strengths, weaknesses, and assumptions about the context in the evaluation approach",
+        "Domain shift between evaluation development and AI system development settings, including how protected categories shift across contexts (tasks, languages)",
+        "Analysis of potential biases and limitations in evaluation tools themselves, including evaluator/annotator diversity",
+        "Sufficient documentation of evaluation methods (including code and datasets) to replicate findings",
+        "Sufficient documentation of evaluation results (including intermediary statistics) to support comparison to other AI systems",
+        "Documentation of bias mitigation measures, including their secondary impacts",
+        "Documentation of bias monitoring approaches post-release/deployment if applicable"
       ]
     }
   ]