cguna frreiss committed
Commit e29d383 · verified · 1 Parent(s): e9a03b0

Update various config files (#11)


- Update various config files (a8b968e5cf22527c145fba7fc7413aaa4f53c008)


Co-authored-by: Fred Reiss <frreiss@users.noreply.huggingface.co>

hallucination_detection/lora/gpt-oss-20b/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
hallucination_detection/lora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
hallucination_detection/lora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
run_vllm.sh ADDED
@@ -0,0 +1,44 @@
+ #!/bin/bash
+
+ ################################################################################
+ # Shell script that starts a copy of vLLM with a base model plus all the
+ # available LoRA adapters in this repository.
+ #
+ # To run this script:
+ #   1. Install an appropriate build of vLLM for your machine
+ #   2. Install the Hugging Face CLI (`hf`)
+ #   3. Download the intrinsics library by running:
+ #      hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
+ #   4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
+ #   5. Run this script from the root of your local copy of intrinsics-lib.
+ ################################################################################
+
+ BASE_MODEL_NAME=granite-3.3-8b-instruct
+ BASE_MODEL_ORG=ibm-granite
+
+ export VLLM_API_KEY=rag_intrinsics_1234
+
+ # Find all LoRA adapters for the target base model.
+ LORAS=""
+ for item in ./*; do
+     # Remove the "./"
+     name=$(basename -- "${item}")
+     if [ -d "./${name}/lora/${BASE_MODEL_NAME}" ]; then
+         LORAS+="${name}=./${name}/lora/${BASE_MODEL_NAME} "
+     fi
+ done
+
+ # Use the configured constants rather than a hard-coded model name so that
+ # editing them (step 4 above) actually takes effect.
+ CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
+     --port 55555 \
+     --gpu-memory-utilization 0.45 \
+     --max-model-len 8192 \
+     --enable-lora \
+     --max-lora-rank 64 \
+     --lora-modules $LORAS"
+
+ echo $CMD
+ $CMD
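
Once the server is up, each adapter is addressable by its directory name, which is what the `--lora-modules` loop above builds. A hedged example request against the `uncertainty` adapter over vLLM's OpenAI-compatible endpoint (the adapter name assumes the repository layout shown in this commit):

```python
import requests

resp = requests.post(
    "http://localhost:55555/v1/chat/completions",
    headers={"Authorization": "Bearer rag_intrinsics_1234"},
    json={
        "model": "uncertainty",  # LoRA adapter name from --lora-modules
        "messages": [
            {"role": "user", "content": "When did the French Revolution begin?"},
            {"role": "assistant", "content": "The French Revolution began in 1789."},
        ],
        "temperature": 0.0,
        "max_tokens": 2,  # the uncertainty io.yaml caps completions at 2 tokens
    },
)
print(resp.json()["choices"][0]["message"]["content"])  # a single digit, 0-9
```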
uncertainty/alora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
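
The table above quantizes certainty into ten buckets: digit d maps to the bucket midpoint 0.1 * d + 0.05, and weighting the midpoints by the digit probabilities yields a calibrated score, which the `nest` step then wraps in a record. A small sketch, assuming per-digit probabilities have already been extracted from logprobs (names illustrative):

```python
def certainty_record(digit_probs: dict[int, float]) -> dict:
    """Expected bucket midpoint, nested under "certainty" as the
    nest transformation above does."""
    score = sum(p * (0.1 * d + 0.05) for d, p in digit_probs.items())
    return {"certainty": score}

# Most probability mass on "8", some on its neighbors:
print(certainty_record({7: 0.2, 8: 0.6, 9: 0.2}))
# {'certainty': 0.85} (up to float rounding)
```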
uncertainty/alora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
uncertainty/lora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
uncertainty/lora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~