cguna frreiss committed
Commit e29d383 · verified · 1 Parent(s): e9a03b0

Update various config files (#11)


- Update various config files (a8b968e5cf22527c145fba7fc7413aaa4f53c008)


Co-authored-by: Fred Reiss <frreiss@users.noreply.huggingface.co>

hallucination_detection/lora/gpt-oss-20b/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
hallucination_detection/lora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
hallucination_detection/lora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,78 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "$defs": {
+       "HallucinationOutputEntry": {
+         "properties": {
+           "r": {
+             "minimum": 0,
+             "title": "Sentence Num",
+             "type": "integer"
+           },
+           "f": {
+             "title": "Is Faithful",
+             "type": "string",
+             "enum": ["faithful", "partial", "unfaithful"]
+           },
+           "e": {
+             "title": "Reasoning",
+             "type": "string"
+           }
+         },
+         "required": [
+           "r",
+           "e",
+           "f"
+         ],
+         "title": "HallucinationOutputEntry",
+         "type": "object"
+       }
+     },
+     "items": {
+       "$ref": "#/$defs/HallucinationOutputEntry"
+     },
+     "title": "HallucinationOutput",
+     "type": "array"
+   }
+ transformations:
+   # Use logprobs to replace "f" flag with a probability
+   - type: likelihood
+     categories_to_values:
+       "faithful": 1.0
+       "partial": 0.5
+       "unfaithful": 0.0
+     input_path: [~, "f"]  # Null in path means wildcard
+   # Replace sentence number with sentence location and contents
+   - type: decode_sentences
+     source: "last_message"
+     input_path: [~, "r"]  # Null in path means wildcard
+     # New fields to add for each sentence
+     output_names:
+       begin: "response_begin"
+       end: "response_end"
+       text: "response_text"
+   # Remove fields that we no longer need and rename some of the fields.
+   - type: project
+     input_path: []
+     retained_fields:
+       "response_begin": "response_begin"
+       "response_end": "response_end"
+       "response_text": "response_text"
+       "f": "faithfulness_likelihood"
+       "e": "explanation"
+ instruction: >
+   Split the last assistant response into individual sentences.
+   For each sentence in the last assistant response, identify the faithfulness
+   by comparing with the provided documents and generate the faithfulness reasoning
+   and faithfulness decision.
+   Ensure that your output includes all response sentence IDs,
+   and for each response sentence ID, provide the corresponding faithfulness
+   reasoning and faithfulness decision.
+   The output must be a json structure.
+ parameters:
+   # Current LoRA can be quite verbose in its explanations.
+   max_completion_tokens: 4096
+ sentence_boundaries:
+   last_message: "i"
run_vllm.sh ADDED
@@ -0,0 +1,44 @@
+ #!/bin/bash
+
+ ################################################################################
+ # Shell script that starts a copy of vLLM with a base model plus all the
+ # available LoRA adapters in this repository.
+ #
+ # To run this script:
+ #   1. Install an appropriate build of vLLM for your machine
+ #   2. Install the Hugging Face CLI (`hf`)
+ #   3. Download the intrinsics library by running:
+ #      hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
+ #   4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
+ #   5. Run this script from the root of your local copy of intrinsics-lib.
+ ################################################################################
+
+ BASE_MODEL_NAME=granite-3.3-8b-instruct
+ BASE_MODEL_ORG=ibm-granite
+
+ export VLLM_API_KEY=rag_intrinsics_1234
+
+ # Find all LoRA adapters for the target base model.
+ LORAS=""
+ for item in ./*; do
+     # Remove the "./"
+     name=$(basename -- "${item}")
+     if [ -d "./${name}/lora/${BASE_MODEL_NAME}" ]; then
+         LORAS+="${name}=./${name}/lora/${BASE_MODEL_NAME} "
+     fi
+ done
+
+ # Use the configured constants rather than a hard-coded model name so that
+ # editing them (step 4 above) actually takes effect.
+ CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
+     --port 55555 \
+     --gpu-memory-utilization 0.45 \
+     --max-model-len 8192 \
+     --enable-lora \
+     --max-lora-rank 64 \
+     --lora-modules $LORAS"
+
+ echo $CMD
+ $CMD
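
Once the server is up, each adapter is addressable by its directory name, which is what the `--lora-modules` loop above builds. A hedged example request against the `uncertainty` adapter over vLLM's OpenAI-compatible endpoint (the adapter name assumes the repository layout shown in this commit):

```python
import requests

resp = requests.post(
    "http://localhost:55555/v1/chat/completions",
    headers={"Authorization": "Bearer rag_intrinsics_1234"},
    json={
        "model": "uncertainty",  # LoRA adapter name from --lora-modules
        "messages": [
            {"role": "user", "content": "When did the French Revolution begin?"},
            {"role": "assistant", "content": "The French Revolution began in 1789."},
        ],
        "temperature": 0.0,
        "max_tokens": 2,  # the uncertainty io.yaml caps completions at 2 tokens
    },
)
print(resp.json()["choices"][0]["message"]["content"])  # a single digit, 0-9
```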
uncertainty/alora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
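
The table above quantizes certainty into ten buckets: digit d maps to the bucket midpoint 0.1 * d + 0.05, and weighting the midpoints by the digit probabilities yields a calibrated score, which the `nest` step then wraps in a record. A small sketch, assuming per-digit probabilities have already been extracted from logprobs (names illustrative):

```python
def certainty_record(digit_probs: dict[int, float]) -> dict:
    """Expected bucket midpoint, nested under "certainty" as the
    nest transformation above does."""
    score = sum(p * (0.1 * d + 0.05) for d, p in digit_probs.items())
    return {"certainty": score}

# Most probability mass on "8", some on its neighbors:
print(certainty_record({7: 0.2, 8: 0.6, 9: 0.2}))
# {'certainty': 0.85} (up to float rounding)
```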
uncertainty/alora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
uncertainty/lora/granite-3.3-2b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~
uncertainty/lora/granite-3.3-8b-instruct/io.yaml ADDED
@@ -0,0 +1,32 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "integer",
+     "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+   }
+ # Output transformation rules to apply
+ transformations:
+   - type: likelihood
+     categories_to_values:
+       # Each 1-digit output maps to 0.1 * <output> + 0.05
+       0: 0.05
+       1: 0.15
+       2: 0.25
+       3: 0.35
+       4: 0.45
+       5: 0.55
+       6: 0.65
+       7: 0.75
+       8: 0.85
+       9: 0.95
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "certainty"
+ instruction: ~
+ parameters:
+   max_completion_tokens: 2
+ sentence_boundaries: ~