Spaces:
Running
on
Zero
Running
on
Zero
| import re | |
| import json | |
def debug_text(text, label="Text"):
    """Print a short diagnostic report about ``text`` to stdout.

    The report contains the label, the length of the text, its first 100
    characters, and whether each of the two highlight-start marker spellings
    occurs in it.
    """
    def _report_lines():
        # Generated lazily so each value is computed right before it is
        # printed, exactly in reading order.
        yield f"\n--- DEBUG {label} ---"
        yield f"Length: {len(text)}"
        yield f"First 100 chars: {text[:100]}"
        yield f"Contains highlight_start: {'[[highlight_start]]' in text}"
        yield f"Contains start_highlight: {'[[start_highlight]]' in text}"
        yield "-------------------------\n"

    for report_line in _report_lines():
        print(report_line)
def clean_json_text(text):
    r"""Strip leftover backslash escapes in front of quote characters.

    Text that went through JSON/string escaping can still carry literal
    backslashes before quotes, for example ``the sky isn\'t falling``. Every
    run of backslashes that directly precedes ``'`` or ``"`` is removed so
    the quote is displayed as-is. Other backslash sequences (such as a
    literal ``\n`` or ``\t``) are left untouched.

    Args:
        text: The text to clean. Non-string values are returned unchanged.

    Returns:
        The text with escaped quotes unescaped, or the input itself when it
        is not a string or contains no backslash.
    """
    if not isinstance(text, str) or '\\' not in text:
        return text
    # A json.dumps()/json.loads() round trip is an identity operation, so the
    # escapes have to be stripped explicitly.
    return re.sub(r"""\\+(['"])""", r'\1', text)
def process_highlights(text):
    """Convert highlight markers in ``text`` into HTML ``<span>`` elements.

    Two marker spellings are recognised:
    ``[[highlight_start]]...[[highlight_end]]`` and
    ``[[start_highlight]]...[[end_highlight]]``. Every complete pair is
    replaced by ``<span class="highlight">...</span>``. The text is passed
    through ``clean_json_text`` before the markers are replaced.

    Args:
        text: Text that may contain highlight markers.

    Returns:
        The text with each complete marker pair turned into a highlight span.
    """
    highlighted_text = clean_json_text(text)
    replacement = r'<span class="highlight">\1</span>'
    marker_patterns = (
        r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]',
        r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]',
    )
    for pattern in marker_patterns:
        # re.DOTALL lets "." match newlines, so a highlight that spans several
        # lines is converted instead of leaving its raw markers in the output.
        highlighted_text = re.sub(
            pattern, replacement, highlighted_text, flags=re.DOTALL
        )
    return highlighted_text
def process_table_with_highlights(markdown_table):
    """Render a markdown table that may contain highlight markers as HTML.

    The table text is stripped and split into lines, each line is run through
    ``process_highlights`` on its own, and the re-joined result is handed to
    ``convert_markdown_table_to_html``.
    """
    rows = markdown_table.strip().split('\n')
    highlighted_table = '\n'.join(process_highlights(row) for row in rows)
    return convert_markdown_table_to_html(highlighted_table)
# Matches a stripped markdown delimiter row such as "|---|:--:|--:|": every
# cell holds only hyphens with an optional leading/trailing colon.
_TABLE_SEPARATOR_RE = re.compile(r'\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?')


def _split_table_row(line):
    """Return the raw cell strings of one ``|``-delimited table row."""
    pieces = line.split('|')
    # Drop the piece before the leading pipe and, when the row is closed by a
    # trailing pipe, the piece after it as well.
    return pieces[1:-1] if line.strip().endswith('|') else pieces[1:]


def _render_table_row(line, cell_tag):
    """Render one table row as a ``<tr>`` of highlighted ``cell_tag`` cells."""
    cells = ''.join(
        f'<{cell_tag}>{process_highlights(cell.strip())}</{cell_tag}>'
        for cell in _split_table_row(line)
    )
    return f'<tr>{cells}</tr>'


def convert_markdown_table_to_html(markdown_text):
    """Convert a markdown table to an HTML ``<table class="md-table">``.

    The text is passed through ``clean_json_text`` and only lines that start
    with ``|`` (ignoring leading whitespace) are treated as table rows; any
    other lines are not included in the generated table. When the second
    table row is a delimiter row (cells made of hyphens with optional
    alignment colons), the first row becomes the ``<thead>`` and the remaining
    rows the ``<tbody>``. Otherwise every row is rendered as body data.
    Highlight markers inside each cell are converted via
    ``process_highlights``.

    Args:
        markdown_text: Markdown text containing a pipe-delimited table.

    Returns:
        The HTML table, or the cleaned input text unchanged when it contains
        fewer than two table rows.
    """
    markdown_text = clean_json_text(markdown_text)
    table_lines = [
        line for line in markdown_text.strip().split('\n')
        if line.strip().startswith('|')
    ]
    if len(table_lines) < 2:
        # Not enough rows to form a table; hand the text back as-is.
        return markdown_text

    parts = ['<table class="md-table">']
    # Match the delimiter row structurally instead of searching for '---', so
    # rows like "|:--|--:|" count as separators and data rows that merely
    # contain dashes do not.
    if _TABLE_SEPARATOR_RE.fullmatch(table_lines[1].strip()):
        header_row = _render_table_row(table_lines[0], 'th')
        parts.append(f'<thead>{header_row}</thead>')
        body_lines = table_lines[2:]
    else:
        body_lines = table_lines

    parts.append('<tbody>')
    parts.extend(_render_table_row(line, 'td') for line in body_lines)
    parts.append('</tbody>')
    parts.append('</table>')
    return ''.join(parts)
# Alert shown above the context items when an example is flagged as having
# insufficient context; ``{reason_html}`` is filled in per example.
_INSUFFICIENT_ALERT_TEMPLATE = (
    '\n<div class="insufficient-alert">\n'
    '<strong>\n'
    '<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">\n'
    '<path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>\n'
    '<line x1="12" y1="9" x2="12" y2="13"></line>\n'
    '<line x1="12" y1="17" x2="12.01" y2="17"></line>\n'
    '</svg>\n'
    'Insufficient Context\n'
    '</strong>\n'
    '{reason_html}\n'
    '</div>\n'
)


def _render_context_item(context_text, extra_class=""):
    """Render one context chunk as a ``context-item`` div (table or text)."""
    # Treat a missing/None content value as empty text instead of crashing on
    # the membership tests below.
    context_text = context_text or ""
    looks_like_table = '|' in context_text and (
        '\n|' in context_text or '\n-' in context_text
    )
    if looks_like_table:
        inner_html = process_table_with_highlights(context_text)
    else:
        inner_html = process_highlights(context_text)
    return f'<div class="context-item{extra_class}">{inner_html}</div>'


def get_context_html(example, show_full=False):
    """Format an example's context chunks as an HTML string for display.

    When ``example["insufficient"]`` is truthy an alert block is emitted
    first, using ``example["insufficient_reason"]`` if present or a default
    explanation otherwise. All context items are then wrapped in a
    ``context-items-container`` div. Each item's ``content`` is rendered as a
    table when it looks like a markdown table and as highlighted text
    otherwise.

    Args:
        example: Mapping with the example data. Keys read here are
            ``insufficient``, ``insufficient_reason``, ``full_contexts`` and
            ``contexts``; each context item is a mapping whose ``content``
            and (for ``contexts``) ``is_primary`` entries are used.
        show_full: When true and ``full_contexts`` is non-empty, render the
            full context chunks; otherwise render ``contexts``, marking
            primary chunks with the ``primary-context`` class.

    Returns:
        The assembled HTML string. If no applicable context items exist, the
        container holds a single "No context available" message.
    """
    parts = []

    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        if insufficient_reason:
            reason_html = f"<p>{insufficient_reason}</p>"
        else:
            reason_html = "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"
        parts.append(
            _INSUFFICIENT_ALERT_TEMPLATE.format(reason_html=reason_html)
        )

    parts.append('<div class="context-items-container">')

    if show_full and example.get("full_contexts"):
        # Full view: one plain item per chunk, no primary marking.
        parts.extend(
            _render_context_item(context_item.get('content', ''))
            for context_item in example["full_contexts"]
        )
    elif example.get("contexts"):
        for context_item in example["contexts"]:
            is_primary = context_item.get('is_primary', False)
            extra_class = " primary-context" if is_primary else ""
            parts.append(
                _render_context_item(
                    context_item.get('content', ''), extra_class
                )
            )
    else:
        parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    parts.append('</div>')
    return ''.join(parts)