kgupta21 committed
Commit 41cd3de · 1 Parent(s): 45177a3

local inference page

Browse files:
- .gitignore        +44 -0
- app.py            +193 -14
- requirements.txt  +4 -1
.gitignore ADDED

@@ -0,0 +1,44 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+ENV/
+env/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Logs
+*.log
+
+# Local development
+.env
+.env.local
+.env.*.local
+
+# Misc
+.DS_Store
+Thumbs.db
app.py CHANGED
@@ -6,6 +6,11 @@ from PIL import Image
 import io
 import base64
 import logging
+import torch
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+from typing import Iterator
+import os
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -15,6 +20,40 @@ logger = logging.getLogger(__name__)
 APP_VERSION = "1.0.0"
 logger.info(f"Starting Radiology Teaching App v{APP_VERSION}")
 
+# Initialize models for local inference
+device = 0 if torch.cuda.is_available() else "cpu"
+logger.info(f"Using device: {device}")
+
+# Initialize Whisper
+MODEL_NAME = "openai/whisper-large-v3-turbo"
+BATCH_SIZE = 8
+FILE_LIMIT_MB = 5000
+
+try:
+    logger.info("Initializing Whisper model...")
+    pipe = pipeline(
+        task="automatic-speech-recognition",
+        model=MODEL_NAME,
+        chunk_length_s=30,
+        device=device,
+    )
+except Exception as e:
+    logger.error(f"Error initializing Whisper model: {str(e)}")
+    pipe = None
+
+# Initialize Llama
+try:
+    logger.info("Initializing Llama model...")
+    if torch.cuda.is_available():
+        llm_model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
+        llm = AutoModelForCausalLM.from_pretrained(llm_model_id, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
+        tokenizer.use_default_system_prompt = False
+except Exception as e:
+    logger.error(f"Error initializing Llama model: {str(e)}")
+    llm = None
+    tokenizer = None
+
 try:
     # Load only 10 rows from the dataset
     logger.info("Loading MIMIC-CXR dataset...")
@@ -68,6 +107,81 @@ def analyze_report(user_findings, ground_truth_findings, ground_truth_impression
         logger.error(f"Error in report analysis: {str(e)}")
         return f"Error analyzing report: {str(e)}"
 
+def transcribe(inputs, task="transcribe"):
+    """Transcribe audio using Whisper"""
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    if pipe is None:
+        raise gr.Error("Whisper model not initialized properly!")
+
+    try:
+        logger.info("Transcribing audio...")
+        text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+        return text
+    except Exception as e:
+        logger.error(f"Error in transcription: {str(e)}")
+        raise gr.Error(f"Transcription failed: {str(e)}")
+
+def analyze_with_llama(
+    transcribed_text: str,
+    ground_truth_findings: str,
+    ground_truth_impression: str,
+    max_new_tokens: int = 1024,
+    temperature: float = 0.6,
+) -> Iterator[str]:
+    """Analyze transcribed report against ground truth using Llama"""
+    if llm is None or tokenizer is None:
+        raise gr.Error("Llama model not initialized properly!")
+
+    try:
+        task_prompt = f"""You are an expert radiologist. Compare the following transcribed radiology report with the ground truth and provide detailed feedback.
+
+Transcribed Report:
+{transcribed_text}
+
+Ground Truth Findings:
+{ground_truth_findings}
+
+Ground Truth Impression:
+{ground_truth_impression}
+
+Please analyze:
+1. Accuracy of findings
+2. Completeness of report
+3. Structure and clarity
+4. Areas for improvement
+
+Provide your analysis in a clear, structured format."""
+
+        conversation = [
+            {"role": "system", "content": "You are an expert radiologist providing detailed feedback."},
+            {"role": "user", "content": task_prompt}
+        ]
+
+        input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+        input_ids = input_ids.to(llm.device)
+
+        streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+        generate_kwargs = dict(
+            input_ids=input_ids,
+            streamer=streamer,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=temperature,
+            num_beams=1,
+        )
+
+        t = Thread(target=llm.generate, kwargs=generate_kwargs)
+        t.start()
+
+        outputs = []
+        for text in streamer:
+            outputs.append(text)
+            yield "".join(outputs)
+    except Exception as e:
+        logger.error(f"Error in Llama analysis: {str(e)}")
+        raise gr.Error(f"Analysis failed: {str(e)}")
+
 def load_random_case(hide_ground_truth):
     try:
         # Randomly select a case from our dataset
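For reference, analyze_with_llama is a generator: llm.generate runs on a background Thread, TextIteratorStreamer yields decoded text as it is produced, and each yield emits the full analysis accumulated so far rather than a delta, which is what lets Gradio refresh the output box incrementally. A hypothetical standalone call (the report strings are invented placeholders):

    # Sketch: consuming the streaming generator outside Gradio.
    final_text = ""
    for partial in analyze_with_llama(
        transcribed_text="Lungs are clear. No pleural effusion.",
        ground_truth_findings="Lungs are clear bilaterally.",
        ground_truth_impression="No acute cardiopulmonary process.",
    ):
        final_text = partial  # each value is the cumulative text so far
    print(final_text)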
@@ -112,21 +226,86 @@ with gr.Blocks() as demo:
     actual_findings_state = gr.State("")
     actual_impression_state = gr.State("")
 
-    with gr.Row():
-        with gr.Column():
-            …
+    with gr.Tab("DeepSeek Analysis"):
+        with gr.Row():
+            with gr.Column():
+                image_display = gr.Image(label="Chest X-ray Image", type="pil")
+                api_key_input = gr.Textbox(label="DeepSeek API Key", type="password")
+                hide_truth = gr.Checkbox(label="Hide Ground Truth", value=False)
+                load_btn = gr.Button("Load Random Case")
+
+            with gr.Column():
+                user_findings_input = gr.Textbox(label="Your Findings", lines=10, placeholder="Type or dictate your findings here...")
+                ground_truth_findings = gr.Textbox(label="Ground Truth Findings", lines=5, interactive=False)
+                ground_truth_impression = gr.Textbox(label="Ground Truth Impression", lines=5, interactive=False)
+                analysis_output = gr.Textbox(label="Analysis and Feedback", lines=10, interactive=False)
+                submit_btn = gr.Button("Submit Report")
+
+    with gr.Tab("Local Inference"):
+        gr.Markdown("### Use Local Models for Transcription and Analysis")
+
+        with gr.Row():
+            with gr.Column():
+                # Transcription Interface
+                audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio")
+                task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
+                transcribe_button = gr.Button("Transcribe Audio")
+                transcription_output = gr.Textbox(label="Transcription Output", lines=5)
+
+                # Load case for comparison
+                load_case_btn = gr.Button("Load Random Case for Comparison")
+                local_ground_truth_findings = gr.Textbox(label="Ground Truth Findings", lines=5, interactive=False)
+                local_ground_truth_impression = gr.Textbox(label="Ground Truth Impression", lines=5, interactive=False)
+
+            with gr.Column():
+                # Editable transcription and analysis interface
+                edited_transcription = gr.Textbox(label="Edit Transcription", lines=10)
+                temperature_input = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.6, step=0.1)
+                max_tokens_input = gr.Slider(label="Max Tokens", minimum=256, maximum=2048, value=1024, step=128)
+                analyze_btn = gr.Button("Analyze with Llama")
+                llama_analysis_output = gr.Textbox(label="Llama Analysis Output", lines=15, interactive=False)
+
+    # Event handlers for Local Inference tab
+    transcribe_button.click(
+        fn=transcribe,
+        inputs=[audio_input, task_input],
+        outputs=transcription_output
+    )
+
+    # Copy transcription to editable box
+    transcription_output.change(
+        fn=lambda x: x,
+        inputs=[transcription_output],
+        outputs=[edited_transcription]
+    )
 
-        …
+    # Load case for local analysis
+    load_case_btn.click(
+        fn=load_random_case,
+        inputs=[gr.Checkbox(value=False, visible=False)],  # Hidden checkbox for hide_ground_truth
+        outputs=[
+            gr.Image(visible=False),  # Hidden image output
+            local_ground_truth_findings,
+            local_ground_truth_impression,
+            gr.State(),  # Hidden state
+            gr.State()   # Hidden state
+        ]
+    )
+
+    # Analyze with Llama
+    analyze_btn.click(
+        fn=analyze_with_llama,
+        inputs=[
+            edited_transcription,
+            local_ground_truth_findings,
+            local_ground_truth_impression,
+            max_tokens_input,
+            temperature_input
+        ],
+        outputs=llama_analysis_output
+    )
+
+    # Event handlers for DeepSeek Analysis tab
     load_btn.click(
         fn=load_random_case,
         inputs=[hide_truth],
requirements.txt CHANGED

@@ -3,4 +3,7 @@ pandas>=2.0.0
 datasets>=2.15.0
 openai>=1.0.0
 Pillow>=10.0.0
-huggingface-hub>=0.20.0
+huggingface-hub>=0.20.0
+torch>=2.0.0
+transformers>=4.36.0
+spaces>=0.19.3
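The three new pins back the Local Inference tab: torch and transformers run Whisper and Llama locally, and spaces is the Hugging Face Spaces helper package (presumably for GPU allocation on Spaces). A quick post-install sanity check, sketched here and not part of the commit:

    # Sketch: confirm the local-inference dependencies import and report CUDA status.
    import torch
    import transformers

    print(f"torch {torch.__version__}, transformers {transformers.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")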