Update app.py
app.py CHANGED
@@ -17,13 +17,13 @@ def load_model_and_processor(hf_token: str):
         return _model_cache[hf_token]
     device = torch.device("cpu")
     model = AutoModelForCausalLM.from_pretrained(
-        "microsoft/maira-2",
-        trust_remote_code=True,
+        "microsoft/maira-2",
+        trust_remote_code=True,
         use_auth_token=hf_token
     )
     processor = AutoProcessor.from_pretrained(
-        "microsoft/maira-2",
-        trust_remote_code=True,
+        "microsoft/maira-2",
+        trust_remote_code=True,
         use_auth_token=hf_token
     )
     model.eval()
@@ -33,7 +33,7 @@ def load_model_and_processor(hf_token: str):
 
 def get_sample_data() -> dict:
     """
-
+    Downloads sample chest X-ray images and associated data.
     """
     frontal_image_url = "https://openi.nlm.nih.gov/imgs/512/145/145/CXR145_IM-0290-1001.png"
     lateral_image_url = "https://openi.nlm.nih.gov/imgs/512/145/145/CXR145_IM-0290-2001.png"
@@ -86,7 +86,14 @@ def generate_report(hf_token, frontal, lateral, indication, technique, compariso
         return_tensors="pt",
         get_grounding=use_grounding,
     )
+    # Move all tensors to the CPU
     processed_inputs = {k: v.to(device) for k, v in processed_inputs.items()}
+    # Remove keys containing "image_sizes" to prevent unexpected keyword errors.
+    processed_inputs = dict(processed_inputs)
+    keys_to_remove = [k for k in processed_inputs if "image_sizes" in k]
+    for key in keys_to_remove:
+        processed_inputs.pop(key, None)
+
     max_tokens = 450 if use_grounding else 300
     with torch.no_grad():
         output_decoding = model.generate(
@@ -121,6 +128,12 @@ def run_phrase_grounding(hf_token, frontal, phrase):
         return_tensors="pt",
     )
     processed_inputs = {k: v.to(device) for k, v in processed_inputs.items()}
+    # Remove keys containing "image_sizes" to prevent unexpected keyword errors.
+    processed_inputs = dict(processed_inputs)
+    keys_to_remove = [k for k in processed_inputs if "image_sizes" in k]
+    for key in keys_to_remove:
+        processed_inputs.pop(key, None)
+
     with torch.no_grad():
         output_decoding = model.generate(
             **processed_inputs,
@@ -132,6 +145,7 @@ def run_phrase_grounding(hf_token, frontal, phrase):
     prediction = processor.convert_output_to_plaintext_or_grounded_sequence(decoded_text)
     return prediction
 
+
 def login_ui(hf_token):
     """Authenticate the user by loading the model."""
     try:
@@ -177,14 +191,14 @@ def load_sample_findings():
     sample = get_sample_data()
     return [
         save_temp_image(sample["frontal"]),  # frontal image file path
-        save_temp_image(sample["lateral"]),
+        save_temp_image(sample["lateral"]),    # lateral image file path
         sample["indication"],
         sample["technique"],
         sample["comparison"],
         None,  # prior frontal (not used)
         None,  # prior lateral (not used)
         None,  # prior report (not used)
-        False
+        False  # grounding checkbox default
     ]
 
 def load_sample_phrase():
@@ -276,4 +290,4 @@ with gr.Blocks(title="MAIRA-2 Medical Assistant") as demo:
                 outputs=pg_output
             )
 
-demo.launch()
+demo.launch()
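
Both `image_sizes` hunks apply the same defensive pattern: copy the processor output into a plain dict and drop any key containing "image_sizes" before calling `model.generate`, since a remote-code model can reject such keys as unexpected keyword arguments. A minimal standalone sketch of that pattern; the helper name and the dummy tensors are illustrative, not part of the commit:

import torch

def drop_image_size_keys(processed_inputs: dict) -> dict:
    # Hypothetical helper: return a plain-dict copy of the processor output
    # without any key containing "image_sizes", mirroring the commit's loop.
    return {k: v for k, v in processed_inputs.items() if "image_sizes" not in k}

# Illustrative stand-in for a processor's return value:
processed_inputs = {
    "input_ids": torch.tensor([[1, 2, 3]]),
    "attention_mask": torch.tensor([[1, 1, 1]]),
    "image_sizes": torch.tensor([[512, 512]]),
}
filtered = drop_image_size_keys(processed_inputs)
assert set(filtered) == {"input_ids", "attention_mask"}

A dict comprehension replaces the commit's explicit keys_to_remove loop but has the same effect.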
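For context around the first hunk, `return _model_cache[hf_token]` on line 17 implies the loading code sits behind a per-token cache. A sketch of that surrounding structure, assuming a module-level dict keyed by the HF token and a cached (model, processor) tuple; only the lines visible in the diff are confirmed:

import torch
from transformers import AutoModelForCausalLM, AutoProcessor

_model_cache = {}  # assumed: module-level cache keyed by the user's HF token

def load_model_and_processor(hf_token: str):
    # Reuse a previously loaded model/processor pair for this token.
    if hf_token in _model_cache:
        return _model_cache[hf_token]
    device = torch.device("cpu")
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/maira-2",
        trust_remote_code=True,
        use_auth_token=hf_token,  # newer transformers releases prefer token=
    )
    processor = AutoProcessor.from_pretrained(
        "microsoft/maira-2",
        trust_remote_code=True,
        use_auth_token=hf_token,
    )
    model.eval()
    model.to(device)  # assumed: the diff only shows device = torch.device("cpu")
    _model_cache[hf_token] = (model, processor)
    return _model_cache[hf_token]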