aagamjtdev committed on
Commit
d988980
·
1 Parent(s): c4eb340
Files changed (1) hide show
  1. app.py +165 -4
app.py CHANGED
@@ -1,3 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import subprocess
3
  import os
@@ -25,10 +177,16 @@ def train_model(dataset_file: gr.File, batch_size: int, epochs: int, lr: float,
25
  os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
26
 
27
  # 2. File Handling: Use the temporary path of the uploaded file
28
- # if dataset_file is None or not dataset_file.path.endswith(".json"):
29
- # return "❌ ERROR: Please upload a valid Label Studio JSON file.", None
 
 
 
 
30
 
31
- input_path = dataset_file.path
 
 
32
 
33
  progress(0.1, desc="Starting LayoutLMv3 Training...")
34
 
@@ -47,6 +205,7 @@ def train_model(dataset_file: gr.File, batch_size: int, epochs: int, lr: float,
47
  ]
48
 
49
  log_output += f"Executing command: {' '.join(command)}\n\n"
 
50
 
51
  try:
52
  # 4. Run the training script and capture output
@@ -147,4 +306,6 @@ with gr.Blocks(title="LayoutLMv3 Fine-Tuning App") as demo:
147
  )
148
 
149
  if __name__ == "__main__":
150
- demo.launch(server_port=7860, server_name="0.0.0.0")
 
 
 
1
+ # import gradio as gr
2
+ # import subprocess
3
+ # import os
4
+ # import sys
5
+ # from datetime import datetime
6
+ #
7
+ # # The name of your existing training script
8
+ # TRAINING_SCRIPT = "LayoutLM_Train_Passage.py"
9
+ #
10
+ # # --- CORRECTED MODEL PATH BASED ON LayoutLM_Train_Passage.py ---
11
+ # MODEL_OUTPUT_DIR = "checkpoints"
12
+ # MODEL_FILE_NAME = "layoutlmv3_crf_passage.pth"
13
+ # MODEL_FILE_PATH = os.path.join(MODEL_OUTPUT_DIR, MODEL_FILE_NAME)
14
+ #
15
+ #
16
+ # # ----------------------------------------------------------------
17
+ #
18
+ #
19
+ # def train_model(dataset_file: gr.File, batch_size: int, epochs: int, lr: float, max_len: int, progress=gr.Progress()):
20
+ # """
21
+ # Handles the Gradio submission and executes the training script using subprocess.
22
+ # """
23
+ #
24
+ # # 1. Setup: Create output directory if it doesn't exist
25
+ # os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
26
+ #
27
+ # # 2. File Handling: Use the temporary path of the uploaded file
28
+ # # if dataset_file is None or not dataset_file.path.endswith(".json"):
29
+ # # return "❌ ERROR: Please upload a valid Label Studio JSON file.", None
30
+ #
31
+ # input_path = dataset_file.path
32
+ #
33
+ # progress(0.1, desc="Starting LayoutLMv3 Training...")
34
+ #
35
+ # log_output = f"--- Training Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n"
36
+ #
37
+ # # 3. Construct the subprocess command
38
+ # command = [
39
+ # sys.executable,
40
+ # TRAINING_SCRIPT,
41
+ # "--mode", "train",
42
+ # "--input", input_path,
43
+ # "--batch_size", str(batch_size),
44
+ # "--epochs", str(epochs),
45
+ # "--lr", str(lr),
46
+ # "--max_len", str(max_len)
47
+ # ]
48
+ #
49
+ # log_output += f"Executing command: {' '.join(command)}\n\n"
50
+ #
51
+ # try:
52
+ # # 4. Run the training script and capture output
53
+ # process = subprocess.Popen(
54
+ # command,
55
+ # stdout=subprocess.PIPE,
56
+ # stderr=subprocess.STDOUT,
57
+ # text=True,
58
+ # bufsize=1
59
+ # )
60
+ #
61
+ # # Stream logs in real-time
62
+ # for line in iter(process.stdout.readline, ""):
63
+ # log_output += line
64
+ # yield log_output, None # Send partial log to Gradio output
65
+ #
66
+ # process.stdout.close()
67
+ # return_code = process.wait()
68
+ #
69
+ # # 5. Check for successful completion
70
+ # if return_code == 0:
71
+ # log_output += "\n✅ TRAINING COMPLETE! Model saved."
72
+ #
73
+ # # 6. Prepare download links based on script's saved path
74
+ # model_exists = os.path.exists(MODEL_FILE_PATH)
75
+ #
76
+ # if model_exists:
77
+ # log_output += f"\nModel path: {MODEL_FILE_PATH}"
78
+ # # Return final log, and the file path for Gradio's download component
79
+ # return log_output, MODEL_FILE_PATH
80
+ # else:
81
+ # log_output += f"\n⚠️ WARNING: Training completed, but model file not found at expected path ({MODEL_FILE_PATH})."
82
+ # return log_output, None
83
+ # else:
84
+ # log_output += f"\n\n❌ TRAINING FAILED with return code {return_code}. Check logs above."
85
+ # return log_output, None
86
+ #
87
+ # except FileNotFoundError:
88
+ # return f"❌ ERROR: The training script '{TRAINING_SCRIPT}' was not found. Ensure it is in the root directory of your Space.", None
89
+ # except Exception as e:
90
+ # return f"❌ An unexpected error occurred: {e}", None
91
+ #
92
+ #
93
+ # # --- Gradio Interface Setup (using Blocks for a nicer layout) ---
94
+ # with gr.Blocks(title="LayoutLMv3 Fine-Tuning App") as demo:
95
+ # gr.Markdown("# 🚀 LayoutLMv3 Fine-Tuning on Hugging Face Spaces")
96
+ # gr.Markdown(
97
+ # """
98
+ # Upload your Label Studio JSON file, set your hyperparameters, and click **Train Model** to fine-tune the LayoutLMv3 model using your script.
99
+ #
100
+ # **Note:** The trained model is saved in the **`checkpoints/`** folder as **`layoutlmv3_crf_passage.pth`**.
101
+ # """
102
+ # )
103
+ #
104
+ # with gr.Row():
105
+ # with gr.Column(scale=1):
106
+ # file_input = gr.File(
107
+ # label="1. Upload Label Studio JSON Dataset"
108
+ # )
109
+ #
110
+ # gr.Markdown("---")
111
+ # gr.Markdown("### ⚙️ Training Parameters")
112
+ #
113
+ # batch_size_input = gr.Slider(
114
+ # minimum=1, maximum=32, step=1, value=4, label="Batch Size (--batch_size)"
115
+ # )
116
+ # epochs_input = gr.Slider(
117
+ # minimum=1, maximum=20, step=1, value=5, label="Epochs (--epochs)"
118
+ # )
119
+ # lr_input = gr.Number(
120
+ # value=5e-5, label="Learning Rate (--lr)"
121
+ # )
122
+ # max_len_input = gr.Number(
123
+ # value=512, label="Max Sequence Length (--max_len)"
124
+ # )
125
+ #
126
+ # with gr.Column(scale=2):
127
+ # train_button = gr.Button("🔥 Train Model", variant="primary")
128
+ #
129
+ # log_output = gr.Textbox(
130
+ # label="Training Log Output",
131
+ # lines=20,
132
+ # autoscroll=True,
133
+ # placeholder="Click 'Train Model' to start and see real-time logs..."
134
+ # )
135
+ #
136
+ # gr.Markdown("---")
137
+ # gr.Markdown(f"### 🎉 Trained Model Output (Saved to `{MODEL_OUTPUT_DIR}/`)")
138
+ #
139
+ # # Only providing the download link for the saved .pth model file
140
+ # model_download = gr.File(label=f"Trained Model File ({MODEL_FILE_NAME})", interactive=False)
141
+ #
142
+ # # Define the action when the button is clicked
143
+ # train_button.click(
144
+ # fn=train_model,
145
+ # inputs=[file_input, batch_size_input, epochs_input, lr_input, max_len_input],
146
+ # outputs=[log_output, model_download]
147
+ # )
148
+ #
149
+ # if __name__ == "__main__":
150
+ # demo.launch(server_port=7860, server_name="0.0.0.0")
151
+
152
+
153
  import gradio as gr
154
  import subprocess
155
  import os
 
177
  os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
178
 
179
  # 2. File Handling: Use the temporary path of the uploaded file
180
+ if dataset_file is None:
181
+ yield "❌ ERROR: Please upload a file.", None
182
+ return
183
+
184
+ # FIX: Gradio returns the path in the .name attribute, not .path
185
+ input_path = dataset_file.name
186
 
187
+ if not input_path.lower().endswith(".json"):
188
+ yield "❌ ERROR: Please upload a valid Label Studio JSON file (.json).", None
189
+ return
190
 
191
  progress(0.1, desc="Starting LayoutLMv3 Training...")
192
 
 
205
  ]
206
 
207
  log_output += f"Executing command: {' '.join(command)}\n\n"
208
+ yield log_output, None # Yield the command to the log output
209
 
210
  try:
211
  # 4. Run the training script and capture output
 
306
  )
307
 
308
  if __name__ == "__main__":
309
+ # Removed server_port and server_name as they are often unnecessary
310
+ # and sometimes cause issues in managed Space environments.
311
+ demo.launch()