Commit · 4d1c962
	
1 Parent(s): c0c68e7
								
feat: added duration for run
Browse files
- .gitignore +1 -0
- src/distilabel_dataset_generator/sft.py +16 -3
    	
        .gitignore
    CHANGED
    
    | @@ -160,3 +160,4 @@ cython_debug/ | |
| 160 | 
             
            #  and can be added to the global gitignore or merged into this file.  For a more nuclear
         | 
| 161 | 
             
            #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
         | 
| 162 | 
             
            #.idea/
         | 
|  | 
|  | |
| 160 | 
             
            #  and can be added to the global gitignore or merged into this file.  For a more nuclear
         | 
| 161 | 
             
            #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
         | 
| 162 | 
             
            #.idea/
         | 
| 163 | 
            +
            .DS_Store
         | 
    	
        src/distilabel_dataset_generator/sft.py
    CHANGED
    
    | @@ -232,16 +232,29 @@ def generate_dataset( | |
| 232 | 
             
                    )
         | 
| 233 | 
             
                    num_rows = 5000
         | 
| 234 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 235 | 
             
                gr.Info(
         | 
| 236 | 
            -
                    "Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page."
         | 
|  | |
| 237 | 
             
                )
         | 
| 238 | 
             
                result_queue = multiprocessing.Queue()
         | 
| 239 | 
             
                p = multiprocessing.Process(
         | 
| 240 | 
             
                    target=_run_pipeline,
         | 
| 241 | 
             
                    args=(result_queue, num_turns, num_rows, system_prompt),
         | 
| 242 | 
             
                )
         | 
| 243 | 
            -
                 | 
| 244 | 
            -
             | 
|  | |
|  | |
|  | |
| 245 | 
             
                distiset = result_queue.get()
         | 
| 246 |  | 
| 247 | 
             
                if dataset_name is not None:
         | 
|  | |
| 232 | 
             
                    )
         | 
| 233 | 
             
                    num_rows = 5000
         | 
| 234 |  | 
| 235 | 
            +
                if num_rows < 50:
         | 
| 236 | 
            +
                    duration = 60
         | 
| 237 | 
            +
                elif num_rows < 250:
         | 
| 238 | 
            +
                    duration = 300
         | 
| 239 | 
            +
                elif num_rows < 1000:
         | 
| 240 | 
            +
                    duration = 500
         | 
| 241 | 
            +
                else:
         | 
| 242 | 
            +
                    duration = 1000
         | 
| 243 | 
            +
             | 
| 244 | 
             
                gr.Info(
         | 
| 245 | 
            +
                    "Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page.",
         | 
| 246 | 
            +
                    duration=duration,
         | 
| 247 | 
             
                )
         | 
| 248 | 
             
                result_queue = multiprocessing.Queue()
         | 
| 249 | 
             
                p = multiprocessing.Process(
         | 
| 250 | 
             
                    target=_run_pipeline,
         | 
| 251 | 
             
                    args=(result_queue, num_turns, num_rows, system_prompt),
         | 
| 252 | 
             
                )
         | 
| 253 | 
            +
                try:
         | 
| 254 | 
            +
                    p.start()
         | 
| 255 | 
            +
                    p.join()
         | 
| 256 | 
            +
                except Exception as e:
         | 
| 257 | 
            +
                    raise gr.Error(f"An error occurred during dataset generation: {str(e)}")
         | 
| 258 | 
             
                distiset = result_queue.get()
         | 
| 259 |  | 
| 260 | 
             
                if dataset_name is not None:
         | 
 
			

