Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Fixed Gradio error on completed stream
Browse files
    	
        app.py
    CHANGED
    
    | @@ -2,6 +2,7 @@ import gradio as gr | |
| 2 | 
             
            from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
         | 
| 3 | 
             
            import torch, transformers
         | 
| 4 | 
             
            from threading import Thread
         | 
|  | |
| 5 |  | 
| 6 | 
             
            #Load the model
         | 
| 7 | 
             
            model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq' 
         | 
| @@ -53,9 +54,11 @@ def chat(message, history): | |
| 53 | 
             
                t, stream = chat_processor(chat=message)
         | 
| 54 | 
             
                response = ""
         | 
| 55 | 
             
                for character in stream:
         | 
| 56 | 
            -
                     | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
|  | |
|  | |
| 59 | 
             
                t.join()
         | 
| 60 | 
             
                torch.cuda.empty_cache()
         | 
| 61 |  | 
|  | |
| 2 | 
             
            from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
         | 
| 3 | 
             
            import torch, transformers
         | 
| 4 | 
             
            from threading import Thread
         | 
| 5 | 
            +
            import time
         | 
| 6 |  | 
| 7 | 
             
            #Load the model
         | 
| 8 | 
             
            model_id = 'mobiuslabsgmbh/Llama-2-7b-chat-hf_1bitgs8_hqq' 
         | 
|  | |
| 54 | 
             
                t, stream = chat_processor(chat=message)
         | 
| 55 | 
             
                response = ""
         | 
| 56 | 
             
                for character in stream:
         | 
| 57 | 
            +
                    if character is not None:
         | 
| 58 | 
            +
                        response += character
         | 
| 59 | 
            +
                        # print(character)
         | 
| 60 | 
            +
                        yield response
         | 
| 61 | 
            +
                time.sleep(0.1)
         | 
| 62 | 
             
                t.join()
         | 
| 63 | 
             
                torch.cuda.empty_cache()
         | 
| 64 |  |