import pytest
from tests.utils import wrap_test_forked


@wrap_test_forked
def test_langchain_simple_h2ogpt():
    run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot')


@wrap_test_forked
def test_langchain_simple_vicuna():
    run_langchain_simple(base_model='junelee/wizard-vicuna-13b', prompt_type='instruct_vicuna')
def run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot'):
    """
    :param base_model:
    :param prompt_type: prompt_type required for stopping support and correct handling of instruction prompting
    :return:
    """
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from src.h2oai_pipeline import H2OTextGenerationPipeline

    model_name = base_model
    from transformers import AutoConfig
    config = AutoConfig.from_pretrained(base_model, use_auth_token=True,
                                        trust_remote_code=True,
                                        offload_folder="./")
    # detect LLaMa-family models from either the config or the model name
    llama_type_from_config = 'llama' in str(config).lower()
    llama_type_from_name = "llama" in base_model.lower()
    llama_type = llama_type_from_config or llama_type_from_name
    if llama_type:
        from transformers import LlamaForCausalLM, LlamaTokenizer
        model_loader = LlamaForCausalLM
        tokenizer_loader = LlamaTokenizer
    else:
        model_loader = AutoModelForCausalLM
        tokenizer_loader = AutoTokenizer

    load_in_8bit = True
    # torch.cuda.is_available is a function, so it must be called here
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    device = 'cpu' if n_gpus == 0 else 'cuda'
    device_map = {"": 0} if device == 'cuda' else "auto"
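    # Note (added for clarity, not in the original): with a CUDA GPU the whole model is
    # pinned to device 0, otherwise device_map="auto" lets accelerate decide placement.
    # In practice load_in_8bit=True needs bitsandbytes and a CUDA device.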
    tokenizer = tokenizer_loader.from_pretrained(model_name, padding_side="left")
    model = model_loader.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map=device_map,
                                         load_in_8bit=load_in_8bit)
    gen_kwargs = dict(max_new_tokens=512, return_full_text=True, early_stopping=False)
    pipe = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer, prompt_type=prompt_type,
                                     base_model=base_model, **gen_kwargs)
    # Make the pipeline rely only on our prompt removal, not the built-in prompt removal,
    # which is less general and not specific to our model
    pipe.task = "text2text-generation"

    # create llm for LangChain
    from langchain.llms import HuggingFacePipeline
    llm = HuggingFacePipeline(pipeline=pipe)

    # Setup QA
    from langchain import PromptTemplate
    from langchain.chains.question_answering import load_qa_chain

    # NOTE: Instruct-tuned models don't need excessive many-shot examples that waste context space
    template = """
==
{context}
==
{question}"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
    chain = load_qa_chain(llm, prompt=prompt)

    docs = []  # could be LangChain Documents loaded from any source
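    # Hedged sketch (not part of the original test): `docs` could instead hold real
    # LangChain Documents from any loader; the content below is purely hypothetical.
    #   from langchain.docstore.document import Document
    #   docs = [Document(page_content="Isaac Newton formulated the laws of motion.")]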
    query = "Give detailed list of reasons for who is smarter, Einstein or Newton."
    chain_kwargs = dict(input_documents=docs, question=query)
    answer = chain(chain_kwargs)
    print(answer)

    if 'vicuna' in base_model:
        res1 = 'Both Albert Einstein and Sir Isaac Newton were brilliant scientists' in answer[
            'output_text'] and "Newton" in answer['output_text']
        res2 = 'Both Albert Einstein and Sir Isaac Newton are considered two' in answer[
            'output_text'] and "Newton" in answer['output_text']
        # define res3 in this branch too, so the assert below cannot raise NameError
        res3 = False
    else:
        res1 = 'Einstein was a genius who revolutionized physics' in answer['output_text'] and "Newton" in answer[
            'output_text']
        res2 = 'Einstein and Newton are two of the most famous scientists in history' in answer[
            'output_text'] and "Newton" in answer['output_text']
        res3 = 'Einstein is considered to be the smartest person' in answer[
            'output_text'] and "Newton" in answer['output_text']
    assert res1 or res2 or res3
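

# Minimal standalone runner (a sketch, not part of the original tests); assumes a CUDA GPU
# with enough memory for the default 12B model in 8-bit, and that this file is run from the
# repo root so that `src` and `tests` are importable:
if __name__ == '__main__':
    run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot')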