import sql_utils as su import gpt_utils as gu import time def database_structure_strings(): NanoEntDes_structure = "{table:NanoEntDes,attributes:" + \ "[ID(Primary),NanoEntity,Description]}" NanoEntCom_structure = "{table:NanoEntCom,attributes:" + \ "[ID(Primary),NanoEntity,Composition,CompositionType," + \ "MolecularWeight,PubChemID]}" FuncEntDes_structure = "{table: FuncEntDes, attributes:" + \ "[ID(Primary),FunctionEntity,FunctionEntityType," + \ "Description,ActivationMethod,pubChemID," + \ "MolarMass,MolarMassUnit]}" FuncEntFunction_structure = "{table: FuncEntFunction," + \ "attributes:[ID(Primary),FunctionEntiry,Function," + \ "FunctionDescription]}" ChemAsso_structure = "{table: ChemAsso, attributes:" + \ "[ID(Primary),AssociationType,BondType,Description," + \ "dataId,ComposingElementNameA,ComposingElementNameB," + \ "CompositiontypeB,CompositiontypeA,DomainElementNameB," + \ "DomainElementNameA,DomainAssociationId,ComposingElemetIdB," + \ "ComposingElemetIdA,ComposingElementTypeA,EntityDisplayNameB," + \ "ComposingElementTypeB,EntityDisplayNameA,AttachmentId}" GeneralInfo_structure = "{table:GeneralInfo,attributes:" + \ "[ID(Primary),sampleName,createdYear,createdMonth]}" SampleKeywords_structure = "{table:SampleKeyWords,attributes:" + \ "[ID(Primary),sampleName,SampleKeyWord]}" PublicationInfo_structure = "{table:PublicationInfo," + \ "attributes:[ID(Primary),PMID,year,title,author," + \ "journal,publicationCategories,description]}" PublicationKeyWords_structure = "{table:PublicationKeyWords," + \ "attributes:[ID(Primary),sampleName,SampleKeyWord]}" CharacterizationInfo_structure = "{table:CharacterizationInfo," + \ "attributes:[ID(Primary),CharType,CharName," + \ "AssayType,Protocol, " + \ "DesignDescription,AnalysisAndConclusion]}" CharExpConfig_structure = "{table:CharExpConfig," + \ "attributes:[ID(Primary),CharType,CharName," + \ "AssayType,ExpConfigTechnique, " + \ "ExpConfigInstruments,ExpConfigDescription]}" CharResultDescriptions_structure = "{table:CharResultDescriptions," + \ "attributes:[ID(Primary),CharType,CharName," + \ "AssayType,CharResultDescription]}" CharResultKeywords_structure = "{table:CharResultKeywords," + \ "attributes:[ID(Primary),CharType,CharName," + \ "AssayType,CharResultKeyword]}" CharResultTables_structure = "{table:CharResultTables," + \ "attributes:[ID(Primary),CharType,CharName," + \ "AssayType,CharTable]}" note1 = "NanoEntCom should not be used to count unique " + \ "NanoEntity\ncount composition should only use composition table" note2 = "ALL tables shoud join on ID. Table NanoEntDes and " + \ "NanoEntCom share key NanoEntity, " + \ "FuncEntDes and FuncEntFunction share FunctionEntity key, " + \ "ChemAsso does not have other common keys with other tables." note3 = "Strictly reference to table name and columns in this context. " Overall_structure = [ "\n", NanoEntDes_structure, NanoEntCom_structure, FuncEntDes_structure, FuncEntFunction_structure, ChemAsso_structure, GeneralInfo_structure, SampleKeywords_structure, PublicationInfo_structure, PublicationKeyWords_structure, CharacterizationInfo_structure, CharExpConfig_structure, CharResultDescriptions_structure, CharResultKeywords_structure, CharResultTables_structure, note1, note2, note3 ] Overall_structure_string = "\n".join(Overall_structure) return Overall_structure_string def custom_query( question, connection, GPT4=True, print_prompt=False, print_token=False, print_query=False, print_time=False ): stucture = database_structure_strings() context_token_count = gu.num_tokens_from_string( stucture ) prompt = su.sql_prompt( question, stucture ) prompt_token_count = gu.num_tokens_from_string(prompt) if print_prompt: print(prompt) if print_token: print(f"\nContext token count: {context_token_count}") print(f"Prompt token count: {prompt_token_count}") start_time = time.time() if GPT4: query = gu.quick_ask(prompt, model_num=0) else: query = gu.quick_ask(prompt, model_num=1) if print_query: print("\n============= The Query is: ===============\n") print(query) print("\n===========================================\n") result_df = su.submit_querry(query, connection) # End the timer end_time = time.time() # Calculate the execution time execution_time = end_time - start_time # Print the execution time if print_time: print("\nExecution Time:", execution_time, "seconds") return result_df, query