Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain import vectorstores as vs | |
| from langchain import chains | |
| import pinecone | |
| from goose3 import Goose | |
| import streamlit as st | |
| import whisper | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.llms import AI21 | |
| from pytube import YouTube | |
| import moviepy.editor | |
| import time | |
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Service credentials, read once at import time and used by the functions
# defined further down in this module.
api_key = os.environ.get('PINECONE_API_KEY')
env = os.environ.get('PINECONE_ENVIRONMENT')
ai21_api_key = os.environ.get('AI21_API_KEY')

# Configure the Pinecone client with the credentials read above.
pinecone.init(api_key=api_key, environment=env)
def txtread(txt_content):
    """Chunk an uploaded text file, embed it and store it in Pinecone.

    Args:
        txt_content: The ``.txt`` payload, either raw ``bytes`` or a binary
            file-like object exposing ``read()``. ``upload()`` passes the
            object returned by Streamlit's ``file_uploader``.

    Side effects:
        Writes progress messages to the module-level ``process`` placeholder
        and uploads the embedded chunks to the ``multigpt`` index under the
        ``txt`` namespace.
    """
    # The uploader returns a file-like object, which has no ``decode`` method;
    # read the bytes out first. Plain ``bytes`` input keeps working as before.
    raw = txt_content.read() if hasattr(txt_content, "read") else txt_content
    texts = raw.decode('utf-8')

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    vs.pinecone.Pinecone.from_texts(
        chunks, embeddings, index_name="multigpt", namespace="txt"
    )
    process.success("Data is securly Uploaded")
def pdfread(pdf):
    """Extract the text of a PDF, chunk it, embed it and store it in Pinecone.

    Args:
        pdf: Whatever ``PdfReader`` accepts; ``upload()`` passes the object
            returned by the PDF file uploader.

    Side effects:
        Writes progress messages to the module-level ``process`` placeholder
        and uploads the embedded chunks to the ``multigpt`` index under the
        ``pdf`` namespace.
    """
    document = PdfReader(pdf)
    # Concatenate the text of every page, in page order, with no separator.
    texts = "".join(page.extract_text() for page in document.pages)

    splitter = CharacterTextSplitter(separator="\n", chunk_size=4000, chunk_overlap=0)
    chunks = splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    vs.pinecone.Pinecone.from_texts(
        chunks,
        embeddings,
        index_name="multigpt",
        namespace="pdf",
    )
    process.success("Data is securly Uploaded")
def urlread(url_path):
    """Fetch a web page, chunk its cleaned text, embed it and store it in Pinecone.

    Args:
        url_path: URL handed to Goose for extraction.

    Side effects:
        Writes progress messages to the module-level ``process`` placeholder
        and uploads the embedded chunks to the ``multigpt`` index under the
        ``url`` namespace.
    """
    extractor = Goose({'browser_user_agent': 'Mozilla', 'parser_class': 'soup'})
    article = extractor.extract(url=url_path)
    texts = article.cleaned_text

    splitter = CharacterTextSplitter(separator="\n", chunk_size=2000, chunk_overlap=0)
    chunks = splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    vs.pinecone.Pinecone.from_texts(
        chunks,
        embeddings,
        index_name="multigpt",
        namespace="url",
    )
    process.success("Data is securly Uploaded")
def scrape(vidlink):
    """Download a YouTube video, transcribe it and store the transcript in Pinecone.

    Steps: download the highest-resolution stream as ``video.mp4``, write its
    audio track to ``audio.mp3``, transcribe that with Whisper's ``base``
    model, chunk the transcript, embed it and upload it to the ``multigpt``
    index under the ``vid`` namespace.

    Args:
        vidlink: URL of the YouTube video.

    Side effects:
        Creates ``video.mp4`` and ``audio.mp3`` and writes progress messages
        to the module-level ``process`` placeholder.
    """
    # Announce the download before it starts, not after it has finished.
    process.success('Downloading Video')
    stream = YouTube(vidlink).streams.get_highest_resolution()
    # download() blocks until the file is written and returns its path, so no
    # polling for the file is needed. The previous poll loop always slept at
    # least 10 s and never terminated if the file failed to appear.
    video_path = stream.download(filename='video.mp4')

    process.warning('Extracting Audio')
    video = moviepy.editor.VideoFileClip(video_path)
    try:
        video.audio.write_audiofile("audio.mp3")
    finally:
        # Release the reader held by the clip even if extraction fails.
        video.close()

    process.warning('Trancscribing the Audio')
    model = whisper.load_model('base')
    result = model.transcribe('audio.mp3')
    texts = result['text']
    process.success('Transcription is done')

    # Split on spaces: a transcript normally contains no newlines, so a "\n"
    # separator leaves the whole text as one chunk far above chunk_size.
    text_splitter = CharacterTextSplitter(
        separator=" ",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    vs.pinecone.Pinecone.from_texts(
        chunks, embeddings, index_name="multigpt", namespace="vid"
    )
    process.success("Data is securly Uploaded")
def chain(name):
    """Build a conversational retrieval chain over one Pinecone namespace.

    Args:
        name: Namespace inside the ``multigpt`` index to retrieve from.

    Returns:
        The ``ConversationalRetrievalChain`` combining an AI21 LLM with a
        similarity retriever that fetches 10 matches per query.
    """
    process.warning("Your Chain is running")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    store = vs.pinecone.Pinecone.from_existing_index(
        index_name='multigpt',
        embedding=embeddings,
        namespace=name,
    )
    retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
    return chains.ConversationalRetrievalChain.from_llm(
        llm=AI21(ai21_api_key=ai21_api_key),
        retriever=retriever,
    )
def ai(qa, prompt):
    """Run a single question through a QA chain and return its result.

    Args:
        qa: Callable chain, as produced by ``chain()``.
        prompt: The user's question.

    Returns:
        Whatever ``qa`` returns for the question. Every call starts with an
        empty chat history, so no earlier turns are passed to the chain.
    """
    payload = {"question": prompt, "chat_history": []}
    answer = qa(payload)
    process.success("Search Complete!")
    return answer
def intro():
    """Render the home page: title, subtitle and an animated welcome text.

    The welcome text is revealed one character at a time (10 ms apart) by
    repeatedly re-rendering a single Streamlit placeholder.
    """
    # NOTE(review): the emoji in these literals look mis-encoded in this copy
    # of the file; they are left exactly as found - confirm against the
    # deployed app before changing them.
    placeholder.title('____________π¨π»βπ» MINOR PROJECT π¨π»βπ»____________\n')
    data.subheader('π Introducing "KnowledgeHub" Web App! ππ§ ')
    process.write('___________________________________________')
    welcome_text = ('''
Welcome to the future of knowledge interaction! π With our groundbreaking web app, "KnowledgeHub," you can effortlessly infuse intelligence into our platform through various mediums. ππ»
How It Works:
π File Magic: Upload your knowledge-packed text files or PDFs to seamlessly share insights and wisdom with the world! π
π URL Wizardry: Simply paste a website URL, and watch as the KnowledgeHub transforms online information into a dynamic source of intelligence! π€―
π₯ YouTube Brilliance: Share video insights by dropping those mind-blowing YouTube links! Transforming video content into knowledge gold has never been easier! π
Why use KnowledgeHub:
π Instant Interaction: Say goodbye to static data! Engage with your knowledge instantly and turn information into actionable insights. π
π Universal Accessibility: Access your knowledge from anywhere, anytime, and empower your audience to dive into your insights effortlessly. π
π€ AI-Powered Conversations: Leverage cutting-edge AI for interactive conversations based on your knowledge repository! It's like having a brilliant virtual assistant at your fingertips! π€π‘
π Data-Driven Decisions: Turn raw data into actionable intelligence. Make informed decisions backed by the power of your knowledge repository. π
Embrace the future of knowledge sharing with KnowledgeHub β Where ideas come to life, and intelligence knows no bounds! ππ₯π''')
    slot = st.empty()
    # Typewriter effect: show a one-character-longer prefix on every tick.
    for shown in range(1, len(welcome_text) + 1):
        time.sleep(0.01)
        slot.markdown(welcome_text[:shown])
def upload():
    """Render the upload page and ingest whatever the user provides.

    The sidebar radio selects the source type (text file, PDF, URL or
    YouTube link). Once the matching input is filled in, the corresponding
    reader (``txtread``, ``pdfread``, ``urlread`` or ``scrape``) is run.

    After an ingestion has run, a follow-up hint is shown three seconds
    later. The hint is deliberately tied to a completed ingestion so the
    page neither stalls nor overwrites the initial status message while the
    user has not supplied any data yet.
    """
    placeholder.title("Let's create the Knowledge Base")
    process.error('Here you will be notified regarding the status of the upload')
    page = ['', 'TEXT', 'PDF', 'URL', 'VIDEO']
    choice = st.sidebar.radio("Choose your mode", page)

    ingested = False
    if choice == '':
        data.subheader('Choose what type of data you wanna upload')
    elif choice == 'TEXT':
        text = data.file_uploader("Upload your txt file", type="txt")
        if text:
            txtread(text)
            ingested = True
    elif choice == 'PDF':
        pdf = data.file_uploader("Upload your PDF file", type="pdf")
        if pdf:
            pdfread(pdf)
            ingested = True
    elif choice == 'URL':
        url_path = data.text_input('Enter the url')
        if url_path:
            urlread(url_path)
            ingested = True
    elif choice == 'VIDEO':
        link = data.text_input('Enter link to the youtube video')
        if link:
            scrape(link)
            ingested = True

    if ingested:
        # Leave the reader's final status visible briefly before replacing it.
        time.sleep(3)
        process.success('You can go to the chat section or upload more data')
def chat():
    """Render the chat page and answer a question from the chosen data source.

    The sidebar radio selects which Pinecone namespace to query. When the
    user submits a question, a retrieval chain is built for that namespace,
    the question is answered, and the answer is revealed one character at a
    time (10 ms apart) in a single Streamlit placeholder.
    """
    placeholder.title("Let's go!!")
    process.error('Here you will be notified regarding the retrival of your answers')
    modes = ['', 'TEXT', 'PDF', 'URL', 'VIDEO']
    choice = st.sidebar.radio("Choose your mode", modes)

    if choice == '':
        data.subheader('Choose from which data you want answers from')
        return

    # Sidebar choice -> (Pinecone namespace, label of the question box).
    sources = {
        'TEXT': ('txt', "Ask a question based on the txt file"),
        'PDF': ('pdf', "Ask a question based on the PDF"),
        'URL': ('url', "Ask a question based on the data from the url"),
        'VIDEO': ('vid', "Ask a question from based on the YouTube video"),
    }
    if choice not in sources:
        return

    namespace, label = sources[choice]
    query = st.text_input(label, value="")
    if not query:
        return

    qa = chain(namespace)
    result = ai(qa, query)

    slot = st.empty()
    answer = result["answer"]
    # Typewriter effect: show a one-character-longer prefix on every tick.
    for shown in range(1, len(answer) + 1):
        time.sleep(0.01)
        slot.markdown(answer[:shown])
def main():
    """Create the shared page placeholders and route to the selected page.

    ``placeholder``, ``data`` and ``process`` are published as module-level
    globals because the page and ingestion functions write to them directly.
    """
    global placeholder, process, data
    placeholder = st.empty()
    data = st.empty()
    process = st.empty()

    # Sidebar label -> page renderer; dict order is the order of the radio options.
    pages = {'HOME': intro, 'Upload': upload, 'Chat': chat}
    choice = st.sidebar.radio("Choose upload or chat", list(pages))
    render = pages.get(choice)
    if render is not None:
        render()


if __name__ == "__main__":
    main()