Spaces:

Nitish-py
/

KnowledgeHub

Sleeping

App Files Files Community

Nitish-py committed on Nov 23, 2023

Commit

559c3d3

1 Parent(s): b6983e3

Upload 4 files

Browse files

Files changed (4) hide show

.gitignore +1 -0
README.md +4 -10
app.py +264 -0
requirements.txt +12 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
----
 title: KnowledgeHub
-emoji: 🌖
-colorFrom: red
-colorTo: purple
 sdk: streamlit
-sdk_version: 1.28.2
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 title: KnowledgeHub
 sdk: streamlit
+emoji: 🚀
+colorFrom: purple
+colorTo: indigo
+pinned: true

app.py ADDED Viewed

	@@ -0,0 +1,264 @@

+import os
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain import vectorstores as vs
+from langchain import chains
+import pinecone
+from goose3 import Goose
+import streamlit as st
+import whisper
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import AI21
+from pytube import YouTube
+import moviepy.editor
+import time
+# Load settings through python-dotenv before the environment lookups below (the repo's .gitignore lists a .env file).
+load_dotenv()
+# Pinecone and AI21 credentials come from environment variables; os.getenv yields None for any that are unset.
+api_key=os.getenv('PINECONE_API_KEY')
+env=os.getenv('PINECONE_ENVIRONMENT')
+ai21_api_key=os.getenv('AI21_API_KEY')
+# Initialise the Pinecone client at import time; the helper functions below call pinecone.init again before each use.
+pinecone.init(api_key=api_key, environment=env)
+def txtread(txt_content):
+    texts = ""
+    texts += txt_content.decode('utf-8')
+    text_splitter = CharacterTextSplitter(
+    separator="\n",
+    chunk_size = 1000,
+    chunk_overlap = 0)
+    chunks = text_splitter.split_text(texts)
+    process.success("Chunking of the data is done")
+    embeddings = HuggingFaceEmbeddings()
+    pinecone.init(api_key=api_key, environment=env)
+    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
+    db = vs.pinecone.Pinecone.from_texts(chunks, embeddings,index_name="multigpt",namespace="txt")
+    process.success("Data is securly Uploaded")
+def pdfread(pdf):
+    pdf_reader = PdfReader(pdf)
+    texts = ""
+    for page in pdf_reader.pages:
+        texts += page.extract_text()
+    text_splitter = CharacterTextSplitter(
+    separator="\n",
+    chunk_size = 4000,
+    chunk_overlap = 0)
+    chunks = text_splitter.split_text(texts)
+    process.success("Chunking of the data is done")
+    embeddings = HuggingFaceEmbeddings()
+    pinecone.init(api_key=api_key, environment=env)
+    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
+    db = vs.pinecone.Pinecone.from_texts(chunks, embeddings,index_name="multigpt",namespace="pdf")
+    process.success("Data is securly Uploaded")
+def urlread(url_path):
+    g = Goose({'browser_user_agent': 'Mozilla', 'parser_class': 'soup'})
+    texts = g.extract(url=url_path).cleaned_text
+    text_splitter = CharacterTextSplitter(
+    separator="\n",
+    chunk_size = 2000,
+    chunk_overlap = 0)
+    chunks = text_splitter.split_text(texts)
+    process.success("Chunking of the data is done")
+    embeddings = HuggingFaceEmbeddings()
+    pinecone.init(api_key=api_key, environment=env)
+    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
+    db = vs.pinecone.Pinecone.from_texts(chunks, embeddings,index_name="multigpt",namespace="url")
+    process.success("Data is securly Uploaded")
+def scrape(vidlink):
+    youtubeObject = YouTube(vidlink)
+    youtubeObject = youtubeObject.streams.get_highest_resolution()
+    youtubeObject.download(filename='video.mp4')
+    process.success('Downloading Video')
+    done=False
+    while not done:
+        time.sleep(10)
+        done=os.path.exists("video.mp4")
+    video = moviepy.editor.VideoFileClip("video.mp4")
+    process.warning('Extracting Audio')
+    audio = video.audio
+    audio.write_audiofile("audio.mp3")
+    process.warning('Trancscribing the Audio')
+    model = whisper.load_model('base')
+    result=model.transcribe('audio.mp3')
+    texts=(result['text'])
+    process.success('Transcription is done')
+    text_splitter = CharacterTextSplitter(
+    separator="\n",
+    chunk_size = 1000,
+    chunk_overlap = 0)
+    chunks = text_splitter.split_text(texts)
+    process.success("Chunking of the data is done")
+    embeddings = HuggingFaceEmbeddings()
+    pinecone.init(api_key=api_key, environment=env)
+    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
+    db = vs.pinecone.Pinecone.from_texts(chunks, embeddings,index_name="multigpt",namespace="vid")
+    process.success("Data is securly Uploaded")
+def chain(name):
+    process.warning("Your Chain is running")
+    embeddings = HuggingFaceEmbeddings()
+    pinecone.init(api_key=api_key, environment=env)
+    db=vs.pinecone.Pinecone.from_existing_index(index_name='multigpt',namespace=name, embedding=embeddings)
+    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":10})
+    llm = AI21(ai21_api_key=ai21_api_key)
+    qa = chains.ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
+    return qa
+def ai(qa,prompt):
+    chat_history=[]
+    result = qa({"question": prompt,  "chat_history": chat_history})
+    process.success("Search Complete!")
+    return result
+def intro():
+    placeholder.title('____________👨🏻‍💻 MINOR PROJECT 👨🏻‍💻____________\n')
+    data.subheader('🚀 Introducing "KnowledgeHub" Web App! 🌐🧠')
+    process.write('___________________________________________')
+    intro=('''
+Welcome to the future of knowledge interaction! 🚀 With our groundbreaking web app, "KnowledgeHub," you can effortlessly infuse intelligence into our platform through various mediums. 📚💻
+How It Works:
+📁 File Magic: Upload your knowledge-packed text files or PDFs to seamlessly share insights and wisdom with the world! 🚀
+🌐 URL Wizardry: Simply paste a website URL, and watch as the KnowledgeHub transforms online information into a dynamic source of intelligence! 🤯
+🎥 YouTube Brilliance: Share video insights by dropping those mind-blowing YouTube links! Transforming video content into knowledge gold has never been easier! 🌟
+Why use KnowledgeHub:
+🚀 Instant Interaction: Say goodbye to static data! Engage with your knowledge instantly and turn information into actionable insights. 🚀
+🌐 Universal Accessibility: Access your knowledge from anywhere, anytime, and empower your audience to dive into your insights effortlessly. 🌍
+🤖 AI-Powered Conversations: Leverage cutting-edge AI for interactive conversations based on your knowledge repository! It's like having a brilliant virtual assistant at your fingertips! 🤖💡
+📊 Data-Driven Decisions: Turn raw data into actionable intelligence. Make informed decisions backed by the power of your knowledge repository. 📈
+Embrace the future of knowledge sharing with KnowledgeHub – Where ideas come to life, and intelligence knows no bounds! 🚀🔥🔍''')
+    ph=st.empty()
+    x=''
+    for i in intro:
+        x+=i
+        time.sleep(0.01)
+        ph.markdown(x)
+def upload():
+    placeholder.title("Let's create the Knowledge Base")
+    process.error('Here you will be notified regarding the status of the upload')
+    page = ['','TEXT','PDF','URL','VIDEO']
+    choice = st.sidebar.radio("Choose your mode",page)
+    if choice=='':
+        data.subheader('Choose what type of data you wanna upload')
+    elif choice == 'TEXT':
+        text = data.file_uploader("Upload your txt file", type="txt")
+        if text:
+            txtread(text)
+    elif choice == 'PDF':
+        pdf = data.file_uploader("Upload your PDF file", type="pdf")
+        if pdf:
+            pdfread(pdf)
+    elif choice == 'URL':
+        url_path = data.text_input('Enter the url')
+        if url_path:
+            urlread(url_path)
+    elif choice == 'VIDEO':
+        link = data.text_input('Enter link to the youtube video')
+        if link:
+            scrape(link)
+    time.sleep(3)
+    process.success('You can go to the chat section or upload more data')
+def chat():
+    placeholder.title("Let's go!!")
+    process.error('Here you will be notified regarding the retrival of your answers')
+    page = ['','TEXT','PDF','URL','VIDEO']
+    choice = st.sidebar.radio("Choose your mode",page)
+    if choice=='':
+        data.subheader('Choose from which data you want answers from')
+    elif choice == 'TEXT':
+        name='txt'
+        query = st.text_input("Ask a question based on the txt file",value="")
+        if query:
+            qa=chain(name)
+            result=ai(qa,query)
+            ph=st.empty()
+            x=''
+            for i in result["answer"]:
+                x+=i
+                time.sleep(0.01)
+                ph.markdown(x)
+    elif choice == 'PDF':
+        name='pdf'
+        query = st.text_input("Ask a question based on the PDF",value="")
+        if query:
+            qa=chain(name)
+            result=ai(qa,query)
+            ph=st.empty()
+            x=''
+            for i in result["answer"]:
+                x+=i
+                time.sleep(0.01)
+                ph.markdown(x)
+    elif choice == 'URL':
+        name='url'
+        query = st.text_input("Ask a question based on the data from the url",value="")
+        if query:
+            qa=chain(name)
+            result=ai(qa,query)
+            ph=st.empty()
+            x=''
+            for i in result["answer"]:
+                x+=i
+                time.sleep(0.01)
+                ph.markdown(x)
+    elif choice == 'VIDEO':
+        name='vid'
+        query = st.text_input("Ask a question from based on the YouTube video",value="")
+        if query:
+            qa=chain(name)
+            result=ai(qa,query)
+            ph=st.empty()
+            x=''
+            for i in result["answer"]:
+                x+=i
+                time.sleep(0.01)
+                ph.markdown(x)
+def main():
+    global placeholder, process, data
+    placeholder=st.empty()
+    data=st.empty()
+    process=st.empty()
+    page = ['HOME','Upload','Chat']
+    choice = st.sidebar.radio("Choose upload or chat",page)
+    if choice=='HOME':
+        intro()
+    elif choice=='Upload':
+        upload()
+    elif choice=='Chat':
+        chat()
+# Build the page only when this file is run as the main script, not when it is imported.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+ai21
+goose3
+pinecone-client
+pydantic==1.10.12
+langchain==0.0.278
+PyPDF2
+python-dotenv
+streamlit
+moviepy
+pytube
+git+https://github.com/openai/whisper.git
+sentence_transformers