Charles Chan committed
Commit · 21d443e
1 Parent(s): de611e2
coding
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st
 import random
 from langchain_community.llms import HuggingFaceHub
 from langchain_community.embeddings import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import FAISS
 from datasets import load_dataset
 from opencc import OpenCC
@@ -35,8 +35,12 @@ if "vector_created" not in st.session_state:
 if not st.session_state.vector_created:
     try:
         with st.spinner("正在构建向量数据库..."):
-
-
+            # all-mpnet-base-v2 is a pretrained model provided by the Sentence Transformers
+            # library, designed specifically for generating high-quality sentence embeddings.
+            # all-mpnet-base-v2 performs well on many NLP tasks, including semantic similarity,
+            # text retrieval and clustering; it captures sentence semantics in representative vectors.
+            st.session_state.embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
+            st.session_state.db = FAISS.from_texts(st.session_state.answer_list, st.session_state.embeddings)
         st.success("向量数据库构建完成!")
         print("向量数据库构建完成!")
     except Exception as e:
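The added lines cache both the embedding model and the FAISS index in st.session_state so they are built once and reused across Streamlit reruns. A minimal sketch of that pattern, using placeholder texts in place of the app's st.session_state.answer_list:

```python
# Sketch of the caching pattern introduced by this commit: build the sentence-transformer
# embeddings and the FAISS index once, store them in st.session_state, and reuse them on
# later reruns. The texts below are placeholders for st.session_state.answer_list.
import streamlit as st
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS

if "db" not in st.session_state:
    with st.spinner("Building the vector store..."):
        texts = ["sample answer one", "sample answer two"]  # placeholder corpus
        st.session_state.embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        st.session_state.db = FAISS.from_texts(texts, st.session_state.embeddings)
    st.success("Vector store ready")
```

Keeping both objects in st.session_state avoids re-embedding the whole corpus on every user interaction, which is the expensive step here.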
@@ -69,10 +73,10 @@ def answer_question(repo_id, temperature, max_length, question):
     # Get the answer
     try:
         with st.spinner("正在筛选本地数据集..."):
-            question_embedding = embeddings.embed_query(question)
+            question_embedding = st.session_state.embeddings.embed_query(question)
             question_embedding_str = " ".join(map(str, question_embedding))
             # print('question_embedding: ' + question_embedding_str)
-            docs_and_scores = db.similarity_search_with_score(question_embedding_str)
+            docs_and_scores = st.session_state.db.similarity_search_with_score(question_embedding_str)

             context = "\n".join([doc.page_content for doc, _ in docs_and_scores])
             print('context: ' + context)
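The retrieval step queries the cached FAISS store for the user's question. A small sketch of the same step in isolation, assuming st.session_state.db was built as in the sketch above; note that LangChain's FAISS wrapper embeds the query text itself in similarity_search_with_score, so the sketch passes the raw question (a precomputed vector would instead go through similarity_search_with_score_by_vector):

```python
# Retrieval sketch: look up the indexed answers most similar to a user question.
# Assumes st.session_state.db is the FAISS store built in the previous sketch.
import streamlit as st

question = "What is a vector database?"  # hypothetical user question
docs_and_scores = st.session_state.db.similarity_search_with_score(question, k=4)

# Each result is a (Document, score) pair; lower scores mean closer matches for the
# default L2 index.
context = "\n".join(doc.page_content for doc, _ in docs_and_scores)
print(context)
```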
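The rest of answer_question is not part of this diff. The following is a hypothetical sketch of how the retrieved context and the function's repo_id, temperature and max_length parameters could be passed to the imported HuggingFaceHub LLM; the prompt wording and the model_kwargs keys are assumptions, not taken from the commit.

```python
# Hypothetical sketch of wiring the retrieved context into the HuggingFaceHub LLM that
# app.py imports. The prompt template and model_kwargs are assumptions; this part of
# answer_question is not shown in the diff.
from langchain_community.llms import HuggingFaceHub

def generate_answer(repo_id, temperature, max_length, question, context):
    # HuggingFaceHub calls the hosted inference API and needs HUGGINGFACEHUB_API_TOKEN set.
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": temperature, "max_length": max_length},
    )
    prompt = (
        "Answer the question using the context below.\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    return llm.invoke(prompt)
```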