马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
x
使用 LangChain 与 Chroma 构建本地向量数据库
- # import
- from langchain_community.document_loaders import TextLoader
- from langchain_community.embeddings.sentence_transformer import (
- SentenceTransformerEmbeddings,
- )
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.vectorstores import Chroma
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_community.document_loaders.word_document import Docx2txtLoader
- import glob
- import os
# Directory in which the Chroma database is persisted.
db_dir: str = "./db"

# Directory that holds the source documents.
source_directory: str = "./docs"

# Glob pattern selecting which files in the source directory are ingested.
file_ext: str = "*.docx"

# Embedding function backed by a Chinese text-embedding model.
# An open-source alternative that was tried previously:
#   SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
ebd_function = HuggingFaceEmbeddings(
    model_name="shibing624/text2vec-base-chinese",
)
def add_files_to_db(filepath: str = "", file_ext: str = ""):
    """Load documents from a directory, embed them and persist them in Chroma.

    Args:
        filepath: Directory to scan for documents. When empty, the
            module-level ``source_directory`` is used.
        file_ext: Glob pattern selecting the files to ingest, e.g.
            ``"*.docx"``. When empty, ``"*.docx"`` is used because every
            match is read with ``Docx2txtLoader``.

    Returns:
        None. The chunks are written to the Chroma store at ``db_dir``. If no
        file matches, or the files yield no chunks, a notice is printed and
        nothing is written.
    """
    # Honour the caller's directory: the parameter used to be ignored and the
    # global source directory was always scanned instead.
    directory = filepath or source_directory
    pattern = file_ext or "*.docx"

    # Sorted so that ingestion order does not depend on the file system.
    docx_files = sorted(glob.glob(os.path.join(directory, pattern)))
    if not docx_files:
        print(f"No files matching {pattern!r} found in {directory!r}; nothing to index.")
        return

    text_list = []
    for file_name in docx_files:
        print(file_name)
        text_list.extend(Docx2txtLoader(file_name).load())

    # Split the loaded documents into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(text_list)
    if not docs:
        # Do not try to build an index out of zero chunks.
        print(f"Files in {directory!r} produced no text chunks; nothing to index.")
        return

    # Embed the chunks and load them into Chroma.
    db = Chroma.from_documents(docs, ebd_function, persist_directory=db_dir)
    # Save the database to disk.
    # NOTE(review): newer Chroma releases reportedly persist automatically;
    # confirm whether this explicit call is still required.
    db.persist()
def query_db(db: Chroma, query: str = ""):
    """Run a similarity search and print the best-matching chunk.

    Args:
        db: Vector store to search; only its ``similarity_search`` method is
            called.
        query: Query text passed to the similarity search.

    Returns:
        None. Prints the ``page_content`` of the first result, or a notice
        when the search returns nothing, followed by a separator line.
    """
    docs = db.similarity_search(query)
    if docs:
        print(docs[0].page_content)
    else:
        # An empty result list would make docs[0] raise IndexError.
        print("No matching documents found.")
    print("-----------------------------------------")
-
-
if __name__ == "__main__":
    # Ingestion only needs to run once. Build the index automatically when no
    # persisted database exists yet, instead of requiring the call to be
    # uncommented by hand before the first run.
    if not os.path.isdir(db_dir) or not os.listdir(db_dir):
        add_files_to_db(source_directory, file_ext)

    # Reopen the persisted store with the same embedding function.
    db = Chroma(persist_directory=db_dir, embedding_function=ebd_function)

    # Sample queries against the indexed documents.
    for query in ("怎么治疗骨质疏松症?", "怎么治疗鼻炎?"):
        query_db(db, query)
文档存放在当前代码目录下的 docs 文件夹中:
./docs/第十六章-感染性疾病.docx
./docs/第八章-骨骼关节和肌肉疾病.docx
./docs/第十九章-耳鼻咽喉疾病.docx
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。