This commit is contained in:
SunZhimin2021 2024-01-12 15:32:07 +08:00
parent 94f3cb4ae0
commit bb390f67bb

View File

@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "82c31161-940d-458e-abbb-2b58cdd66567",
"metadata": {},
"source": [
"# Embedding 及 FAISS 向量库使用记录\n",
"\n",
"本笔记演示：加载文本并切分，生成 embedding 存入 FAISS，按文本或向量做相似度检索，最后保存并重新加载索引。"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "063ccadc-c190-4b1a-8c45-b8a7e7a7004e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: OPENAI_API_BASE=http://172.16.2.61:8000/v1\n",
"env: OPENAI_API_KEY=EMPTY\n"
]
}
],
"source": [
"%env OPENAI_API_BASE=http://172.16.2.61:8000/v1\n",
"%env OPENAI_API_KEY=EMPTY"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "32b040ac-41fd-472e-a672-d2252503d03e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Created a chunk of size 1004, which is longer than the specified 1000\n",
"Created a chunk of size 1203, which is longer than the specified 1000\n",
"Created a chunk of size 1025, which is longer than the specified 1000\n"
]
}
],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"\n",
"# Tunable inputs, kept in one place instead of being buried in the calls below.\n",
"SOURCE_PATH = '/root/sunzm/llamadata/test1.txt'  # machine-specific absolute path; point it at your own text file\n",
"EMBEDDING_MODEL = \"text-embedding-ada-002\"\n",
"CHUNK_SIZE = 1000  # requested size only: the recorded run still logged chunks of 1004, 1203 and 1025 characters\n",
"CHUNK_OVERLAP = 0\n",
"\n",
"# Load the document and split it into chunks.\n",
"raw_documents = TextLoader(SOURCE_PATH).load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n",
"documents = text_splitter.split_documents(raw_documents)\n",
"\n",
"# Embed each chunk and load it into the FAISS vector store.\n",
"# NOTE(review): if oversized chunks matter downstream, consider a splitter that can fall back to finer separators.\n",
"embeddings_model = OpenAIEmbeddings(model=EMBEDDING_MODEL)\n",
"db = FAISS.from_documents(documents, embeddings_model)"
]
},
{
"cell_type": "markdown",
"id": "17a8255f-c65d-4022-a424-7d3998394c22",
"metadata": {},
"source": [
"## Similarity search 1: query by text"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "275b6703-1711-4eac-86b8-2692d3b894bd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
]
}
],
"source": [
"# Search the store with a plain-text question and print the first chunk returned.\n",
"# NOTE(review): the question looks borrowed from another example -- the hit recorded for it\n",
"# is an acknowledgements paragraph, so consider asking something the indexed text covers.\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"first_text_hit = docs[0]\n",
"print(first_text_hit.page_content)"
]
},
{
"cell_type": "markdown",
"id": "c300aef2-7388-49d6-9b33-953d73833f57",
"metadata": {},
"source": [
"## Similarity search 2: query by embedding vector"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9d70da40-860a-4164-a4bb-2cadfe264d9d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
]
}
],
"source": [
"# Embed the question explicitly, then search the store with the resulting vector.\n",
"# `query` is the string defined in the previous code cell.\n",
"embedding_vector = embeddings_model.embed_query(query)\n",
"docs = db.similarity_search_by_vector(embedding_vector)\n",
"first_vector_hit = docs[0]\n",
"print(first_vector_hit.page_content)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0fae5511-44e6-49ae-8e2a-d9d4b722c19e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
]
}
],
"source": [
"# Persist the index to disk, reload it, and repeat the text query against the reloaded copy.\n",
"INDEX_DIR = \"faiss_db\"  # one name shared by save and load so the two cannot drift apart\n",
"\n",
"db.save_local(INDEX_DIR)\n",
"\n",
"# NOTE(review): load_local is understood to unpickle part of the saved index -- only load\n",
"# directories you wrote yourself. Newer langchain-community releases reportedly require\n",
"# allow_dangerous_deserialization=True here; confirm against the installed version.\n",
"new_db = FAISS.load_local(INDEX_DIR, embeddings_model)\n",
"\n",
"docs = new_db.similarity_search(query)\n",
"\n",
"print(docs[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "040d68d0-1045-428d-9339-39bd8002db5f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bed2340c-b6b7-46f2-80f1-10953bf780ee",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "cnalpaca",
"language": "python",
"name": "cnalpaca"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}