mirror of
https://github.com/SunZhimin2021/AIPentest.git
synced 2025-05-05 10:06:57 +00:00
init
This commit is contained in:
parent
94f3cb4ae0
commit
bb390f67bb
179
note/embedding及faissdb使用记录.ipynb
Normal file
179
note/embedding及faissdb使用记录.ipynb
Normal file
@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82c31161-940d-458e-abbb-2b58cdd66567",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# embedding及faissdb使用记录"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "063ccadc-c190-4b1a-8c45-b8a7e7a7004e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"env: OPENAI_API_BASE=http://172.16.2.61:8000/v1\n",
|
||||
"env: OPENAI_API_KEY=EMPTY\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%env OPENAI_API_BASE=http://172.16.2.61:8000/v1\n",
|
||||
"%env OPENAI_API_KEY=EMPTY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "32b040ac-41fd-472e-a672-d2252503d03e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Created a chunk of size 1004, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1203, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1025, which is longer than the specified 1000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"\n",
|
||||
"# Load the document, split it into chunks, embed each chunk and load it into the vector store.\n",
|
||||
"embeddings_model = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
|
||||
"raw_documents = TextLoader('/root/sunzm/llamadata/test1.txt').load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"documents = text_splitter.split_documents(raw_documents)\n",
|
||||
"db = FAISS.from_documents(documents, embeddings_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17a8255f-c65d-4022-a424-7d3998394c22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search 1: query by text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "275b6703-1711-4eac-86b8-2692d3b894bd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c300aef2-7388-49d6-9b33-953d73833f57",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search 2: query by embedding vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9d70da40-860a-4164-a4bb-2cadfe264d9d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"embedding_vector = embeddings_model.embed_query(query)\n",
|
||||
"docs = db.similarity_search_by_vector(embedding_vector)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "0fae5511-44e6-49ae-8e2a-d9d4b722c19e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.save_local(\"faiss_db\")\n",
|
||||
"\n",
|
||||
"new_db = FAISS.load_local(\"faiss_db\", embeddings_model)\n",
|
||||
"\n",
|
||||
"docs = new_db.similarity_search(query)\n",
|
||||
"\n",
|
||||
"print (docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "040d68d0-1045-428d-9339-39bd8002db5f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bed2340c-b6b7-46f2-80f1-10953bf780ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "cnalpaca",
|
||||
"language": "python",
|
||||
"name": "cnalpaca"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user