I am building my first chatbot trained on my database. I've stucked and can't move forward with the last part of my code :( My code looks as follows:
import os
from llama_index import SimpleDirectoryReader, GPTListIndex, GPTVectorStoreIndex, LLMPredictor, PromptHelper
from langchain import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch, FAISS
from langchain.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import VectorStoreRetrieverMemory
from langchain.chat_models import ChatOpenAI
import gradio as gr
os.environ["OPENAI_API_KEY"] = "key" #moj API
llm_name = "gpt-3.5-turbo"
from llama_index import ServiceContext, StorageContext, load_index_from_storage
def create_index(path):
max_input = 4096
num_output = 4096
tokens = 200
chunk_size = 500 # for LLM, we need to define chunk size
# define prompt
promptHelper = PromptHelper(max_input, num_output, chunk_overlap_ratio=0.1, chunk_size_limit=1024)
# define LLM — there could be many models we can use, but in this example, let’s go with OpenAI model
llmPredictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))
# load data — it will take all the .txtx files, if there are more than 1
csv_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
pdf_docs = SimpleDirectoryReader("/Python_script/llama/llama").load_data()
docs = csv_docs + pdf_docs
# create vector index
service_context = ServiceContext.from_defaults(llm_predictor=llmPredictor, prompt_helper=promptHelper)
vectorIndex = GPTVectorStoreIndex.from_documents(documents=docs, service_context=service_context)
vectorIndex.storage_context.persist(persist_dir="store_test")
import gradio as gr
from llama_index import GPTVectorStoreIndex, StorageContext, LLMPredictor, load_index_from_storage, SimpleDirectoryReader
storage_context = StorageContext.from_defaults(persist_dir="/Users/renia/Praca_dyplomowa/Python_script/store_test")
index = load_index_from_storage(storage_context)
# Define the LLMPredictor with OpenAI model
tokens = 200
predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=tokens))
# Get the documents from the index
documents = SimpleDirectoryReader("/Python_script/store_test").load_data()
# Create a new GPTVectorStoreIndex using the loaded documents
vector_index = GPTVectorStoreIndex.from_documents(documents=documents, predictor=predictor)
# Load the previously created vector index from storage using the specified index ID (or default ID)
vectorIndex = load_index_from_storage(storage_context)
# Create OpenAIEmbeddings
embedding_size = 1536 # Dimensions of the OpenAIEmbeddings
embedding_fn = OpenAIEmbeddings().embed_query
# Create DocArrayInMemorySearch retriever
def to_vector_store(vector_store_index):
return VectorStore(vector_store_index.documents, vector_store_index.embeddings)
retriever = {
"name": "DocArrayInMemorySearch",
"params": {
"vector_index": vectorIndex,
"embedding_fn": embedding_fn,
},
"vectorstore": to_vector_store(vectorIndex),
}
# Create VectorStoreRetrieverMemory with the retriever
memory = VectorStoreRetrieverMemory(retriever=retriever)
# Define the chat function
def chatbot_response(message, history):
# Convert history to a list of (user_input, bot_response) tuples
history_list = []
for user_input, bot_response in history:
history_list.append((f"User: {user_input}", f"Bot: {bot_response}"))
# Retrieve relevant memories based on previous conversations
relevant_memories = memory.predict_new_summary(messages=history_list, previous_summary="")
# Concatenate all relevant memories into a single string
relevant_memories_str = "\n".join(relevant_memories)
# Get the bot's response using the vector index, considering both the message and relevant memories
bot_response = vectorIndex.query(relevant_memories_str + f"\nUser: {message}\n")
# Only take the last part of the response which is the bot's response
bot_response = bot_response.split("Bot:")[-1].strip()
return bot_response
# Create the Gradio chat interface
gr.ChatInterface(chatbot_response,
title="Iron Ladies Chatbot",
description="Ask the Iron Ladies chatbot any question!",
theme="dark",
examples=[
["Tell me about Iron Ladies.", "The Iron Ladies is a team of powerful female superheroes :)"],
["What are their powers?", "Each member of the Iron Ladies has unique superpowers."],
["Who is the leader?", "The leader of the Iron Ladies is Maja - natural born leader :D."],
])
Once I execute the code I get the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[32], line 37
27 def to_vector_store(vector_store_index):
28 return VectorStore(vector_store_index.documents, vector_store_index.embeddings)
31 retriever = {
32 "name": "DocArrayInMemorySearch",
33 "params": {
34 "vector_index": vectorIndex,
35 "embedding_fn": embedding_fn,
36 },
---> 37 "vectorstore": to_vector_store(vectorIndex),
38 }
40 # Create VectorStoreRetrieverMemory with the retriever
41 memory = VectorStoreRetrieverMemory(retriever=retriever)
Cell In[32], line 28, in to_vector_store(vector_store_index)
27 def to_vector_store(vector_store_index):
---> 28 return VectorStore(vector_store_index.documents, vector_store_index.embeddings)
AttributeError: 'VectorStoreIndex' object has no attribute 'documents'
Is there anyone who have a clue how I can move forward and load the vectorIndex and give my chatbot memory?
I've tried to update my code but I'm unable to find solution. Each time I make any amendments, I get new errors relating to vectorIndex. I was trying to get some help from GPT or Bard, but they are not that smart ;)