import openai
from llama_index import ServiceContext, GPTVectorStoreIndex, LLMPredictor, PromptHelper, SimpleDirectoryReader, load_index_from_storage, StorageContext
from langchain import OpenAI
import os
import re
drivepath = "C://Users//"
# Set the API key
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
openai.api_key = "YOUR_OPENAI_API_KEY"
def construct_index(drivepath):
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_outputs = 2000
# set maximum chunk overlap
max_chunk_overlap = 0.5
# set chunk size limit
chunk_size_limit = 600
# define prompt helper
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
# define LLM
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.05, model_name="text-davinci-003", max_tokens=num_outputs, top_p=0.1, frequency_penalty=0.8, verbose=None))
documents = SimpleDirectoryReader(drivepath, recursive=True).load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="index")
return index
def get_response(user_input):
storage_context = StorageContext.from_defaults(persist_dir="index")
index = load_index_from_storage(storage_context)
# index = GPTVectorStoreIndex.load_from_disk('index.json')
query_engine = index.as_query_engine()
streaming_response = query_engine.query(user_input)
return streaming_response.response
# construct_index('C://Users')
while True:
user_input = input("Enter your query: ")
response = get_response(user_input)
print('ANSWER:', response)
This code is for a chatbot. It is trained on pdfs in my directory. It gives answers pertaining to the files present in the directory. What I want is that it also returns me the file it is reading the answers from in the answers so that the user knows which file is used for the information being given.
The limitation is that I have to do it using llama_index library. I found this link llama_index get the document referenced from node_sources but it doesn't work for me. Kindly help me out.