Chat with PDF - Llama2 model

Introduction

Thanks to Meta’s latest large language model, Llama-2, we can now converse with a document and extract answers from it.

With Llama-2, available in 7B, 13B, and 70B parameter sizes, you can build your own chatbot: one that holds a conversation, understands your questions, and returns precise answers.

Local Setup

We can also set up a quick local demo using a quantized version of the 7B Llama-2 model.

  • Model: We can use TheBloke's quantized GGML version, TheBloke/Llama-2-7B-Chat-GGML (the same model id used in load_llm below)

  • Pre-requisites :

    • Install Python from python.org
    • Add Python to PATH, or tick the "Add to PATH" checkbox during installation
    • Also add the Scripts folder of the Python installation directory to PATH so that pip commands work
  • Execute :

    • pip install -r requirements.txt
    • Create a read access token here: https://huggingface.co/settings/tokens
    • From the command line, run huggingface-cli login and paste the read token
    • Run python ingest.py
    • Run python model.py and then ask questions about the PDF

    Explanation :

    • Installs all the dependency libraries listed in the requirements file
    • Logs in to the Hugging Face CLI with the access token
    • ingest.py extracts the text from the PDFs and stores it locally in a vector database (FAISS)
    • model.py searches the vector store for the relevant passages and answers the user's question from them; a quick smoke test for the quantized model is sketched below
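
Before running the two scripts, an optional smoke test can confirm that the quantized model downloads and generates text. This is a minimal sketch that reuses the same CTransformers settings as load_llm() in the model script further below; the prompt string is just a placeholder.

# smoke_test.py -- optional check that the quantized Llama-2 model loads and runs.
# Reuses the CTransformers settings from load_llm() below; the prompt is a placeholder.
from langchain.llms import CTransformers

llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    model_type="llama",
    max_new_tokens=64,
    temperature=0.5,
)

print(llm("Answer in one sentence: what is a vector database?"))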

Packages to Install (Requirements)

pypdf
langchain
torch
accelerate
bitsandbytes
transformers
ctransformers
sentence_transformers
faiss_cpu
chainlit
streamlit

Ingest PDF

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

DATA_PATH = "data/"
DB_FAISS_PATH = "vectorstores/db_faiss"


# create vector database
def create_vector_db():
    # load every PDF in the data/ folder
    loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    # split the documents into overlapping chunks for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    # embed each chunk with a small sentence-transformers model on CPU
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    # build the FAISS index and save it to disk
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)


if __name__ == "__main__":
    create_vector_db()
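
After running ingest.py, a quick sanity check can confirm the index was saved correctly by reloading it and running a similarity search. This is a small sketch with a placeholder query; it uses the same embedding model and save path as the script above.

# Optional sanity check: reload the saved FAISS index and inspect the top matches.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
db = FAISS.load_local("vectorstores/db_faiss", embeddings)

for doc in db.similarity_search("replace with a question about your PDF", k=2):
    print(doc.page_content[:200])
    print("---")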

Ask Question

from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA

DB_FAISS_PATH = "vectorstores/db_faiss"

custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(
        template=custom_prompt_template, input_variables=["context", "question"]
    )
    return prompt


# Retrieval QA Chain
def retrieval_qa_chain(llm, prompt, db):
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    return qa_chain


# Loading the model
def load_llm():
    # Load the locally downloaded model here
    return CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        # model="meta-llama/Llama-2-7b-chat-hf",
        model_type="llama",
        max_new_tokens=1024,
        temperature=0.5,
    )


# QA Model Function
def qa_bot():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)
    return qa


# output function
def final_result(query):
    qa_result = qa_bot()
    response = qa_result({"query": query})
    return response


def process_answer(instruction):
    print("\n********************************* START ******************************\n")
    qa = qa_bot()
    generated_text = qa(instruction)
    answer = generated_text["result"]
    return answer, generated_text


def main():
    # Ask the user for a query
    question = input("What is your query? (Type 'exit' to quit): ")

    # Check if the user wants to exit
    if question.lower() == "exit":
        print("Exiting the program.")
        return

    # Process the user's query
    answer, generated_text = process_answer(question)
    # Display the result
    print("\n\nResult:\n", answer)

    # show the retrieved source chunks that the answer was based on
    for index, doc in enumerate(generated_text["source_documents"], start=1):
        print(f"\n\nMore Info {index} :\n")
        print(doc.page_content)
    print("\n********************************* END ******************************\n")

if __name__ == "__main__":
    main()
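
The script above is a simple command-line prompt. Since streamlit is listed in the requirements, a browser UI can be layered on top of process_answer(). The sketch below is a hypothetical front end: it assumes the script above is saved as model.py and would be launched with streamlit run app.py.

# app.py -- hypothetical Streamlit front end for the QA pipeline above.
import streamlit as st

from model import process_answer  # assumes the script above is saved as model.py

st.title("Chat with PDF - Llama-2")
question = st.text_input("Ask a question about your PDF")

if question:
    answer, generated_text = process_answer(question)
    st.write(answer)
    # show the retrieved chunks that the answer was based on
    for doc in generated_text["source_documents"]:
        with st.expander("Source"):
            st.write(doc.page_content)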