Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions langchain-rag-app/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__/
*.pyc
.env
chroma_data/
26 changes: 13 additions & 13 deletions langchain-rag-app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ NEO4J_URI=<YOUR_NEO4J_URI>
NEO4J_USERNAME=<YOUR_NEO4J_USERNAME>
NEO4J_PASSWORD=<YOUR_NEO4J_PASSWORD>

HOSPITALS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/hospitals.csv
PAYERS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/payers.csv
PHYSICIANS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/physicians.csv
PATIENTS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/patients.csv
VISITS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/visits.csv
REVIEWS_CSV_PATH=https://raw.githubusercontent.com/hfhoffman1144/langchain_neo4j_rag_app/main/data/reviews.csv

HOSPITAL_AGENT_MODEL=gpt-3.5-turbo-1106
HOSPITAL_CYPHER_MODEL=gpt-3.5-turbo-1106
HOSPITAL_QA_MODEL=gpt-3.5-turbo-0125

CHATBOT_URL=http://host.docker.internal:8000/hospital-rag-agent
HOSPITALS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/hospitals.csv
PAYERS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/payers.csv
PHYSICIANS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/physicians.csv
PATIENTS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/patients.csv
VISITS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/visits.csv
REVIEWS_CSV_PATH=https://raw.githubusercontent.com/realpython/materials/refs/heads/master/langchain-rag-app/data/reviews.csv

HOSPITAL_AGENT_MODEL=gpt-5.5
HOSPITAL_CYPHER_MODEL=gpt-5.5
HOSPITAL_QA_MODEL=gpt-5.5

CHATBOT_URL=http://chatbot_api:8000/hospital-rag-agent
```

The chatbot uses OpenAI LLMs, so you'll need to create an [OpenAI API key](https://realpython.com/generate-images-with-dalle-openai-api/#get-your-openai-api-key) and store it as `OPENAI_API_KEY`.
Expand All @@ -38,7 +38,7 @@ Once you have a running Neo4j instance, and have filled out all the environment
Once you've filled in all of the environment variables, set up a Neo4j AuraDB instance, and installed Docker Compose, open a terminal and run:

```console
$ docker-compose up --build
$ docker compose up --build
```

Once the images have been built and the containers are running, you'll be able to access the chatbot API's interactive documentation at `http://localhost:8000/docs` and the Streamlit app at `http://localhost:8501/`.
8 changes: 3 additions & 5 deletions langchain-rag-app/source_code_final/chatbot_api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# chatbot_api/Dockerfile

FROM python:3.11-slim
FROM python:3.14-slim

WORKDIR /app
COPY ./src/ /app

COPY ./pyproject.toml /code/pyproject.toml
RUN pip install /code/.
RUN python -m pip install /code/.

EXPOSE 8000
CMD ["sh", "entrypoint.sh"]
CMD ["sh", "entrypoint.sh"]
22 changes: 10 additions & 12 deletions langchain-rag-app/source_code_final/chatbot_api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@
name = "chatbot_api"
version = "0.1"
dependencies = [
"asyncio==3.4.3",
"fastapi==0.109.0",
"langchain==0.1.0",
"langchain-openai==0.0.2",
"langchainhub==0.1.14",
"neo4j==5.14.1",
"numpy==1.26.2",
"openai==1.7.2",
"opentelemetry-api==1.22.0",
"pydantic==2.5.1",
"uvicorn==0.25.0"
"fastapi==0.136.3",
"langchain==1.3.4",
"langchain-openai==1.2.2",
"langchain-neo4j==0.9.0",
"neo4j==6.2.0",
"numpy==2.4.6",
"openai==2.41.0",
"pydantic==2.13.4",
"uvicorn==0.49.0"
]

[project.optional-dependencies]
dev = ["black", "flake8"]
dev = ["ruff"]
Original file line number Diff line number Diff line change
@@ -1,23 +1,40 @@
import os

from langchain.agents import create_agent
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI

from chains.hospital_cypher_chain import hospital_cypher_chain
from chains.hospital_review_chain import reviews_vector_chain
from langchain import hub
from langchain.agents import AgentExecutor, Tool, create_openai_functions_agent
from langchain_openai import ChatOpenAI
from tools.wait_times import (
get_current_wait_times,
get_most_available_hospital,
)

HOSPITAL_AGENT_MODEL = os.getenv("HOSPITAL_AGENT_MODEL")

hospital_agent_prompt = hub.pull("hwchase17/openai-functions-agent")
agent_system_prompt = (
"You are a helpful assistant for a hospital system. Use the tools "
"available to you to answer the user's questions about patients, "
"visits, physicians, hospitals, insurance payers, patient reviews, "
"and current wait times."
)


def query_reviews(query: str) -> str:
    """Run the patient-review chain on ``query`` and return its answer.

    This is a thin named wrapper around ``reviews_vector_chain.invoke`` so
    the agent tool can be given a plain function.
    """
    review_answer = reviews_vector_chain.invoke(query)
    return review_answer


def query_graph(query: str) -> str:
    """Send ``query`` to the hospital Cypher chain and return its answer.

    The chain's output is indexed with ``"result"``; only that entry is
    handed back to the caller.
    """
    chain_output = hospital_cypher_chain.invoke(query)
    return chain_output["result"]


tools = [
Tool(
name="Experiences",
func=reviews_vector_chain.invoke,
func=query_reviews,
description="""Useful when you need to answer questions
about patient experiences, feelings, or any other qualitative
question that could be answered about a patient using semantic
Expand All @@ -30,7 +47,7 @@
),
Tool(
name="Graph",
func=hospital_cypher_chain.invoke,
func=query_graph,
description="""Useful for answering questions about patients,
physicians, hospitals, insurance payers, patient review
statistics, and hospital visit details. Use the entire prompt as
Expand Down Expand Up @@ -63,20 +80,10 @@
),
]

chat_model = ChatOpenAI(
model=HOSPITAL_AGENT_MODEL,
temperature=0,
)

hospital_rag_agent = create_openai_functions_agent(
llm=chat_model,
prompt=hospital_agent_prompt,
tools=tools,
)
chat_model = ChatOpenAI(model=HOSPITAL_AGENT_MODEL)

hospital_rag_agent_executor = AgentExecutor(
agent=hospital_rag_agent,
hospital_rag_agent_executor = create_agent(
model=chat_model,
tools=tools,
return_intermediate_steps=True,
verbose=True,
system_prompt=agent_system_prompt,
)
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os

from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_core.prompts import PromptTemplate
from langchain_neo4j import GraphCypherQAChain, Neo4jGraph
from langchain_openai import ChatOpenAI

HOSPITAL_QA_MODEL = os.getenv("HOSPITAL_QA_MODEL")
Expand Down Expand Up @@ -72,23 +71,23 @@
LIMIT 1

# How many non-emergency patients in North Carolina have written reviews?
match (r:Review)<-[:WRITES]-(v:Visit)-[:AT]->(h:Hospital)
where h.state_name = 'NC' and v.admission_type <> 'Emergency'
return count(*)
MATCH (r:Review)<-[:WRITES]-(v:Visit)-[:AT]->(h:Hospital)
WHERE h.state_name = 'NC' and v.admission_type <> 'Emergency'
RETURN count(*)

String category values:
Test results are one of: 'Inconclusive', 'Normal', 'Abnormal'
Visit statuses are one of: 'OPEN', 'DISCHARGED'
Admission Types are one of: 'Elective', 'Emergency', 'Urgent'
Payer names are one of: 'Cigna', 'Blue Cross', 'UnitedHealthcare', 'Medicare',
Payer names are one of: 'Cigna', 'Blue Cross', 'UnitedHealthcare', 'Medicaid',
'Aetna'

A visit is considered open if its status is 'OPEN' and the discharge date is
missing.
Use abbreviations when
filtering on hospital states (e.g. "Texas" is "TX",
"Colorado" is "CO", "North Carolina" is "NC",
"Florida" is "FL", "Georgia" is "GA, etc.)
"Florida" is "FL", "Georgia" is "GA", etc.)

Make sure to use IS NULL or IS NOT NULL when analyzing missing properties.
Never return embedding properties in your queries. You must never include the
Expand All @@ -109,7 +108,7 @@
qa_generation_template = """You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a users natural language question. The provided
generated based on a user's natural language question. The provided
information is authoritative, you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.
Expand All @@ -128,15 +127,14 @@
results are in units of days unless otherwise specified.

When names are provided in the query results, such as hospital names,
beware of any names that have commas or other punctuation in them.
beware of any names that have commas or other punctuation in them.
For instance, 'Jones, Brown and Murray' is a single hospital name,
not multiple hospitals. Make sure you return any list of names in
a way that isn't ambiguous and allows someone to tell what the full
names are.

Never say you don't have the right information if there is data in
the query results. Make sure to show all the relevant query results
if you're asked.
the query results. Always use the data in the query results.

Helpful Answer:
"""
Expand All @@ -146,12 +144,13 @@
)

hospital_cypher_chain = GraphCypherQAChain.from_llm(
cypher_llm=ChatOpenAI(model=HOSPITAL_CYPHER_MODEL, temperature=0),
qa_llm=ChatOpenAI(model=HOSPITAL_QA_MODEL, temperature=0),
cypher_llm=ChatOpenAI(model=HOSPITAL_CYPHER_MODEL),
qa_llm=ChatOpenAI(model=HOSPITAL_QA_MODEL),
graph=graph,
verbose=True,
qa_prompt=qa_generation_prompt,
cypher_prompt=cypher_generation_prompt,
validate_cypher=True,
top_k=100,
allow_dangerous_requests=True,
)
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import logging
import os

from langchain.chains import RetrievalQA
from langchain.prompts import (
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_neo4j import Neo4jVector
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

# Silence Neo4j's deprecation notice for db.index.vector.queryNodes
logging.getLogger("neo4j.notifications").setLevel(logging.ERROR)

HOSPITAL_QA_MODEL = os.getenv("HOSPITAL_QA_MODEL")

Expand All @@ -29,11 +35,10 @@
)

review_template = """Your job is to use patient
reviews to answer questions about their experience at
a hospital. Use the following context to answer questions.
Be as detailed as possible, but don't make up any information
that's not from the context. If you don't know an answer,
say you don't know.
reviews to answer questions about their experience at a hospital. Use
the following context to answer questions. Be as detailed as possible,
but don't make up any information that's not from the context. If you
don't know an answer, say you don't know.
{context}
"""

Expand All @@ -52,9 +57,12 @@
input_variables=["context", "question"], messages=messages
)

reviews_vector_chain = RetrievalQA.from_chain_type(
llm=ChatOpenAI(model=HOSPITAL_QA_MODEL, temperature=0),
chain_type="stuff",
retriever=neo4j_vector_index.as_retriever(k=12),
reviews_retriever = neo4j_vector_index.as_retriever(search_kwargs={"k": 12})
review_chat_model = ChatOpenAI(model=HOSPITAL_QA_MODEL)

reviews_vector_chain = (
{"context": reviews_retriever, "question": RunnablePassthrough()}
| review_prompt
| review_chat_model
| StrOutputParser()
)
reviews_vector_chain.combine_documents_chain.llm_chain.prompt = review_prompt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
echo "Starting hospital RAG FastAPI service..."

# Start the main application
uvicorn main:app --host 0.0.0.0 --port 8000
uvicorn main:app --host 0.0.0.0 --port 8000
26 changes: 15 additions & 11 deletions langchain-rag-app/source_code_final/chatbot_api/src/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from fastapi import FastAPI

from agents.hospital_rag_agent import hospital_rag_agent_executor
from models.hospital_rag_query import HospitalQueryInput, HospitalQueryOutput
from utils.async_utils import async_retry

from fastapi import FastAPI

app = FastAPI(
title="Hospital Chatbot",
description="Endpoints for a hospital system graph RAG chatbot",
Expand All @@ -12,12 +12,14 @@

@async_retry(max_retries=10, delay=1)
async def invoke_agent_with_retry(query: str):
"""
Retry the agent if a tool fails to run. This can help when there
are intermittent connection issues to external APIs.
"""
"""Retry the agent if a tool fails to run.

return await hospital_rag_agent_executor.ainvoke({"input": query})
This can help when there are intermittent connection issues
to external APIs.
"""
return await hospital_rag_agent_executor.ainvoke(
{"messages": [{"role": "user", "content": query}]}
)


@app.get("/")
Expand All @@ -30,8 +32,10 @@ async def query_hospital_agent(
query: HospitalQueryInput,
) -> HospitalQueryOutput:
query_response = await invoke_agent_with_retry(query.text)
query_response["intermediate_steps"] = [
str(s) for s in query_response["intermediate_steps"]
]
messages = query_response["messages"]

return query_response
return HospitalQueryOutput(
input=query.text,
output=messages[-1].content,
intermediate_steps=[str(message) for message in messages],
)
Loading
Loading