import os
import logging
from dotenv import load_dotenv
import streamlit as st
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
# Get a logger for this module (named after the module per logging convention;
# handlers/levels are expected to be configured by the app entry point).
logger = logging.getLogger(__name__)
logger.info("Design Page...")
# -------------------------------
# PAGE CONFIG (MUST BE FIRST)
# -------------------------------
# Port the app is served on; read here from the environment for deployment
# platforms that inject PORT (Streamlit itself is configured via CLI/env).
PORT = int(os.environ.get("PORT", 8501))

# st.set_page_config must be the first Streamlit call in the script — the
# section header above demands it, but the original never called it.
st.set_page_config(
    page_title="AI Medical Labelling System",
    page_icon="💊",
    layout="wide",
)

# -------------------------------
# CUSTOM CSS (FANCY DESIGN)
# -------------------------------
# NOTE(review): the HTML/CSS markup originally embedded in these st.markdown
# calls was lost (the string literals were syntactically broken). The markup
# below is a minimal reconstruction — confirm against the intended design.
st.markdown(
    """
    <style>
    .main-title { text-align: center; }
    .subtitle { text-align: center; color: #666; }
    </style>
    """,
    unsafe_allow_html=True,
)

# -------------------------------
# HEADER
# -------------------------------
st.markdown(
    '<h1 class="main-title">💊 AI Medical Labelling System</h1>',
    unsafe_allow_html=True,
)
st.markdown(
    '<p class="subtitle">Simplifying FDA Drug Safety Information '
    'using Generative AI & RAG</p>',
    unsafe_allow_html=True,
)
st.divider()
# -------------------------------
# SIDEBAR CONTROLS
# -------------------------------
with st.sidebar:
    st.header("⚙️ Search Options")
    # Free-text drug name; matched verbatim against the "generic_name"
    # metadata in the vector store, so casing/spelling must match the
    # indexed data (placeholder shows the expected upper-case form).
    drug_name = st.text_input(
        "Drug Name",
        placeholder="PHENYTOIN SODIUM"
    )
    # Which label sections to summarise; options are the keys of
    # SECTION_MAP defined further down.
    selected_results = st.radio(
        "Information Type",
        ["Side Effects", "Warnings", "Both"]
    )
    # True only on the script rerun triggered by the click.
    run_button = st.button("🔍 Generate Explanation")
# -------------------------------
# LOAD ENV + MODELS
# -------------------------------
logger.info("Loading HuggingFace embedding model...")
# Load .env first so OPENAI_API_KEY is in the environment when
# ChatOpenAI is constructed below.
load_dotenv()
# Directory containing this script; the persisted Chroma store lives
# alongside it in "Chroma_db".
working_dir = os.path.dirname(os.path.abspath(__file__))
# Local sentence-transformer embedding model — must match the model used
# when the index was built, or lookups will be meaningless.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Re-open the already-persisted vector store; no (re)indexing happens here.
vectordb = Chroma(
    persist_directory=os.path.join(working_dir, "Chroma_db"),
    embedding_function=embeddings
)
logger.info("Calling OpenAI model gpt-4o-mini...")
# temperature=0 for deterministic, fact-focused rewrites of medical text.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0
)
# -------------------------------
# RAG FUNCTION
# -------------------------------
def generate_section(drug_name, section, rules):
    """Retrieve FDA text for one label section and render a simplified summary.

    Args:
        drug_name: Generic drug name, matched exactly against the
            "generic_name" metadata stored in the Chroma index.
        section: Label-section metadata key, e.g. "adverse_reactions".
        rules: Formatting rules injected verbatim into the LLM prompt.

    Returns:
        The simplified text produced by the LLM, or None when no
        documents exist for this drug/section pair.
    """
    # Pure metadata filter — no similarity search; we want the exact
    # drug/section slice of the store, not nearest neighbours.
    results = vectordb.get(
        where={
            "$and": [
                {"generic_name": drug_name},
                {"section": section}
            ]
        }
    )
    documents = results.get("documents", [])
    if not documents:
        st.warning(f"No data found for {section}")
        return None

    # De-duplicate while preserving insertion order. The original used
    # set(), whose iteration order is nondeterministic between runs and
    # therefore produced an unstable prompt (and unstable LLM output).
    context = "\n".join(dict.fromkeys(documents))
    prompt = f"""
You are a medical assistant.
Rewrite the FDA drug information into simplified,
easy-to-understand language.
Rules:
{rules}
Drug: {drug_name}
FDA TEXT:
{context}
"""
    with st.spinner("🧠 AI is analysing FDA data..."):
        response = llm.invoke(prompt)
    # NOTE(review): the HTML wrapper originally passed to st.markdown was
    # lost (broken string literal); the model output is rendered directly.
    st.markdown(response.content, unsafe_allow_html=True)
    return response.content
logger.info("Configuring prompt..")
# -------------------------------
# RULES
# -------------------------------
# Formatting rules fed verbatim into the LLM prompt for each section type.
SIDE_EFFECT_RULES = """
- Use simple English
- Bullet points (max 7)
- Group similar side effects
- Separate common vs serious
"""
WARNING_RULES = """
- Use simple English
- Bullet points (max 7)
- Group warnings clearly
"""

# Each sidebar choice maps to a list of (section metadata key, rules) pairs;
# "Both" simply runs the two single-section pairs in sequence.
_SIDE_EFFECTS_PAIR = ("adverse_reactions", SIDE_EFFECT_RULES)
_WARNINGS_PAIR = ("warnings_and_cautions", WARNING_RULES)
SECTION_MAP = {
    "Side Effects": [_SIDE_EFFECTS_PAIR],
    "Warnings": [_WARNINGS_PAIR],
    "Both": [_SIDE_EFFECTS_PAIR, _WARNINGS_PAIR],
}
# -------------------------------
# MAIN ACTION
# -------------------------------
# Guard on the button click first, then validate the drug name input.
if run_button:
    if drug_name:
        st.subheader(f"Results for: {drug_name.upper()}")
        # Run one generation pass per (section, rules) pair for the choice.
        for section_key, section_rules in SECTION_MAP[selected_results]:
            generate_section(drug_name, section_key, section_rules)
    else:
        st.warning("Please enter a drug name.")