"""Streamlit page: AI Medical Labelling System.

Simplifies FDA drug safety information (adverse reactions / warnings)
using a RAG pipeline: Chroma vector store + MiniLM embeddings for
retrieval, gpt-4o-mini for plain-language rewriting.
"""

import logging
import os

import streamlit as st
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

# Get a logger for this module
logger = logging.getLogger(__name__)
logger.info("Design Page...")

# -------------------------------
# PAGE CONFIG (MUST BE FIRST)
# -------------------------------
# NOTE(review): PORT is read but never used in this page — presumably the
# deployment platform's port; confirm before removing.
PORT = int(os.environ.get("PORT", 8501))

# NOTE(review): the original inline CSS/HTML blocks were stripped when this
# file was mangled; the tag markup below is a minimal reconstruction —
# restore the original styles from version control if available.
st.markdown(""" """, unsafe_allow_html=True)

st.markdown(
    '<div class="main-title">💊 AI Medical Labelling System</div>',
    unsafe_allow_html=True,
)
st.markdown(
    '<div class="subtitle">Simplifying FDA Drug Safety Information '
    "using Generative AI &amp; RAG</div>",
    unsafe_allow_html=True,
)

# -------------------------------
# CUSTOM CSS (FANCY DESIGN)
# -------------------------------
st.markdown(""" """, unsafe_allow_html=True)

# -------------------------------
# HEADER
# -------------------------------
st.divider()

# -------------------------------
# SIDEBAR CONTROLS
# -------------------------------
with st.sidebar:
    st.header("⚙️ Search Options")
    drug_name = st.text_input(
        "Drug Name",
        placeholder="PHENYTOIN SODIUM",
    )
    selected_results = st.radio(
        "Information Type",
        ["Side Effects", "Warnings", "Both"],
    )
    run_button = st.button("🔍 Generate Explanation")

# -------------------------------
# LOAD ENV + MODELS
# -------------------------------
logger.info("Loading HuggingFace embedding model...")
load_dotenv()
working_dir = os.path.dirname(os.path.abspath(__file__))


@st.cache_resource
def _load_vectordb():
    """Build the Chroma store once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, the embedding model and vector store would be
    reloaded on each rerun.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return Chroma(
        persist_directory=os.path.join(working_dir, "Chroma_db"),
        embedding_function=embeddings,
    )


@st.cache_resource
def _load_llm():
    """Create the chat model client once per server process."""
    # temperature=0 for deterministic, conservative medical summaries.
    return ChatOpenAI(model="gpt-4o-mini", temperature=0)


vectordb = _load_vectordb()

logger.info("Calling OpenAI model gpt-4o-mini...")
llm = _load_llm()


# -------------------------------
# RAG FUNCTION
# -------------------------------
def generate_section(drug_name: str, section: str, rules: str) -> None:
    """Retrieve one FDA label section for *drug_name* and render a
    simplified explanation into the page.

    Args:
        drug_name: generic drug name as stored in the vector DB metadata.
        section: metadata section key, e.g. ``"adverse_reactions"``.
        rules: formatting rules injected into the LLM prompt.

    Shows a Streamlit warning and returns early when no documents match.
    """
    results = vectordb.get(
        where={
            "$and": [
                {"generic_name": drug_name},
                {"section": section},
            ]
        }
    )
    documents = results.get("documents", [])
    if not documents:
        st.warning(f"No data found for {section}")
        return

    # Deduplicate while preserving retrieval order. The original used
    # set(), whose iteration order is nondeterministic across runs
    # (string hash randomization), making the prompt — and thus the
    # LLM output — unstable between sessions.
    context = "\n".join(dict.fromkeys(documents))

    prompt = f"""
You are a medical assistant.
Rewrite the FDA drug information into simplified, easy-to-understand language.

Rules:
{rules}

Drug: {drug_name}

FDA TEXT:
{context}
"""

    with st.spinner("🧠 AI is analysing FDA data..."):
        response = llm.invoke(prompt)

    # NOTE(review): result-card markup reconstructed — original HTML was
    # stripped from this file.
    st.markdown(
        f'<div class="result-card">{response.content}</div>',
        unsafe_allow_html=True,
    )


logger.info("Configuring prompt..")

# -------------------------------
# RULES
# -------------------------------
SIDE_EFFECT_RULES = """
- Use simple English
- Bullet points (max 7)
- Group similar side effects
- Separate common vs serious
"""

WARNING_RULES = """
- Use simple English
- Bullet points (max 7)
- Group warnings clearly
"""

# Maps the sidebar radio choice to the (metadata section, prompt rules)
# pairs that should be generated for it.
SECTION_MAP = {
    "Side Effects": [("adverse_reactions", SIDE_EFFECT_RULES)],
    "Warnings": [("warnings_and_cautions", WARNING_RULES)],
    "Both": [
        ("adverse_reactions", SIDE_EFFECT_RULES),
        ("warnings_and_cautions", WARNING_RULES),
    ],
}

# -------------------------------
# MAIN ACTION
# -------------------------------
if run_button and drug_name:
    st.subheader(f"Results for: {drug_name.upper()}")
    for section, rules in SECTION_MAP[selected_results]:
        generate_section(drug_name, section, rules)
elif run_button:
    st.warning("Please enter a drug name.")