Spaces:

Intention
/

IntentionStudy

Sleeping

File size: 5,775 Bytes

import json
from datetime import datetime, timezone
from uuid import uuid4

import pandas as pd
import scrubadub
import streamlit as st
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# -----------------------------
# Page Config
# -----------------------------
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# -----------------------------
# Sidebar: App Navigation & File Upload
# -----------------------------
st.sidebar.title("⚙️ Settings")

# Consent
# Seed the session key before the radio is created so the widget starts on
# the blank (unanswered) option.
if "consent" not in st.session_state:
    st.session_state.consent = ""

with st.sidebar.expander("Consent Form", expanded=True):
    # The radio writes its selection straight into st.session_state.consent
    # through the shared key.
    st.radio(
        "**Do you consent to participating in this study?**",
        ["", "Yes, I consent", "No, I do not consent"],
        key="consent",
    )

# File Upload
uploaded_file = st.sidebar.file_uploader("📂 Upload ChatGPT export (.json)", type=["json"])

# Privacy Policy in Sidebar
with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        # Explicit UTF-8 so the policy renders the same regardless of the
        # host's default locale encoding.
        with open("PrivacyPolicy.md", "r", encoding="utf-8") as f:
            st.markdown(f.read())
    except FileNotFoundError:
        # The message names the file that is actually opened above.
        st.error("Privacy policy file not found. Please add `PrivacyPolicy.md`.")

# -----------------------------
# Consent Messages in Main Page
# -----------------------------
# React to the sidebar consent answer; a blank answer shows nothing.
consent_choice = st.session_state.consent

if consent_choice == "No, I do not consent":
    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")

elif consent_choice == "Yes, I consent":
    # Create the participant ID only once per session so it stays stable
    # across Streamlit reruns.
    if "id" not in st.session_state:
        timestamp_prefix = datetime.now().strftime('%Y%m-%d%H-%M-')
        st.session_state.id = timestamp_prefix + str(uuid4())
    st.success("✅ You consented to participate.")
    st.info(f"Your anonymized ID is: **{st.session_state.id}**. Keep this if you want your data deleted later.")

# -----------------------------
# Parser Function
# -----------------------------
def parse_chatgpt_export(data):
    """Flatten an export dict into a DataFrame with one row per message.

    Args:
        data: Mapping with a ``"conversations"`` list. Each conversation may
            carry ``"id"``, ``"title"`` and a ``"mapping"`` of message id to
            message node. A node may hold ``author`` / ``content`` /
            ``create_time`` directly, or wrap them in a nested ``"message"``
            dict; both shapes are accepted.

    Returns:
        A DataFrame with the columns ``conversation_id``, ``title``,
        ``message_id``, ``role``, ``content`` and ``create_time``. The
        columns are present even when no messages were found.
    """
    columns = ["conversation_id", "title", "message_id", "role", "content", "create_time"]
    rows = []

    # `or` fallbacks guard against keys that are present but explicitly null.
    for conv in data.get("conversations") or []:
        conv_id = conv.get("id")
        title = conv.get("title")
        mapping = conv.get("mapping") or {}

        for msg_id, node in mapping.items():
            if not isinstance(node, dict):
                # A null or otherwise malformed node carries no message.
                continue

            # Unwrap nodes that keep the message under a "message" key;
            # flat nodes are used as they are.
            nested = node.get("message")
            msg = nested if isinstance(nested, dict) else node

            author = msg.get("author")
            if not isinstance(author, dict):
                author = {}
            content = msg.get("content")
            if not isinstance(content, dict):
                content = {}
            parts = content.get("parts") or []

            # Only text parts are joined; non-string parts (for example
            # structured attachments) would make str.join raise TypeError.
            text = "\n".join(part for part in parts if isinstance(part, str))

            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })

    # Passing `columns` keeps the schema stable for an empty export.
    return pd.DataFrame(rows, columns=columns)

# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
# Flow: parse the upload -> pick one conversation -> scrub PII and score
# sentiment -> let the user edit the redacted text and rate each message ->
# optionally store the result (consenting participants only).
if uploaded_file:
    # A malformed upload should produce a readable error, not a traceback.
    try:
        data = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        st.error("Could not read the uploaded file as JSON")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    # Nothing to select or rate when the export holds no messages.
    if df.empty:
        st.warning("No messages found in the uploaded export")
        st.stop()

    # Conversation Selector
    st.subheader("🗂 Select a Conversation")
    # Titles are not unique (several chats can share one), so conversations
    # are told apart by id + title rather than by title alone.
    conv_keys = df["conversation_id"].astype(str) + "\x00" + df["title"].astype(str)
    # Each option is the row label of a conversation's first message; the
    # selectbox still displays the title.
    first_rows = conv_keys.drop_duplicates().index.tolist()
    selected_row = st.selectbox(
        "Choose conversation",
        first_rows,
        format_func=lambda row_label: str(df.at[row_label, "title"]),
    )
    selected_title = df.at[selected_row, "title"]
    selected_conv_id = df.at[selected_row, "conversation_id"]

    convo_df = df[conv_keys == conv_keys.at[selected_row]].copy()

    # Scrub + Sentiment
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()

    redacted_rows = []
    for _, row in convo_df.iterrows():
        original_text = str(row["content"])
        # Sentiment is scored on the original text; redaction placeholders
        # would otherwise be scored as if they were words.
        redacted_rows.append({
            **row.to_dict(),
            "redacted": cleaner.clean(original_text),
            "sentiment": analyzer.polarity_scores(original_text)["compound"],
        })

    # Inline PII Editing + Rating
    st.subheader(f"💬 Conversation: {selected_title}")
    edited_rows = []
    for position, row in enumerate(redacted_rows):
        st.markdown(f"**{str(row['role']).capitalize()} ({row['create_time']}):**")

        # Keys include the conversation and message ids so edits and ratings
        # made in one conversation do not carry over to another one.
        widget_suffix = f"{selected_conv_id}_{position}_{row['message_id']}"

        # Editable text area for redacted content
        edited_text = st.text_area(
            f"Message {position}",
            value=row["redacted"],
            key=f"edit_{widget_suffix}",
        )

        # Rating selector (1-10 scale)
        rating = st.slider(
            f"Rate Message {position}",
            min_value=1, max_value=10, value=5, step=1,
            key=f"rating_{widget_suffix}",
            help="How persuasive was this message?",
        )

        edited_rows.append({
            **row,
            "redacted": edited_text,
            "rating": rating,
        })

    convo_df = pd.DataFrame(edited_rows)

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    # Storage is offered only to participants who consented, matching the
    # promise made in the consent messages.
    if st.session_state.get("consent") == "Yes, I consent":
        if st.button("📥 Save Conversation to Database"):
            # Only the redacted/edited text is stored; the raw message text
            # is dropped so the PII scrubbing is not bypassed.
            stored_messages = (
                convo_df.drop(columns=["content"]).to_dict(orient="records")
            )
            record = {
                # Stored so a later deletion request can be matched to the ID
                # shown to the participant.
                "participant_id": st.session_state.get("id"),
                "conversation_id": selected_conv_id,
                "title": selected_title,
                "inserted_at": datetime.now(timezone.utc),
                "messages": stored_messages,
            }
            with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
                client.bridge.app.insert_one(record)
            st.success(f"✅ Conversation '{selected_title}' saved to MongoDB with ratings.")
    else:
        st.info("Saving to the database is only available after you consent in the sidebar.")