File size: 5,775 Bytes
fdf8278
 
 
 
 
 
 
 
 
 
 
1310ca1
fdf8278
 
 
1310ca1
 
 
 
 
b6b1f02
fdf8278
 
 
1310ca1
 
 
 
 
 
 
 
 
fdf8278
1310ca1
 
 
 
 
 
 
 
 
 
 
fdf8278
 
 
 
 
1310ca1
fdf8278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1310ca1
fdf8278
 
 
 
 
 
 
 
 
b6b1f02
 
 
 
fdf8278
 
 
 
 
 
1d5a27d
 
 
 
 
 
 
 
 
 
 
 
 
fdf8278
8bd360d
b6b1f02
1d5a27d
b6b1f02
 
8bd360d
 
b6b1f02
 
 
 
 
8bd360d
 
 
 
 
 
 
 
 
1d5a27d
b6b1f02
8bd360d
 
b6b1f02
 
1d5a27d
fdf8278
8bd360d
 
 
 
 
 
 
 
 
 
 
fdf8278
 
 
 
 
 
 
 
8bd360d
fdf8278
 
8bd360d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import json
from datetime import datetime, timezone
from uuid import uuid4

import pandas as pd
import scrubadub
import streamlit as st
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# -----------------------------
# Page Config
# -----------------------------
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# -----------------------------
# Sidebar: App Navigation & File Upload
# -----------------------------
st.sidebar.title("⚙️ Settings")

# Consent
# The radio below is bound to st.session_state["consent"]; seed it with the
# empty option so that no answer is pre-selected on first load.
if "consent" not in st.session_state:
    st.session_state.consent = ""

with st.sidebar.expander("Consent Form", expanded=True):
    st.radio(
        "**Do you consent to participating in this study?**",
        ["", "Yes, I consent", "No, I do not consent"],
        key="consent",
    )

# File Upload
uploaded_file = st.sidebar.file_uploader("📂 Upload ChatGPT export (.json)", type=["json"])

# Privacy Policy in Sidebar
with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        # Explicit encoding: the platform default may not be UTF-8.
        with open("PrivacyPolicy.md", "r", encoding="utf-8") as f:
            st.markdown(f.read())
    except FileNotFoundError:
        # The message names the same file the code tries to open.
        st.error("Privacy policy file not found. Please add `PrivacyPolicy.md`.")

# -----------------------------
# Consent Messages in Main Page
# -----------------------------
if st.session_state.consent == "Yes, I consent":
    # Generate the participant ID once per session so it stays stable across
    # Streamlit reruns.
    # NOTE(review): the timestamp prefix groups fields unusually
    # (YYYYMM-DDHH-MM-); kept unchanged in case existing IDs rely on it.
    if "id" not in st.session_state:
        st.session_state.id = datetime.now().strftime('%Y%m-%d%H-%M-') + str(uuid4())
    st.success("✅ You consented to participate.")
    st.info(f"Your anonymized ID is: **{st.session_state.id}**. Keep this if you want your data deleted later.")

elif st.session_state.consent == "No, I do not consent":
    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")

# -----------------------------
# Parser Function
# -----------------------------
def _dict_or_empty(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def parse_chatgpt_export(data):
    """Flatten an export dict into one DataFrame row per message.

    Args:
        data: Mapping with a ``"conversations"`` list. Each conversation may
            carry ``"id"``, ``"title"`` and a ``"mapping"`` of
            ``message_id -> message``; each message may carry ``"author"``
            (with ``"role"``), ``"content"`` (with ``"parts"``) and
            ``"create_time"``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``conversation_id``,
        ``title``, ``message_id``, ``role``, ``content`` and ``create_time``.
        The columns are present even when no messages are found.

    Missing or ``null`` fields are tolerated: an absent role becomes
    ``"unknown"`` and absent content becomes ``""``. Non-string parts
    (for example structured attachment entries) are skipped instead of
    raising ``TypeError`` in ``str.join``.
    """
    # NOTE(review): this reads author/content directly from each mapping
    # entry. If the input nests them under a "message" key, every row will
    # come out as role "unknown" with empty content -- confirm the schema.
    columns = [
        "conversation_id",
        "title",
        "message_id",
        "role",
        "content",
        "create_time",
    ]
    rows = []
    for conv in data.get("conversations") or []:
        if not isinstance(conv, dict):
            continue
        conv_id = conv.get("id")
        title = conv.get("title")

        for msg_id, raw_msg in _dict_or_empty(conv.get("mapping")).items():
            msg = _dict_or_empty(raw_msg)
            author = _dict_or_empty(msg.get("author"))
            content = _dict_or_empty(msg.get("content"))

            parts = content.get("parts") or []
            if isinstance(parts, str):
                # A bare string would otherwise be joined character by character.
                parts = [parts]
            text = "\n".join(part for part in parts if isinstance(part, str))

            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })
    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)

# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
if uploaded_file:
    # A malformed upload should produce a readable message, not a traceback.
    try:
        data = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as JSON: {exc}")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    # Nothing to select, display or save when the export holds no messages.
    if df.empty:
        st.warning("No messages were found in the uploaded export.")
        st.stop()

    # Conversation Selector
    st.subheader("🗂 Select a Conversation")
    # Identify conversations by (id, title) rather than title alone, so that
    # two conversations sharing a title are not merged into one.
    row_keys = list(zip(df["conversation_id"], df["title"]))
    conversation_keys = list(dict.fromkeys(row_keys))
    known_titles = [title for _, title in conversation_keys]
    label_to_key = {}
    for conv_id, title in conversation_keys:
        label = str(title)
        if known_titles.count(title) > 1:
            # Disambiguate repeated titles with the conversation id.
            label = f"{title} ({conv_id})"
        label_to_key[label] = (conv_id, title)

    selected_label = st.selectbox("Choose conversation", list(label_to_key))
    selected_key = label_to_key[selected_label]
    selected_conv_id, selected_title = selected_key

    convo_df = df[[key == selected_key for key in row_keys]].copy()

    # Scrub + Sentiment
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()

    # Inline PII Editing + Rating
    st.subheader(f"💬 Conversation: {selected_title}")
    edited_rows = []
    for position, (_, row) in enumerate(convo_df.iterrows()):
        original_text = str(row["content"])
        redacted_text = cleaner.clean(original_text)
        # Sentiment is scored on the original text, before redaction.
        sentiment_score = analyzer.polarity_scores(original_text)["compound"]

        st.markdown(f"**{str(row['role']).capitalize()} ({row['create_time']}):**")

        # Widget keys include the conversation and message ids so that state
        # (edited text, rating) from one conversation is never reused for the
        # message at the same position in another conversation.
        widget_suffix = f"{selected_conv_id}_{position}_{row['message_id']}"

        # Editable text area for redacted content
        edited_text = st.text_area(
            f"Message {position}",
            value=redacted_text,
            key=f"edit_{widget_suffix}",
        )

        # Rating selector (1-10 scale)
        rating = st.slider(
            f"Rate Message {position}",
            min_value=1, max_value=10, value=5, step=1,
            key=f"rating_{widget_suffix}",
            help="How persuasive was this message?",
        )

        edited_rows.append({
            **row,
            "redacted": edited_text,
            "sentiment": sentiment_score,
            "rating": rating,
        })

    convo_df = pd.DataFrame(edited_rows)

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    # Storage is offered only after explicit consent, matching the promise
    # shown to users who decline ("your logs will not be stored").
    if st.session_state.get("consent") == "Yes, I consent":
        if st.button("📥 Save Conversation to Database"):
            with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
                db = client.bridge
                collection = db.app
                record = {
                    # Stored so a participant's data can be located for deletion.
                    "participant_id": st.session_state.get("id"),
                    "conversation_id": selected_conv_id,
                    "title": selected_title,
                    "inserted_at": datetime.now(timezone.utc),
                    # Drop the unredacted text: only the user-reviewed
                    # "redacted" version is persisted.
                    # NOTE(review): titles are stored as-is and may contain PII.
                    "messages": convo_df.drop(columns=["content"]).to_dict(orient="records"),
                }
                collection.insert_one(record)
                st.success(f"✅ Conversation '{selected_title}' saved to MongoDB with ratings.")
    else:
        st.info("Saving to the database is only available after you consent in the sidebar.")