Spaces:
Sleeping
Sleeping
File size: 5,775 Bytes
fdf8278 1310ca1 fdf8278 1310ca1 b6b1f02 fdf8278 1310ca1 fdf8278 1310ca1 fdf8278 1310ca1 fdf8278 1310ca1 fdf8278 b6b1f02 fdf8278 1d5a27d fdf8278 8bd360d b6b1f02 1d5a27d b6b1f02 8bd360d b6b1f02 8bd360d 1d5a27d b6b1f02 8bd360d b6b1f02 1d5a27d fdf8278 8bd360d fdf8278 8bd360d fdf8278 8bd360d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | import streamlit as st
import pandas as pd
import json
import scrubadub
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from datetime import datetime
from uuid import uuid4
# -----------------------------
# Page Config
# -----------------------------
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# -----------------------------
# Sidebar: App Navigation & File Upload
# -----------------------------
st.sidebar.title("⚙️ Settings")

# Consent: seed the session key before the radio widget is created so the
# widget starts on the blank option and later code can read
# `st.session_state.consent` unconditionally.
if "consent" not in st.session_state:
    st.session_state.consent = ""

with st.sidebar.expander("Consent Form", expanded=True):
    # `key="consent"` binds the selected option to st.session_state.consent.
    st.radio(
        "**Do you consent to participating in this study?**",
        ["", "Yes, I consent", "No, I do not consent"],
        key="consent",
    )

# File Upload
# NOTE(review): the original icon glyph was lost to mis-encoding; the folder
# emoji is a best guess -- confirm against the upstream file.
uploaded_file = st.sidebar.file_uploader(
    "📂 Upload ChatGPT export (.json)", type=["json"]
)

# Privacy Policy in Sidebar
# Single source of truth for the path so the error message can never drift
# from the file that is actually opened.
PRIVACY_POLICY_PATH = "PrivacyPolicy.md"

with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        # Explicit encoding: the platform default may not be UTF-8.
        with open(PRIVACY_POLICY_PATH, "r", encoding="utf-8") as policy_file:
            policy_text = policy_file.read()
    except FileNotFoundError:
        st.error(
            f"Privacy policy file not found. Please add `{PRIVACY_POLICY_PATH}`."
        )
    else:
        st.markdown(policy_text)
# -----------------------------
# Consent Messages in Main Page
# -----------------------------
if st.session_state.consent == "Yes, I consent":
    # Create the participant ID only once per session; later reruns reuse the
    # value already stored in session state.
    if "id" not in st.session_state:
        st.session_state.id = (
            datetime.now().strftime('%Y%m-%d%H-%M-') + str(uuid4())
        )
    st.success("✅ You consented to participate.")
    st.info(
        f"Your anonymized ID is: **{st.session_state.id}**. "
        "Keep this if you want your data deleted later."
    )
elif st.session_state.consent == "No, I do not consent":
    st.warning(
        "⚠️ You did not consent. You can still use the app, "
        "but your logs will not be stored."
    )
# The blank option (no choice made yet) intentionally shows no message.
# -----------------------------
# Parser Function
# -----------------------------
def parse_chatgpt_export(data):
    """Flatten a ChatGPT export into one DataFrame row per message.

    Args:
        data: Parsed export JSON as a dict. Conversations are read from its
            "conversations" list; each conversation may provide "id",
            "title" and a "mapping" of message id -> message dict. A message
            dict may provide "author" (with "role"), "content" (with
            "parts") and "create_time".

    Returns:
        A pandas DataFrame with the columns ``conversation_id``, ``title``,
        ``message_id``, ``role``, ``content`` and ``create_time``. The
        columns are present even when the export contains no messages, so
        callers can index them unconditionally.

    Missing or null fields are tolerated: a missing role becomes
    ``"unknown"`` and missing content becomes an empty string. Only string
    entries of "parts" are joined (with newlines); non-string entries are
    skipped because they cannot be joined as text.
    """
    columns = [
        "conversation_id",
        "title",
        "message_id",
        "role",
        "content",
        "create_time",
    ]
    rows = []
    # `or` fallbacks cover keys that are present but explicitly null, which
    # `.get(key, default)` alone would pass through as None.
    for conv in data.get("conversations") or []:
        conv_id = conv.get("id")
        title = conv.get("title")
        for msg_id, msg in (conv.get("mapping") or {}).items():
            msg = msg or {}
            author = msg.get("author") or {}
            content = msg.get("content") or {}
            parts = content.get("parts") or []
            text = "\n".join(part for part in parts if isinstance(part, str))
            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })
    # Passing `columns` keeps the schema stable for an empty `rows` list.
    return pd.DataFrame(rows, columns=columns)
# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
if uploaded_file:
    # A file that is not valid JSON should produce a message, not a traceback.
    # Both JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
    try:
        data = json.load(uploaded_file)
    except ValueError:
        st.error("Could not read the uploaded file as JSON.")
        st.stop()

    if not (isinstance(data, dict) and "conversations" in data):
        st.error("Unsupported JSON structure")
        st.stop()

    df = parse_chatgpt_export(data)
    if df.empty:
        # Nothing to select or display; everything below assumes >= 1 row.
        st.warning("No messages found in the uploaded export.")
        st.stop()

    # Conversation Selector
    # NOTE(review): the original icon glyph was lost to mis-encoding; the
    # emoji below is a best guess -- confirm against the upstream file.
    st.subheader("📑 Select a Conversation")
    # Group by (id, title) rather than title alone so that two different
    # conversations sharing a title are not merged into one. dropna=False
    # keeps conversations whose id or title is missing.
    conversation_frames = [
        frame
        for _, frame in df.groupby(
            ["conversation_id", "title"], sort=False, dropna=False
        )
    ]
    selected_pos = st.selectbox(
        "Choose conversation",
        range(len(conversation_frames)),
        format_func=lambda pos: str(conversation_frames[pos]["title"].iloc[0]),
    )
    # reset_index gives a fresh frame numbered 0..n-1 for the message labels.
    convo_df = conversation_frames[selected_pos].reset_index(drop=True)
    selected_title = convo_df["title"].iloc[0]
    conversation_id = convo_df["conversation_id"].iloc[0]

    # Scrub + Sentiment
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()
    original_texts = convo_df["content"].astype(str)
    convo_df["redacted"] = original_texts.map(cleaner.clean)
    # Sentiment is scored on the original text, before redaction.
    convo_df["sentiment"] = original_texts.map(
        lambda text: analyzer.polarity_scores(text)["compound"]
    )

    # Inline PII Editing + Rating
    st.subheader(f"💬 Conversation: {selected_title}")
    edited_texts = []
    ratings = []
    for i, row in convo_df.iterrows():
        st.markdown(f"**{row['role'].capitalize()} ({row['create_time']}):**")
        # Widget keys include the conversation and message ids so that edits
        # and ratings made in one conversation are not reused for the message
        # at the same position in another conversation.
        widget_suffix = f"{conversation_id}_{row['message_id']}"
        # Editable text area for redacted content
        edited_texts.append(
            st.text_area(
                f"Message {i}",
                value=row["redacted"],
                key=f"edit_{widget_suffix}",
            )
        )
        # Rating selector (1-10 scale)
        ratings.append(
            st.slider(
                f"Rate Message {i}",
                min_value=1, max_value=10, value=5, step=1,
                key=f"rating_{widget_suffix}",
                help="How persuasive was this message?",
            )
        )
    # Replace the automatic redaction with the user's edits and add ratings.
    convo_df["redacted"] = edited_texts
    convo_df["rating"] = ratings

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    # Storage is only offered after explicit consent, matching the promise
    # that logs are not stored for participants who did not consent.
    if st.session_state.get("consent") == "Yes, I consent":
        if st.button("📥 Save Conversation to Database"):
            with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
                db = client.bridge
                collection = db.app
                record = {
                    "conversation_id": conversation_id,
                    # Stored so the record can be located if the participant
                    # later asks for deletion using their anonymized ID.
                    "participant_id": st.session_state.get("id"),
                    "title": selected_title,
                    "inserted_at": datetime.utcnow(),
                    "messages": convo_df.to_dict(orient="records"),  # includes rating
                }
                collection.insert_one(record)
            st.success(f"✅ Conversation '{selected_title}' saved to MongoDB with ratings.")
    else:
        st.info("Saving is disabled because you have not consented to participate.")
|