| | import streamlit as st |
| | import requests |
| | import pandas as pd |
| | import io |
| | import os |
| | from PIL import Image |
| | import time |
| |
|
| | |
# Page-level Streamlit settings (browser tab title/icon, layout, sidebar state).
_PAGE_SETTINGS = {
    "page_title": "PromptPrepML - Auto ML Data Preprocessing",
    "page_icon": "π€",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
| |
|
| | |
# Stylesheet for the CSS classes used by the page: the main/step headers and
# the success / info / warning boxes.
_CUSTOM_CSS = """
<style>
.main-header {
    font-size: 2.5rem;
    font-weight: bold;
    color: #1f2937;
    text-align: center;
    margin-bottom: 2rem;
}
.step-header {
    font-size: 1.5rem;
    font-weight: 600;
    color: #374151;
    margin: 1rem 0;
}
.success-box {
    background-color: #f0fdf4;
    border: 1px solid #bbf7d0;
    border-radius: 0.5rem;
    padding: 1rem;
    margin: 1rem 0;
}
.info-box {
    background-color: #eff6ff;
    border: 1px solid #bfdbfe;
    border-radius: 0.5rem;
    padding: 1rem;
    margin: 1rem 0;
}
.warning-box {
    background-color: #fffbeb;
    border: 1px solid #fed7aa;
    border-radius: 0.5rem;
    padding: 1rem;
    margin: 1rem 0;
}
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
| |
|
| | |
# Base URL of the hosted backend; check_backend_health() probes this one first.
DEPLOYED_BACKEND = "https://promptprepml-backend.railway.app"
# Base URL of a locally running backend; probed as the fallback.
LOCAL_BACKEND = "http://localhost:8000"
| |
|
def check_backend_health():
    """Find the first reachable backend and remember it for this session.

    The deployed backend is probed first, then the local one. A backend
    counts as healthy when ``GET <backend>/health`` answers with HTTP 200
    within 5 seconds.

    Returns:
        tuple: ``(True, backend_url)`` for the first healthy backend, whose
        URL is also stored in ``st.session_state.backend_url``; otherwise
        ``(False, None)``.
    """
    for backend_url in (DEPLOYED_BACKEND, LOCAL_BACKEND):
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        except requests.RequestException:
            # Unreachable, timed out, bad TLS, ...: try the next candidate.
            # Only network-level failures are swallowed, so programming
            # errors and KeyboardInterrupt still surface.
            continue

        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url

    return False, None
| |
|
def upload_dataset(uploaded_file, timeout=60):
    """Send the dataset to the backend's ``/upload-dataset`` endpoint.

    Args:
        uploaded_file: File-like object to send as the multipart ``file``
            field (e.g. the object returned by ``st.file_uploader``).
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        tuple: ``(parsed_json, None)`` on HTTP 200, otherwise
        ``(None, error_message)``. This function never raises; every failure
        is reported through the error message.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # The caller may already have consumed the stream (for example to
        # preview it with pandas). Rewind so the whole file is sent instead
        # of an empty body.
        if hasattr(uploaded_file, 'seek'):
            uploaded_file.seek(0)

        response = requests.post(
            f"{st.session_state.backend_url}/upload-dataset",
            files={'file': uploaded_file},
            timeout=timeout,  # without it a stalled backend hangs the app
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Upload failed: {response.text}"
    except Exception as e:
        # Deliberately broad: callers rely on the (result, error) contract.
        return None, f"Upload error: {e}"
| |
|
def process_pipeline(uploaded_file, prompt, timeout=600):
    """Run the dataset through the backend's ``/process-pipeline`` endpoint.

    Args:
        uploaded_file: File-like object to send as the multipart ``file``
            field.
        prompt: Natural-language preprocessing instructions, sent as the
            ``prompt`` form field.
        timeout: Seconds to wait for the backend before giving up. The
            default is generous because the UI tells the user that
            processing may take a few minutes.

    Returns:
        tuple: ``(parsed_json, None)`` on HTTP 200, otherwise
        ``(None, error_message)``. This function never raises; every failure
        is reported through the error message.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # Rewind in case the stream was read before (preview, earlier
        # upload); otherwise an empty file would be posted.
        if hasattr(uploaded_file, 'seek'):
            uploaded_file.seek(0)

        response = requests.post(
            f"{st.session_state.backend_url}/process-pipeline",
            files={'file': uploaded_file},
            data={'prompt': prompt},
            timeout=timeout,  # without it a stalled backend hangs the app
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Processing failed: {response.text}"
    except Exception as e:
        # Deliberately broad: callers rely on the (result, error) contract.
        return None, f"Processing error: {e}"
| |
|
def download_file(filename, timeout=60):
    """Fetch a processed artifact from the backend.

    Args:
        filename: Name of the file to request from ``/api/download/<filename>``.
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        tuple: ``(content_bytes, None)`` on HTTP 200, otherwise
        ``(None, error_message)``. This function never raises; every failure
        is reported through the error message.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{filename}",
            timeout=timeout,  # without it a stalled backend hangs the app
        )
        if response.status_code == 200:
            return response.content, None
        return None, f"Download failed: {response.text}"
    except Exception as e:
        # Deliberately broad: callers rely on the (content, error) contract.
        return None, f"Download error: {e}"
| |
|
def main():
    """Render the PromptPrepML wizard (upload -> configure -> results).

    The current wizard step lives in ``st.session_state.step``. Results of
    the backend calls are kept in ``st.session_state.upload_result`` and
    ``st.session_state.processing_result``. The uploaded file's name and raw
    bytes are kept in ``uploaded_file_name`` / ``uploaded_file_bytes`` so the
    configure step can still send the dataset after the uploader widget is
    gone from the page.

    If no backend answers the health check, start-up instructions are shown
    and nothing else is rendered.
    """
    # NOTE(review): several emoji in the UI strings of this block are
    # mojibake (e.g. "π€", "β"). They are reproduced verbatim because the
    # original code points cannot be recovered from this file; only the
    # literals that had been split across two lines were repaired.
    st.markdown('<h1 class="main-header">π€ PromptPrepML</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)

    backend_healthy, backend_url = check_backend_health()
    if not backend_healthy:
        st.error("β Backend is not running! Please start the backend:")
        st.code(
            "\n"
            "cd promptprepml/backend\n"
            "venv\\Scripts\\activate\n"
            "python app/main.py\n"
            "\n"
            "# OR wait for deployed backend to be ready\n"
        )
        st.info("π **Deploying backend to cloud...** This will make the app work standalone!")
        return

    st.success(f"✅ Backend connected at: {backend_url}")

    # First run of a session: seed the wizard state.
    for key, default in (
        ('step', 'upload'),
        ('upload_result', None),
        ('processing_result', None),
    ):
        if key not in st.session_state:
            st.session_state[key] = default

    current_step = st.session_state.step
    _render_sidebar_progress(current_step)

    if current_step == 'upload':
        _render_upload_step()
    elif current_step == 'configure':
        _render_configure_step()
    elif current_step == 'results':
        _render_results_step()

    st.markdown("---")
    st.markdown(
        "\n"
        '<div style="text-align: center; color: #6b7280; margin-top: 2rem;">\n'
        "<p><strong>PromptPrepML</strong> - Automated ML Data Preprocessing</p>\n"
        "<p><small>Convert natural language prompts into ML-ready datasets</small></p>\n"
        "</div>\n",
        unsafe_allow_html=True,
    )


def _render_sidebar_progress(current_step):
    """Show the step list in the sidebar, marking done / current / pending."""
    st.sidebar.title("π Processing Steps")

    steps = ['π€ Upload', 'βοΈ Configure', 'π Process', 'π Results']
    step_order = ('upload', 'configure', 'process', 'results')
    # Unknown step names fall back to the first entry, as before.
    current_step_index = step_order.index(current_step) if current_step in step_order else 0

    for i, step in enumerate(steps):
        if i < current_step_index:
            st.sidebar.success(f"✅ {step}")
        elif i == current_step_index:
            st.sidebar.info(f"π {step}")
        else:
            st.sidebar.write(f"β³ {step}")


def _render_upload_step():
    """Step 1: pick a CSV, preview it, and upload it to the backend."""
    st.markdown('<h2 class="step-header">π€ Step 1: Upload Dataset</h2>', unsafe_allow_html=True)

    uploaded_file = st.file_uploader(
        "Choose a CSV file",
        type=['csv'],
        help="Upload your dataset for preprocessing",
    )
    if uploaded_file is None:
        return

    st.info(f"π File uploaded: `{uploaded_file.name}`")

    # Parse a copy of the bytes so the preview does not consume the stream
    # that is about to be sent to the backend.
    file_bytes = uploaded_file.getvalue()
    try:
        df = pd.read_csv(io.BytesIO(file_bytes))
    except Exception as e:
        st.error(f"β Error reading file: {str(e)}")
        return

    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    st.markdown(f"**Dataset Shape:** {df.shape}")
    st.markdown(f"**Columns:** {', '.join(map(str, df.columns))}")
    st.dataframe(df.head())
    st.markdown('</div>', unsafe_allow_html=True)

    if st.button("π Continue to Configuration", type="primary"):
        with st.spinner("Uploading dataset..."):
            uploaded_file.seek(0)  # make sure the full file is posted
            result, error = upload_dataset(uploaded_file)
        if error:
            st.error(f"β Upload failed: {error}")
        else:
            st.session_state.upload_result = result
            # The uploader widget is not rendered on later steps, so keep
            # what the configure step needs to send the dataset again.
            st.session_state.uploaded_file_name = uploaded_file.name
            st.session_state.uploaded_file_bytes = file_bytes
            st.session_state.step = 'configure'
            st.rerun()


def _render_configure_step():
    """Step 2: collect the preprocessing prompt and start the pipeline."""
    st.markdown('<h2 class="step-header">βοΈ Step 2: Configure Processing</h2>', unsafe_allow_html=True)

    file_info = st.session_state.upload_result
    if file_info:
        st.markdown('<div class="info-box">', unsafe_allow_html=True)
        st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
        st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")
        st.markdown('</div>', unsafe_allow_html=True)

    prompt = st.text_area(
        "Describe your preprocessing needs:",
        value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
        height=100,
        help="Describe what you want to do with your dataset in natural language",
    )

    back_col, run_col = st.columns([1, 1])
    with back_col:
        if st.button("⬅️ Back", type="secondary"):
            st.session_state.step = 'upload'
            st.rerun()

    with run_col:
        if st.button("π Start Processing", type="primary"):
            file_bytes = st.session_state.get('uploaded_file_bytes')
            if file_bytes is None:
                st.warning("The uploaded dataset is no longer available. Please go back and upload it again.")
                return

            # Rebuild a file-like object from the bytes saved during upload;
            # `name` lets the multipart encoder derive a filename from it.
            dataset = io.BytesIO(file_bytes)
            dataset.name = st.session_state.get('uploaded_file_name', 'dataset.csv')

            with st.spinner("Processing dataset... This may take a few minutes."):
                result, error = process_pipeline(dataset, prompt)
            if error:
                st.error(f"β Processing failed: {error}")
            else:
                st.session_state.processing_result = result
                st.session_state.step = 'results'
                st.rerun()


def _render_results_step():
    """Step 3: show the processing summary, preview and download buttons."""
    st.markdown('<h2 class="step-header">π Step 3: Results</h2>', unsafe_allow_html=True)

    result = st.session_state.processing_result
    if result:
        st.markdown('<div class="success-box">', unsafe_allow_html=True)
        st.success("✅ Dataset processed successfully!")
        st.markdown('</div>', unsafe_allow_html=True)

        summary_col, download_col = st.columns([2, 1])

        with summary_col:
            st.markdown("### π Processing Summary")

            dataset_info = result.get('dataset_info', {})
            if dataset_info:
                basic_info = dataset_info.get('basic_info', {})
                st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
                st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")

            preprocessing_info = result.get('preprocessing_info', {})
            if preprocessing_info:
                st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")

            st.markdown("### π Dataset Preview")
            preview_data = result.get('preview_data', [])
            if preview_data:
                st.dataframe(pd.DataFrame(preview_data))

        with download_col:
            st.markdown("### π₯ Download Files")

            download_links = [
                ("Processed Dataset", "processed_dataset.csv"),
                ("Training Set", "train.csv"),
                ("Test Set", "test.csv"),
                ("Pipeline", "pipeline.pkl"),
                ("EDA Report", "eda_report.html"),
            ]

            for name, filename in download_links:
                # Two-stage download: fetch from the backend on click, then
                # offer the bytes through a browser download button.
                if st.button(f"π₯ {name}", key=f"download_{filename}"):
                    with st.spinner(f"Downloading {filename}..."):
                        file_content, error = download_file(filename)
                    if error:
                        st.error(f"β Download failed: {error}")
                    else:
                        st.download_button(
                            label=f"πΎ Save {filename}",
                            data=file_content,
                            file_name=filename,
                            mime="application/octet-stream",
                        )

    restart_col, report_col = st.columns([1, 1])
    with restart_col:
        if st.button("π Process New Dataset", type="secondary"):
            # Drop everything (results, saved file bytes, ...) and start over.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.session_state.step = 'upload'
            st.rerun()

    with report_col:
        if st.button("π View EDA Report", type="primary"):
            st.info("π EDA Report feature coming soon!")
| |
|
# Entry point: render the app when this module is executed as a script.
if __name__ == "__main__":
    main()
| |
|