import streamlit as st
import requests
import pandas as pd
import io
import os
from PIL import Image
import time
# Configure page: must be the first Streamlit call in the script.
st.set_page_config(
page_title="PromptPrepML - Auto ML Data Preprocessing",
page_icon="🤖",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better styling
# NOTE(review): the CSS payload appears to have been lost in extraction —
# this currently injects an empty string; restore the stylesheet if available.
st.markdown("""
""", unsafe_allow_html=True)
# API base URLs - try deployed backend first, fallback to localhost
DEPLOYED_BACKEND = "https://promptprepml-backend.railway.app"
LOCAL_BACKEND = "http://localhost:8000"
def check_backend_health():
    """Find a reachable backend, preferring the deployed URL over localhost.

    Probes each candidate's ``/health`` endpoint in order and records the
    first responsive base URL in ``st.session_state.backend_url``.

    Returns:
        tuple: ``(True, backend_url)`` if a backend answered with HTTP 200,
        otherwise ``(False, None)``.
    """
    for backend_url in (DEPLOYED_BACKEND, LOCAL_BACKEND):
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        except requests.RequestException:
            # Narrowed from a bare `except:`: only network/HTTP failures
            # should trigger fallback to the next backend, not e.g.
            # KeyboardInterrupt or programming errors.
            continue
        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url
    return False, None
def upload_dataset(uploaded_file):
    """POST the uploaded file to the backend's /upload-dataset endpoint.

    Args:
        uploaded_file: File-like object from ``st.file_uploader``.

    Returns:
        tuple: ``(parsed JSON response, None)`` on success, or
        ``(None, error message)`` on any failure.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    endpoint = f"{st.session_state.backend_url}/upload-dataset"
    try:
        response = requests.post(endpoint, files={'file': uploaded_file})
        if response.status_code != 200:
            return None, f"Upload failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Upload error: {str(e)}"
def process_pipeline(uploaded_file, prompt):
    """Run the dataset through the backend ML preprocessing pipeline.

    Args:
        uploaded_file: File-like object from ``st.file_uploader``.
        prompt: Natural-language description of the desired preprocessing.

    Returns:
        tuple: ``(parsed JSON response, None)`` on success, or
        ``(None, error message)`` on any failure.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    endpoint = f"{st.session_state.backend_url}/process-pipeline"
    try:
        response = requests.post(
            endpoint,
            files={'file': uploaded_file},
            data={'prompt': prompt},
        )
        if response.status_code != 200:
            return None, f"Processing failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Processing error: {str(e)}"
def download_file(filename):
    """Fetch a processed artifact from the backend by name.

    Args:
        filename: Name of the file to download.

    Returns:
        tuple: ``(raw response bytes, None)`` on success, or
        ``(None, error message)`` on any failure.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    try:
        # BUG FIX: `filename` was never interpolated into the URL (a literal
        # placeholder was in the path), so every download requested the same
        # nonexistent resource.
        # NOTE(review): assumes the backend serves artifacts at
        # /api/download/<filename> — confirm against the backend routes.
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{filename}"
        )
        if response.status_code == 200:
            return response.content, None
        return None, f"Download failed: {response.text}"
    except Exception as e:
        return None, f"Download error: {str(e)}"
def main():
    """Render the Streamlit UI: a 4-step upload -> configure -> process -> results flow."""
    # Main header.
    # NOTE(review): the original inline HTML was lost in extraction; this is a
    # plausible reconstruction — confirm class names against the intended CSS.
    st.markdown('<h1 class="main-header">🤖 PromptPrepML</h1>',
                unsafe_allow_html=True)
    st.markdown(
        '<p class="sub-header">Convert natural language prompts into '
        'ML-ready datasets</p>',
        unsafe_allow_html=True,
    )

    # Check backend health before rendering anything interactive.
    backend_healthy, backend_url = check_backend_health()
    if not backend_healthy:
        st.error("❌ Backend is not running! Please start the backend:")
        st.code("""
cd promptprepml/backend
venv\\Scripts\\activate
python app/main.py
# OR wait for deployed backend to be ready
""")
        st.info("🚀 **Deploying backend to cloud...** This will make the app work standalone!")
        return
    st.success(f"✅ Backend connected at: {backend_url}")

    # Sidebar for navigation.
    st.sidebar.title("📋 Processing Steps")

    # Initialize session state with defaults on first run.
    if 'step' not in st.session_state:
        st.session_state.step = 'upload'
    if 'upload_result' not in st.session_state:
        st.session_state.upload_result = None
    if 'processing_result' not in st.session_state:
        st.session_state.processing_result = None

    # Step indicators: map the session step name to its position in the flow.
    steps = ['📤 Upload', '⚙️ Configure', '🚀 Process', '📊 Results']
    step_order = {'upload': 0, 'configure': 1, 'process': 2, 'results': 3}
    current_step_index = step_order.get(st.session_state.step, 0)

    for i, step in enumerate(steps):
        if i < current_step_index:
            st.sidebar.success(f"✅ {step}")
        elif i == current_step_index:
            st.sidebar.info(f"🔄 {step}")
        else:
            st.sidebar.write(f"⏳ {step}")

    # Step 1: Upload Dataset
    if st.session_state.step == 'upload':
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type=['csv'],
            help="Upload your dataset for preprocessing"
        )
        if uploaded_file is not None:
            st.info(f"📄 File uploaded: `{uploaded_file.name}`")
            # Show file preview.
            try:
                df = pd.read_csv(uploaded_file)
                st.markdown(f"**Dataset Shape:** {df.shape}")
                st.markdown(f"**Columns:** {', '.join(df.columns)}")
                st.dataframe(df.head())
                if st.button("🚀 Continue to Configuration", type="primary"):
                    # BUG FIX: pd.read_csv consumed the in-memory buffer above;
                    # rewind before re-sending, otherwise the backend receives
                    # an empty file.
                    uploaded_file.seek(0)
                    with st.spinner("Uploading dataset..."):
                        result, error = upload_dataset(uploaded_file)
                    if error:
                        st.error(f"❌ Upload failed: {error}")
                    else:
                        st.session_state.upload_result = result
                        # BUG FIX: keep the file in session state so the
                        # configure step can re-submit it; previously that
                        # branch referenced an undefined local (NameError).
                        uploaded_file.seek(0)
                        st.session_state.uploaded_file = uploaded_file
                        st.session_state.step = 'configure'
                        st.rerun()
            except Exception as e:
                st.error(f"❌ Error reading file: {str(e)}")

    # Step 2: Configure Processing
    elif st.session_state.step == 'configure':
        if st.session_state.upload_result:
            file_info = st.session_state.upload_result
            st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
            st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")

        # Processing options.
        prompt = st.text_area(
            "Describe your preprocessing needs:",
            value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
            height=100,
            help="Describe what you want to do with your dataset in natural language"
        )
        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("⬅️ Back", type="secondary"):
                st.session_state.step = 'upload'
                st.rerun()
        with col2:
            if st.button("🚀 Start Processing", type="primary"):
                # Re-use the file stashed during the upload step.
                uploaded_file = st.session_state.get('uploaded_file')
                if uploaded_file is not None:
                    uploaded_file.seek(0)
                    with st.spinner("Processing dataset... This may take a few minutes."):
                        result, error = process_pipeline(uploaded_file, prompt)
                    if error:
                        st.error(f"❌ Processing failed: {error}")
                    else:
                        st.session_state.processing_result = result
                        st.session_state.step = 'results'
                        st.rerun()

    # Step 3: Results
    elif st.session_state.step == 'results':
        if st.session_state.processing_result:
            result = st.session_state.processing_result
            # Success message.
            st.success("✅ Dataset processed successfully!")

            # Results summary.
            col1, col2 = st.columns([2, 1])
            with col1:
                st.markdown("### 📈 Processing Summary")
                dataset_info = result.get('dataset_info', {})
                if dataset_info:
                    basic_info = dataset_info.get('basic_info', {})
                    st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
                    st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")
                preprocessing_info = result.get('preprocessing_info', {})
                if preprocessing_info:
                    st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")

                # Dataset preview.
                st.markdown("### 👀 Dataset Preview")
                preview_data = result.get('preview_data', [])
                if preview_data:
                    df_preview = pd.DataFrame(preview_data)
                    st.dataframe(df_preview)

            with col2:
                st.markdown("### 📥 Download Files")
                download_links = [
                    ("Processed Dataset", "processed_dataset.csv"),
                    ("Training Set", "train.csv"),
                    ("Test Set", "test.csv"),
                    ("Pipeline", "pipeline.pkl"),
                    ("EDA Report", "eda_report.html")
                ]
                for name, filename in download_links:
                    # BUG FIX: the widget key, spinner text, and save label
                    # contained a literal placeholder instead of interpolating
                    # the filename — every button shared the same key, which
                    # Streamlit rejects as a duplicate.
                    if st.button(f"📥 {name}", key=f"download_{filename}"):
                        with st.spinner(f"Downloading {name}..."):
                            file_content, error = download_file(filename)
                        if error:
                            st.error(f"❌ Download failed: {error}")
                        else:
                            st.download_button(
                                label=f"💾 Save {name}",
                                data=file_content,
                                file_name=filename,
                                mime="application/octet-stream"
                            )

            # Action buttons.
            col1, col2 = st.columns([1, 1])
            with col1:
                if st.button("🔄 Process New Dataset", type="secondary"):
                    # Reset session state so the flow restarts cleanly.
                    for key in list(st.session_state.keys()):
                        del st.session_state[key]
                    st.session_state.step = 'upload'
                    st.rerun()
            with col2:
                if st.button("📈 View EDA Report", type="primary"):
                    st.info("📊 EDA Report feature coming soon!")

    # Footer.
    st.markdown("---")
    st.markdown("""
PromptPrepML - Automated ML Data Preprocessing
Convert natural language prompts into ML-ready datasets
""", unsafe_allow_html=True)


if __name__ == "__main__":
    main()