# Learnerbegginer's picture
# Fix API endpoints - use correct backend endpoints (/upload-dataset and /process-pipeline)
# 301288a
import streamlit as st
import requests
import pandas as pd
import io
import os
from PIL import Image
import time
# Configure the browser tab title, icon, layout width and sidebar state.
# The icon is a real emoji: the previous literal was the emoji's UTF-8 bytes
# decoded with the wrong codepage, which is not a usable page icon.
st.set_page_config(
    page_title="PromptPrepML - Auto ML Data Preprocessing",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Stylesheet for the header classes and the coloured success/info/warning
# boxes used by the HTML snippets rendered further down.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f2937;
text-align: center;
margin-bottom: 2rem;
}
.step-header {
font-size: 1.5rem;
font-weight: 600;
color: #374151;
margin: 1rem 0;
}
.success-box {
background-color: #f0fdf4;
border: 1px solid #bbf7d0;
border-radius: 0.5rem;
padding: 1rem;
margin: 1rem 0;
}
.info-box {
background-color: #eff6ff;
border: 1px solid #bfdbfe;
border-radius: 0.5rem;
padding: 1rem;
margin: 1rem 0;
}
.warning-box {
background-color: #fffbeb;
border: 1px solid #fed7aa;
border-radius: 0.5rem;
padding: 1rem;
margin: 1rem 0;
}
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# API base URLs - the deployed backend is tried first, localhost is the fallback.
# Either one can be overridden through an environment variable so the app can
# be pointed at another backend without editing this file. A trailing slash is
# stripped so that f"{url}/health" never produces a double slash.
DEPLOYED_BACKEND = os.environ.get(
    "PROMPTPREPML_DEPLOYED_BACKEND",
    "https://promptprepml-backend.railway.app",
).rstrip("/")
LOCAL_BACKEND = os.environ.get(
    "PROMPTPREPML_LOCAL_BACKEND",
    "http://localhost:8000",
).rstrip("/")
def check_backend_health():
    """Probe the known backends and remember the first one that answers.

    Sends ``GET <backend>/health`` to the deployed backend and then to the
    local one, each with a 5 second timeout. The first backend that replies
    with HTTP 200 is stored in ``st.session_state.backend_url``.

    Returns:
        ``(True, backend_url)`` when a backend answered with HTTP 200,
        otherwise ``(False, None)``.
    """
    for backend_url in (DEPLOYED_BACKEND, LOCAL_BACKEND):
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        except requests.RequestException:
            # Unreachable / timed out / TLS error: try the next candidate.
            # Only request failures are swallowed so that KeyboardInterrupt
            # and genuine programming errors still surface.
            continue
        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url
    return False, None
def upload_dataset(uploaded_file, timeout=120):
    """Upload a dataset to the backend's ``/upload-dataset`` endpoint.

    Args:
        uploaded_file: Value placed under the ``file`` key of the multipart
            request: a file-like object or any ``files`` value accepted by
            ``requests``.
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        ``(payload, None)`` with the decoded JSON response on HTTP 200,
        otherwise ``(None, error_message)``. Never raises.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    try:
        # A stream that was already read (e.g. for a preview) sits at EOF and
        # would be posted as an empty file, so rewind it first.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        response = requests.post(
            f"{st.session_state.backend_url}/upload-dataset",
            files={'file': uploaded_file},
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Upload failed: {response.text}"
    except Exception as e:
        # Boundary with the UI: report every failure as a message.
        return None, f"Upload error: {str(e)}"
def process_pipeline(uploaded_file, prompt, timeout=600):
    """Send a dataset and a prompt to the backend's ``/process-pipeline`` endpoint.

    Args:
        uploaded_file: Value placed under the ``file`` key of the multipart
            request: a file-like object or any ``files`` value accepted by
            ``requests``.
        prompt: Natural-language preprocessing instructions, sent as the
            ``prompt`` form field.
        timeout: Seconds to wait for the backend before giving up. The
            default is generous because the UI tells the user processing may
            take a few minutes.

    Returns:
        ``(payload, None)`` with the decoded JSON response on HTTP 200,
        otherwise ``(None, error_message)``. Never raises.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    try:
        # A stream that was already read sits at EOF and would be posted as
        # an empty file, so rewind it first.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        response = requests.post(
            f"{st.session_state.backend_url}/process-pipeline",
            files={'file': uploaded_file},
            data={'prompt': prompt},
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Processing failed: {response.text}"
    except Exception as e:
        # Boundary with the UI: report every failure as a message.
        return None, f"Processing error: {str(e)}"
def download_file(filename, timeout=120):
    """Fetch a processed artifact from the backend.

    Args:
        filename: Name of the file to request from ``/api/download/<filename>``.
        timeout: Seconds to wait for the backend before giving up.

    Returns:
        ``(content_bytes, None)`` on HTTP 200, otherwise
        ``(None, error_message)``. Never raises.
    """
    from urllib.parse import quote

    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"
    try:
        # NOTE(review): this is the only endpoint with an "/api" prefix; the
        # upload and processing endpoints have none - confirm against the
        # backend routes.
        # Escape the name so it always stays a single URL path segment.
        safe_name = quote(str(filename), safe="")
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{safe_name}",
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.content, None
        return None, f"Download failed: {response.text}"
    except Exception as e:
        # Boundary with the UI: report every failure as a message.
        return None, f"Download error: {str(e)}"
def _render_sidebar_steps():
    """Show the step checklist in the sidebar (done / current / pending)."""
    st.sidebar.title("📋 Processing Steps")
    steps = ['📤 Upload', '⚙️ Configure', '🚀 Process', '📊 Results']
    step_positions = {'upload': 0, 'configure': 1, 'process': 2, 'results': 3}
    current_step_index = step_positions.get(st.session_state.step, 0)
    for i, step in enumerate(steps):
        if i < current_step_index:
            st.sidebar.success(f"✅ {step}")
        elif i == current_step_index:
            st.sidebar.info(f"🔄 {step}")
        else:
            st.sidebar.write(f"⏳ {step}")


def _render_upload_step():
    """Step 1: pick a CSV, preview it and upload it to the backend."""
    st.markdown('<h2 class="step-header">📤 Step 1: Upload Dataset</h2>', unsafe_allow_html=True)
    uploaded_file = st.file_uploader(
        "Choose a CSV file",
        type=['csv'],
        help="Upload your dataset for preprocessing"
    )
    if uploaded_file is None:
        return
    st.info(f"📄 File uploaded: `{uploaded_file.name}`")

    # Take the raw bytes up front: parsing the preview below consumes the
    # stream, and the bytes must outlive this script run so the configure
    # step can still send the file.
    file_name = uploaded_file.name
    file_bytes = uploaded_file.getvalue()

    try:
        df = pd.read_csv(io.BytesIO(file_bytes))
    except Exception as e:
        st.error(f"❌ Error reading file: {str(e)}")
        return

    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    st.markdown(f"**Dataset Shape:** {df.shape}")
    # Column labels are not guaranteed to be strings.
    st.markdown(f"**Columns:** {', '.join(map(str, df.columns))}")
    st.dataframe(df.head())
    st.markdown('</div>', unsafe_allow_html=True)

    if st.button("🚀 Continue to Configuration", type="primary"):
        with st.spinner("Uploading dataset..."):
            result, error = upload_dataset((file_name, file_bytes))
        if error:
            # The helper's message already says what failed.
            st.error(f"❌ {error}")
        else:
            st.session_state.upload_result = result
            st.session_state.uploaded_file_name = file_name
            st.session_state.uploaded_file_bytes = file_bytes
            st.session_state.step = 'configure'
            st.rerun()


def _render_configure_step():
    """Step 2: show the uploaded file's info, collect the prompt, run the pipeline."""
    st.markdown('<h2 class="step-header">⚙️ Step 2: Configure Processing</h2>', unsafe_allow_html=True)
    if st.session_state.upload_result:
        file_info = st.session_state.upload_result
        st.markdown('<div class="info-box">', unsafe_allow_html=True)
        st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
        st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")
        st.markdown('</div>', unsafe_allow_html=True)

    prompt = st.text_area(
        "Describe your preprocessing needs:",
        value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
        height=100,
        help="Describe what you want to do with your dataset in natural language"
    )

    back_col, start_col = st.columns([1, 1])
    with back_col:
        if st.button("⬅️ Back", type="secondary"):
            st.session_state.step = 'upload'
            st.rerun()
    with start_col:
        if st.button("🚀 Start Processing", type="primary"):
            # The uploader widget does not exist in this step, so the file
            # comes from what the upload step saved in session state.
            file_name = st.session_state.get('uploaded_file_name')
            file_bytes = st.session_state.get('uploaded_file_bytes')
            if file_bytes is None:
                st.error("❌ No dataset available. Please go back and upload a file again.")
            else:
                with st.spinner("Processing dataset... This may take a few minutes."):
                    result, error = process_pipeline((file_name, file_bytes), prompt)
                if error:
                    st.error(f"❌ {error}")
                else:
                    st.session_state.processing_result = result
                    st.session_state.step = 'results'
                    st.rerun()


def _render_results_step():
    """Step 3: summarise the processing result and offer the output files."""
    st.markdown('<h2 class="step-header">📊 Step 3: Results</h2>', unsafe_allow_html=True)
    if st.session_state.processing_result:
        result = st.session_state.processing_result
        st.markdown('<div class="success-box">', unsafe_allow_html=True)
        st.success("✅ Dataset processed successfully!")
        st.markdown('</div>', unsafe_allow_html=True)

        summary_col, download_col = st.columns([2, 1])
        with summary_col:
            st.markdown("### 📈 Processing Summary")
            dataset_info = result.get('dataset_info', {})
            if dataset_info:
                basic_info = dataset_info.get('basic_info', {})
                st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
                st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")
            preprocessing_info = result.get('preprocessing_info', {})
            if preprocessing_info:
                st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")
            st.markdown("### 👀 Dataset Preview")
            preview_data = result.get('preview_data', [])
            if preview_data:
                st.dataframe(pd.DataFrame(preview_data))
        with download_col:
            st.markdown("### 📥 Download Files")
            download_links = [
                ("Processed Dataset", "processed_dataset.csv"),
                ("Training Set", "train.csv"),
                ("Test Set", "test.csv"),
                ("Pipeline", "pipeline.pkl"),
                ("EDA Report", "eda_report.html"),
            ]
            for name, filename in download_links:
                if st.button(f"📥 {name}", key=f"download_{filename}"):
                    with st.spinner(f"Downloading {filename}..."):
                        file_content, error = download_file(filename)
                    if error:
                        st.error(f"❌ {error}")
                    else:
                        st.download_button(
                            label=f"💾 Save {filename}",
                            data=file_content,
                            file_name=filename,
                            mime="application/octet-stream"
                        )

    reset_col, eda_col = st.columns([1, 1])
    with reset_col:
        if st.button("🔄 Process New Dataset", type="secondary"):
            # Wipe everything (including the cached file bytes) and restart.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.session_state.step = 'upload'
            st.rerun()
    with eda_col:
        if st.button("📈 View EDA Report", type="primary"):
            st.info("📊 EDA Report feature coming soon!")


def _render_footer():
    """Render the static page footer."""
    st.markdown("---")
    st.markdown("""
<div style="text-align: center; color: #6b7280; margin-top: 2rem;">
<p><strong>PromptPrepML</strong> - Automated ML Data Preprocessing</p>
<p><small>Convert natural language prompts into ML-ready datasets</small></p>
</div>
""", unsafe_allow_html=True)


def main():
    """Render the PromptPrepML page: upload -> configure -> results.

    The current step is kept in ``st.session_state.step``. Each step is drawn
    by its own helper, which advances the step and triggers a rerun when the
    user moves on. Nothing below the header is drawn if no backend answers
    the health check.
    """
    st.markdown('<h1 class="main-header">🤖 PromptPrepML</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)

    backend_healthy, backend_url = check_backend_health()
    if not backend_healthy:
        st.error("❌ Backend is not running! Please start the backend:")
        st.code(
            "\n"
            "cd promptprepml/backend\n"
            "venv\\Scripts\\activate\n"
            "python app/main.py\n"
            "# OR wait for deployed backend to be ready\n"
        )
        st.info("🚀 **Deploying backend to cloud...** This will make the app work standalone!")
        return
    st.success(f"✅ Backend connected at: {backend_url}")

    # Seed the keys the step helpers read.
    for key, default in (('step', 'upload'), ('upload_result', None), ('processing_result', None)):
        if key not in st.session_state:
            st.session_state[key] = default

    _render_sidebar_steps()

    current_step = st.session_state.step
    if current_step == 'upload':
        _render_upload_step()
    elif current_step == 'configure':
        _render_configure_step()
    elif current_step == 'results':
        _render_results_step()

    _render_footer()
# Build the page only when this file is run as the main script, not on import.
if __name__ == "__main__":
    main()