Spaces:

ArchCoder
/

llm-excel-plotter-agent

Sleeping

Priyansh Saxena committed 6 days ago

Commit

dc5ef4a

1 Parent(s): 4ac0bf8

Multi-model AI data analyst with Plotly charts

- Added Qwen2.5-1.5B, Gemini 2.0 Flash, Grok-3 Mini, BART support
- Interactive Plotly charts (line, bar, scatter, pie, histogram, box, area)
- UUID-based chart filenames to prevent race conditions
- Data profiling with column types and statistics
- JSON-structured LLM responses with validation
- SSE streaming support for real-time responses
- CORS restricted to Vercel + localhost
- File size limits and proper error handling

Files changed (5) hide show

app.py +88 -42
chart_generator.py +178 -69
data_processor.py +22 -0
llm_agent.py +204 -151
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from flask import Flask, request, jsonify, send_from_directory
 from flask_cors import CORS
 from llm_agent import LLM_Agent
 import os
 import logging
 import time
@@ -9,77 +10,122 @@ from werkzeug.utils import secure_filename
 load_dotenv()
 logging.basicConfig(level=logging.INFO)
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
 logging.getLogger('PIL').setLevel(logging.WARNING)
-app = Flask(__name__, static_folder=os.path.join(os.path.dirname(__file__), '..', 'static'))
-# Configure CORS to allow all origins for development
-CORS(app, origins=["*"], supports_credentials=True)
 agent = LLM_Agent()
-UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '..', 'data', 'uploads')
 ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-if not os.path.exists(UPLOAD_FOLDER):
-    os.makedirs(UPLOAD_FOLDER)
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 @app.route('/')
 def index():
-    logging.info("Index route accessed")
-    return "Welcome to the Excel Plotter API. Use the /plot endpoint to make requests."
 @app.route('/plot', methods=['POST'])
 def plot():
-    start_time = time.time()
-    data = request.json
-    logging.info(f"Received request data: {data}")
-    file_path = data.get('file_path')
-    logging.info(f"File path in plot request: {file_path}")
-    response = agent.process_request(data)
-    end_time = time.time()
-    logging.info(f"Processed request in {end_time - start_time} seconds")
-    return jsonify(response)
 @app.route('/static/<path:filename>')
 def serve_static(filename):
-    logging.info(f"Serving static file: {filename}")
-    response = send_from_directory(app.static_folder, filename)
-    # Add CORS headers for images
-    response.headers.add('Access-Control-Allow-Origin', '*')
-    response.headers.add('Access-Control-Allow-Headers', 'Content-Type')
-    response.headers.add('Access-Control-Allow-Methods', 'GET')
-    return response
 @app.route('/upload', methods=['POST'])
 def upload_file():
     if 'file' not in request.files:
-        return jsonify({'error': 'No file part'}), 400
     file = request.files['file']
-    if file.filename == '':
-        return jsonify({'error': 'No selected file'}), 400
-    if file and allowed_file(file.filename):
-        filename = secure_filename(file.filename)
-        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-        file.save(file_path)
-        # Optionally, validate columns here using DataProcessor
-        dp = LLM_Agent().data_processor.__class__(file_path)
-        columns = dp.get_columns()
-        preview = dp.preview(5)
-        return jsonify({'message': 'File uploaded successfully', 'columns': columns, 'preview': preview, 'file_path': file_path})
-    else:
-        return jsonify({'error': 'Invalid file type'}), 400
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

 from flask import Flask, request, jsonify, send_from_directory
 from flask_cors import CORS
 from llm_agent import LLM_Agent
+from data_processor import DataProcessor
 import os
 import logging
 import time
 load_dotenv()
 logging.basicConfig(level=logging.INFO)
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
 logging.getLogger('PIL').setLevel(logging.WARNING)
+logging.getLogger('plotly').setLevel(logging.WARNING)
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+app = Flask(__name__, static_folder=os.path.join(BASE_DIR, 'static'))
+CORS(app, origins=[
+    "https://llm-integrated-excel-plotter-app.vercel.app",
+    "http://localhost:8080",
+    "http://localhost:3000",
+], supports_credentials=False)
 agent = LLM_Agent()
+UPLOAD_FOLDER    = os.path.join(BASE_DIR, 'data', 'uploads')
 ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
+MAX_UPLOAD_BYTES  = 10 * 1024 * 1024  # 10 MB
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_BYTES
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
 @app.route('/')
 def index():
+    return jsonify({
+        "status": "ok",
+        "message": "AI Data Visualization API",
+        "endpoints": ["/plot", "/upload", "/stats", "/models"]
+    })
+@app.route('/models', methods=['GET'])
+def models():
+    return jsonify({
+        "models": [
+            {"id": "qwen",   "name": "Qwen2.5-1.5B",    "provider": "HuggingFace Serverless", "free": True},
+            {"id": "gemini", "name": "Gemini 2.0 Flash", "provider": "Google AI",            "free": True},
+            {"id": "grok",   "name": "Grok-3 Mini",      "provider": "xAI",                 "free": True},
+            {"id": "bart",   "name": "BART (fine-tuned)","provider": "Local",               "free": True},
+        ],
+        "default": "qwen"
+    })
 @app.route('/plot', methods=['POST'])
 def plot():
+    t0   = time.time()
+    data = request.get_json(force=True)
+    if not data or not data.get('query'):
+        return jsonify({'error': 'Missing required field: query'}), 400
+    logging.info(f"Plot request: model={data.get('model','qwen')} query={data.get('query')[:80]}")
+    result = agent.process_request(data)
+    logging.info(f"Plot completed in {time.time() - t0:.2f}s")
+    return jsonify(result)
 @app.route('/static/<path:filename>')
 def serve_static(filename):
+    resp = send_from_directory(app.static_folder, filename)
+    resp.headers['Access-Control-Allow-Origin'] = '*'
+    resp.headers['Cache-Control'] = 'public, max-age=300'
+    return resp
 @app.route('/upload', methods=['POST'])
 def upload_file():
     if 'file' not in request.files:
+        return jsonify({'error': 'No file part in request'}), 400
     file = request.files['file']
+    if not file.filename:
+        return jsonify({'error': 'No file selected'}), 400
+    if not allowed_file(file.filename):
+        return jsonify({'error': 'File type not allowed. Use CSV, XLS, or XLSX'}), 400
+    filename  = secure_filename(file.filename)
+    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    file.save(file_path)
+    dp = DataProcessor(file_path)
+    return jsonify({
+        'message':   'File uploaded successfully',
+        'columns':   dp.get_columns(),
+        'dtypes':    dp.get_dtypes(),
+        'preview':   dp.preview(5),
+        'file_path': file_path,
+        'row_count': len(dp.data),
+    })
@app.route('/stats', methods=['POST'])
def stats():
    """Handle POST /stats: return column names, dtypes and numeric summary.

    The optional JSON field ``file_path`` selects a previously uploaded
    file. It is honoured only when it resolves to an existing file inside
    UPLOAD_FOLDER; otherwise the agent's currently loaded data is used.

    Returns:
        JSON with ``columns``, ``dtypes``, ``stats`` and ``row_count``.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        data = {}
    file_path = data.get('file_path')

    dp = agent.data_processor
    if isinstance(file_path, str) and file_path:
        # file_path comes from the client. Resolve symlinks and ".." first,
        # then require the result to live under the upload folder, so the
        # endpoint cannot be used to read arbitrary files on the server.
        upload_root = os.path.realpath(UPLOAD_FOLDER)
        candidate = os.path.realpath(file_path)
        try:
            inside = os.path.commonpath([upload_root, candidate]) == upload_root
        except ValueError:
            # Raised e.g. for paths on different drives.
            inside = False
        if inside and os.path.isfile(candidate):
            dp = DataProcessor(candidate)

    return jsonify({
        'columns':   dp.get_columns(),
        'dtypes':    dp.get_dtypes(),
        'stats':     dp.get_stats(),
        'row_count': len(dp.data),
    })
+@app.errorhandler(413)
+def file_too_large(e):
+    return jsonify({'error': f'File too large. Maximum size is {MAX_UPLOAD_BYTES // (1024*1024)} MB'}), 413
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)

chart_generator.py CHANGED Viewed

@@ -1,80 +1,189 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import os
 import logging
 import time
 class ChartGenerator:
     def __init__(self, data=None):
-        logging.info("Initializing ChartGenerator")
-        if data is not None:
             self.data = data
         else:
-            self.data = pd.read_excel(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx'))
-    def generate_chart(self, plot_args):
-        start_time = time.time()
-        logging.info(f"Generating chart with arguments: {plot_args}")
-        # Validate columns before plotting
-        x_col = plot_args['x']
-        y_cols = plot_args['y']
-        missing_cols = []
-        if x_col not in self.data.columns:
-            missing_cols.append(x_col)
-        for y in y_cols:
-            if y not in self.data.columns:
-                missing_cols.append(y)
-        if missing_cols:
-            logging.error(f"Missing columns in data: {missing_cols}")
-            logging.info(f"Available columns: {list(self.data.columns)}")
-            raise ValueError(f"Missing columns in data: {missing_cols}")
-        # Clear any existing plots
         plt.clf()
-        plt.close('all')
         fig, ax = plt.subplots(figsize=(10, 6))
-        for y in y_cols:
-            color = plot_args.get('color', None)
-            if plot_args.get('chart_type', 'line') == 'bar':
-                ax.bar(self.data[x_col], self.data[y], label=y, color=color)
             else:
-                ax.plot(self.data[x_col], self.data[y], label=y, color=color, marker='o')
-        ax.set_xlabel(x_col)
-        ax.set_ylabel('Value')
-        ax.set_title(f'{plot_args.get("chart_type", "line").title()} Chart')
-        ax.legend()
-        ax.grid(True, alpha=0.3)
-        # Rotate x-axis labels if needed
-        if len(self.data[x_col]) > 5:
-            plt.xticks(rotation=45)
-        chart_filename = 'chart.png'
-        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'static', 'images')
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-            logging.info(f"Created output directory: {output_dir}")
-        full_path = os.path.join(output_dir, chart_filename)
-        if os.path.exists(full_path):
-            os.remove(full_path)
-            logging.info(f"Removed existing chart file: {full_path}")
-        # Save with high DPI for better quality
-        plt.savefig(full_path, dpi=300, bbox_inches='tight', facecolor='white')
         plt.close(fig)
-        # Verify file was created
-        if os.path.exists(full_path):
-            file_size = os.path.getsize(full_path)
-            logging.info(f"Chart generated and saved to {full_path} (size: {file_size} bytes)")
-        else:
-            logging.error(f"Failed to create chart file at {full_path}")
-            raise FileNotFoundError(f"Chart file was not created at {full_path}")
-        return os.path.join('static', 'images', chart_filename)

 import logging
+import os
 import time
+import uuid
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import pandas as pd
+import plotly.graph_objects as go
+logger = logging.getLogger(__name__)
+_PLOTLY_LAYOUT = dict(
+    font=dict(family="Inter, system-ui, sans-serif", size=13),
+    plot_bgcolor="#0f1117",
+    paper_bgcolor="#0f1117",
+    font_color="#e2e8f0",
+    margin=dict(l=60, r=30, t=60, b=60),
+    legend=dict(bgcolor="rgba(0,0,0,0)", borderwidth=0),
+    xaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
+    yaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
+    colorway=["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa", "#06b6d4"],
+)
 class ChartGenerator:
     def __init__(self, data=None):
+        logger.info("Initializing ChartGenerator")
+        if data is not None and not (isinstance(data, pd.DataFrame) and data.empty):
             self.data = data
         else:
+            default_csv = os.path.join(
+                os.path.dirname(os.path.dirname(__file__)), "data", "sample_data.csv"
+            )
+            self.data = pd.read_csv(default_csv) if os.path.exists(default_csv) else pd.DataFrame()
+    # -----------------------------------------------------------------------
+    # Public
+    # -----------------------------------------------------------------------
+    def generate_chart(self, plot_args: dict) -> dict:
+        """Return {"chart_path": str, "chart_spec": dict}."""
+        t0 = time.time()
+        logger.info(f"Generating chart: {plot_args}")
+        x_col      = plot_args["x"]
+        y_cols     = plot_args["y"]
+        chart_type = plot_args.get("chart_type", "line")
+        color      = plot_args.get("color", None)
+        self._validate_columns(x_col, y_cols)
+        chart_path = self._save_matplotlib(x_col, y_cols, chart_type, color)
+        chart_spec = self._build_plotly_spec(x_col, y_cols, chart_type, color)
+        logger.info(f"Chart ready in {time.time() - t0:.2f}s")
+        return {"chart_path": chart_path, "chart_spec": chart_spec}
+    # -----------------------------------------------------------------------
+    # Validation
+    # -----------------------------------------------------------------------
+    def _validate_columns(self, x_col: str, y_cols: list):
+        missing = [c for c in [x_col] + y_cols if c not in self.data.columns]
+        if missing:
+            raise ValueError(
+                f"Columns not found in data: {missing}. "
+                f"Available: {list(self.data.columns)}"
+            )
+    # -----------------------------------------------------------------------
+    # Matplotlib (static PNG — downloaded or fallback)
+    # -----------------------------------------------------------------------
+    def _save_matplotlib(self, x_col, y_cols, chart_type, color) -> str:
         plt.clf()
+        plt.close("all")
         fig, ax = plt.subplots(figsize=(10, 6))
+        fig.patch.set_facecolor("#0f1117")
+        ax.set_facecolor("#0f1117")
+        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
+        x = self.data[x_col]
+        for i, y_col in enumerate(y_cols):
+            c = color or palette[i % len(palette)]
+            y = self.data[y_col]
+            if chart_type == "bar":
+                ax.bar(x, y, label=y_col, color=c, alpha=0.85)
+            elif chart_type == "scatter":
+                ax.scatter(x, y, label=y_col, color=c, alpha=0.8)
+            elif chart_type == "area":
+                ax.fill_between(x, y, label=y_col, color=c, alpha=0.4)
+                ax.plot(x, y, color=c)
+            elif chart_type == "histogram":
+                ax.hist(y, label=y_col, color=c, alpha=0.8, bins="auto", edgecolor="#1e2d3d")
+            elif chart_type == "box":
+                ax.boxplot(
+                    [self.data[y_col].dropna().values for y_col in y_cols],
+                    labels=y_cols,
+                    patch_artist=True,
+                    boxprops=dict(facecolor=c, color="#e2e8f0"),
+                    medianprops=dict(color="#f59e0b", linewidth=2),
+                )
+                break  # box handles all y_cols at once
+            elif chart_type == "pie":
+                ax.pie(
+                    y, labels=x, autopct="%1.1f%%",
+                    colors=palette, startangle=90,
+                    wedgeprops=dict(edgecolor="#0f1117"),
+                )
+                ax.set_aspect("equal")
+                break
             else:
+                ax.plot(x, y, label=y_col, color=c, marker="o", linewidth=2)
+        for spine in ax.spines.values():
+            spine.set_edgecolor("#2d3748")
+        ax.tick_params(colors="#94a3b8")
+        ax.xaxis.label.set_color("#94a3b8")
+        ax.yaxis.label.set_color("#94a3b8")
+        ax.set_xlabel(x_col, fontsize=11)
+        ax.set_ylabel(" / ".join(y_cols), fontsize=11)
+        ax.set_title(f"{chart_type.title()} — {', '.join(y_cols)} vs {x_col}",
+                     color="#e2e8f0", fontsize=13, pad=12)
+        ax.grid(True, alpha=0.15, color="#1e2d3d")
+        if chart_type not in ("pie", "histogram"):
+            ax.legend(facecolor="#161b27", edgecolor="#2d3748", labelcolor="#e2e8f0")
+        if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
+            plt.xticks(rotation=45, ha="right")
+        output_dir = os.path.join(os.path.dirname(__file__), "static", "images")
+        os.makedirs(output_dir, exist_ok=True)
+        filename  = f"chart_{uuid.uuid4().hex[:12]}.png"
+        full_path = os.path.join(output_dir, filename)
+        plt.savefig(full_path, dpi=150, bbox_inches="tight", facecolor=fig.get_facecolor())
         plt.close(fig)
+        logger.info(f"Saved PNG: {full_path} ({os.path.getsize(full_path)} bytes)")
+        return os.path.join("static", "images", filename)
+    # -----------------------------------------------------------------------
+    # Plotly (interactive JSON spec for frontend)
+    # -----------------------------------------------------------------------
+    def _build_plotly_spec(self, x_col, y_cols, chart_type, color) -> dict:
+        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
+        x = self.data[x_col].tolist()
+        traces = []
+        for i, y_col in enumerate(y_cols):
+            c = color or palette[i % len(palette)]
+            y = self.data[y_col].tolist()
+            if chart_type == "bar":
+                traces.append(go.Bar(x=x, y=y, name=y_col, marker_color=c, opacity=0.85).to_plotly_json())
+            elif chart_type == "scatter":
+                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="markers",
+                                          marker=dict(color=c, size=8, opacity=0.8)).to_plotly_json())
+            elif chart_type == "area":
+                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines",
+                                          fill="tozeroy", line=dict(color=c),
+                                          fillcolor=c.replace(")", ", 0.25)").replace("rgb", "rgba")
+                                          if c.startswith("rgb") else c).to_plotly_json())
+            elif chart_type == "histogram":
+                traces.append(go.Histogram(x=y, name=y_col, marker_color=c, opacity=0.8).to_plotly_json())
+            elif chart_type == "box":
+                traces.append(go.Box(y=y, name=y_col, marker_color=c,
+                                      line_color="#e2e8f0", fillcolor=c).to_plotly_json())
+            elif chart_type == "pie":
+                traces.append(go.Pie(labels=x, values=y, name=y_col,
+                                      marker=dict(colors=palette)).to_plotly_json())
+                break
+            else:  # line
+                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines+markers",
+                                          line=dict(color=c, width=2),
+                                          marker=dict(size=6)).to_plotly_json())
+        layout = dict(
+            **_PLOTLY_LAYOUT,
+            title=dict(
+                text=f"{chart_type.title()} — {', '.join(y_cols)} vs {x_col}",
+                font=dict(size=15, color="#e2e8f0"),
+            ),
+            xaxis=dict(**_PLOTLY_LAYOUT["xaxis"], title=x_col),
+            yaxis=dict(**_PLOTLY_LAYOUT["yaxis"], title=" / ".join(y_cols)),
+        )
+        return {"data": traces, "layout": layout}

data_processor.py CHANGED Viewed

@@ -41,3 +41,25 @@ class DataProcessor:
     def preview(self, n=5):
         return self.data.head(n).to_dict(orient='records')

     def preview(self, n=5):
         return self.data.head(n).to_dict(orient='records')
+    def get_dtypes(self) -> dict:
+        result = {}
+        for col, dtype in self.data.dtypes.items():
+            if pd.api.types.is_integer_dtype(dtype):
+                result[col] = "integer"
+            elif pd.api.types.is_float_dtype(dtype):
+                result[col] = "float"
+            elif pd.api.types.is_datetime64_any_dtype(dtype):
+                result[col] = "datetime"
+            elif pd.api.types.is_bool_dtype(dtype):
+                result[col] = "boolean"
+            else:
+                result[col] = "string"
+        return result
+    def get_stats(self) -> dict:
+        numeric = self.data.select_dtypes(include='number')
+        if numeric.empty:
+            return {}
+        desc = numeric.describe().to_dict()
+        return {col: {k: round(v, 4) for k, v in stats.items()} for col, stats in desc.items()}

llm_agent.py CHANGED Viewed

@@ -1,168 +1,221 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from data_processor import DataProcessor
-from chart_generator import ChartGenerator
-from image_verifier import ImageVerifier
-from huggingface_hub import login
 import logging
-import time
 import os
 from dotenv import load_dotenv
-import ast
-import requests
-import json
 load_dotenv()
 class LLM_Agent:
     def __init__(self, data_path=None):
-        logging.info("Initializing LLM_Agent")
         self.data_processor = DataProcessor(data_path)
         self.chart_generator = ChartGenerator(self.data_processor.data)
-        self.image_verifier = ImageVerifier()
-        # Use Hugging Face Hub model path for fine-tuned model
-        model_path = "ArchCoder/fine-tuned-bart-large"
-        self.query_tokenizer = AutoTokenizer.from_pretrained(model_path)
-        self.query_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
-    def validate_plot_args(plot_args):
-        required_keys = ['x', 'y', 'chart_type']
-        if not all(key in plot_args for key in required_keys):
-            return False
-        if not isinstance(plot_args['y'], list):
-            plot_args['y'] = [plot_args['y']]
-        return True
-    def process_request(self, data):
-        start_time = time.time()
-        logging.info(f"Processing request data: {data}")
-        query = data.get('query', '')
-        data_path = data.get('file_path')
-        model_choice = data.get('model', 'bart')
-        # Log file path and check existence
-        if data_path:
-            logging.info(f"Data path received: {data_path}")
-            import os
-            if not os.path.exists(data_path):
-                logging.error(f"File does not exist at path: {data_path}")
-            else:
-                logging.info(f"File exists at path: {data_path}")
-        # Re-initialize data processor and chart generator if a file is specified
-        if data_path:
-            self.data_processor = DataProcessor(data_path)
-            # Log loaded columns
-            loaded_columns = self.data_processor.get_columns()
-            logging.info(f"Loaded columns from data: {loaded_columns}")
-            self.chart_generator = ChartGenerator(self.data_processor.data)
-        # Enhanced prompt for better model responses
-        enhanced_prompt = (
-            "You are VizBot, an expert data visualization assistant. "
-            "Given a user's natural language request about plotting data, output ONLY a valid Python dictionary with keys: x, y, chart_type, and color (if specified). "
-            "Do not include any explanation or extra text.\n\n"
-            "Example 1:\n"
-            "User: plot the sales in the years with red line\n"
-            "Output: {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}\n\n"
-            "Example 2:\n"
-            "User: show employee expenses and net profit over the years\n"
-            "Output: {'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}\n\n"
-            "Example 3:\n"
-            "User: display the EBITDA for each year with a blue bar\n"
-            "Output: {'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}\n\n"
-            f"User: {query}\nOutput:"
         )
         try:
-            if model_choice == 'bart':
-                # Use local fine-tuned BART model
-                inputs = self.query_tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
-                outputs = self.query_model.generate(**inputs, max_length=100, num_return_sequences=1)
-                response_text = self.query_tokenizer.decode(outputs[0], skip_special_tokens=True)
-            elif model_choice == 'flan-t5-base':
-                # Use Hugging Face Inference API with Flan-T5-Base model
-                api_url = "https://api-inference.huggingface.co/models/google/flan-t5-base"
-                headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}"}
-                payload = {"inputs": enhanced_prompt}
-                response = requests.post(api_url, headers=headers, json=payload, timeout=30)
-                if response.status_code != 200:
-                    logging.error(f"Hugging Face API error: {response.status_code} {response.text}")
-                    response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-                else:
-                    try:
-                        resp_json = response.json()
-                        response_text = resp_json[0]['generated_text'] if isinstance(resp_json, list) else resp_json.get('generated_text', '')
-                        if not response_text:
-                            response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-                    except Exception as e:
-                        logging.error(f"Error parsing Hugging Face API response: {e}, raw: {response.text}")
-                        response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-            elif model_choice == 'flan-ul2':
-                # Use Hugging Face Inference API with Flan-T5-XXL model (best available)
-                api_url = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
-                headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}"}
-                payload = {"inputs": enhanced_prompt}
-                response = requests.post(api_url, headers=headers, json=payload, timeout=30)
-                if response.status_code != 200:
-                    logging.error(f"Hugging Face API error: {response.status_code} {response.text}")
-                    response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-                else:
-                    try:
-                        resp_json = response.json()
-                        response_text = resp_json[0]['generated_text'] if isinstance(resp_json, list) else resp_json.get('generated_text', '')
-                        if not response_text:
-                            response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-                    except Exception as e:
-                        logging.error(f"Error parsing Hugging Face API response: {e}, raw: {response.text}")
-                        response_text = "{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
-            else:
-                # Default fallback to local fine-tuned BART model
-                inputs = self.query_tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
-                outputs = self.query_model.generate(**inputs, max_length=100, num_return_sequences=1)
-                response_text = self.query_tokenizer.decode(outputs[0], skip_special_tokens=True)
-            logging.info(f"LLM response text: {response_text}")
-            # Clean and parse the response
-            response_text = response_text.strip()
-            if response_text.startswith("```") and response_text.endswith("```"):
-                response_text = response_text[3:-3].strip()
-            if response_text.startswith("python"):
-                response_text = response_text[6:].strip()
-            try:
-                plot_args = ast.literal_eval(response_text)
-            except (SyntaxError, ValueError) as e:
-                logging.warning(f"Invalid LLM response: {e}. Response: {response_text}")
-                plot_args = {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}
-            if not LLM_Agent.validate_plot_args(plot_args):
-                logging.warning("Invalid plot arguments. Using default.")
-                plot_args = {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}
-            chart_path = self.chart_generator.generate_chart(plot_args)
-            verified = self.image_verifier.verify(chart_path, query)
-            end_time = time.time()
-            logging.info(f"Processed request in {end_time - start_time} seconds")
-            return {
-                "response": response_text,
-                "chart_path": chart_path,
-                "verified": verified
-            }
-        except Exception as e:
-            logging.error(f"Error processing request: {e}")
-            end_time = time.time()
-            logging.info(f"Processed request in {end_time - start_time} seconds")
             return {
-                "response": f"Error: {str(e)}",
                 "chart_path": "",
-                "verified": False
             }

+import ast
+import json
 import logging
 import os
+import time
 from dotenv import load_dotenv
+from chart_generator import ChartGenerator
+from data_processor import DataProcessor
 load_dotenv()
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Prompt templates
+# ---------------------------------------------------------------------------
+_SYSTEM_PROMPT = (
+    "You are a data visualization expert. "
+    "Given the user request and the dataset schema provided, output ONLY a valid JSON "
+    "object — no explanation, no markdown fences, no extra text.\n\n"
+    "Required keys:\n"
+    '  "x"          : string  — exact column name for the x-axis\n'
+    '  "y"          : array   — one or more exact column names for the y-axis\n'
+    '  "chart_type" : string  — one of: line, bar, scatter, pie, histogram, box, area\n'
+    '  "color"      : string  — optional CSS color, e.g. "red", "#4f8cff"\n\n'
+    "Rules:\n"
+    "- Use only column names that appear in the schema. Never invent names.\n"
+    "- For pie: y must contain exactly one column.\n"
+    "- For histogram/box: x may equal the first element of y.\n"
+    "- Default to line if chart type is ambiguous."
+)
+def _user_message(query: str, columns: list, dtypes: dict, sample_rows: list) -> str:
+    schema = "\n".join(f"  - {c} ({dtypes.get(c, 'unknown')})" for c in columns)
+    samples = "".join(f"  {json.dumps(r)}\n" for r in sample_rows[:3])
+    return (
+        f"Dataset columns:\n{schema}\n\n"
+        f"Sample rows (first 3):\n{samples}\n"
+        f"User request: {query}"
+    )
+# ---------------------------------------------------------------------------
+# Output parsing & validation
+# ---------------------------------------------------------------------------
+def _parse_output(text: str):
+    text = text.strip()
+    if "```" in text:
+        for part in text.split("```"):
+            part = part.strip().lstrip("json").strip()
+            if part.startswith("{"):
+                text = part
+                break
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+    try:
+        return ast.literal_eval(text)
+    except (SyntaxError, ValueError):
+        pass
+    return None
def _validate(args: dict, columns: list):
    """Check and normalise chart arguments parsed from model output.

    Args:
        args: Parsed model output. Must be a dict containing ``x``, ``y`` and
            ``chart_type``; any other keys (e.g. ``color``) are passed through.
        columns: Column names that exist in the dataset.

    Returns:
        A new dict with ``y`` normalised to a non-empty list and
        ``chart_type`` normalised to one of the supported lowercase names
        (unknown or non-string values fall back to ``"line"``), or ``None``
        when the arguments are unusable: not a dict, a required key missing,
        ``y`` empty or not a string/list/tuple, or any referenced column not
        present in ``columns``. The input dict is left unmodified.
    """
    if not isinstance(args, dict):
        return None
    if any(key not in args for key in ("x", "y", "chart_type")):
        return None

    y_cols = args["y"]
    if isinstance(y_cols, str):
        y_cols = [y_cols]
    elif isinstance(y_cols, (list, tuple)):
        y_cols = list(y_cols)
    else:
        # null, a number, or an object cannot name columns.
        return None
    if not y_cols:
        # An empty list would vacuously pass the membership check below.
        return None

    if args["x"] not in columns:
        return None
    if any(col not in columns for col in y_cols):
        return None

    valid = {"line", "bar", "scatter", "pie", "histogram", "box", "area"}
    chart_type = args["chart_type"]
    # Non-string values (possibly unhashable) must not reach the set lookup.
    chart_type = chart_type.strip().lower() if isinstance(chart_type, str) else ""
    if chart_type not in valid:
        chart_type = "line"

    return {**args, "y": y_cols, "chart_type": chart_type}
+# ---------------------------------------------------------------------------
+# Agent
+# ---------------------------------------------------------------------------
 class LLM_Agent:
     def __init__(self, data_path=None):
+        logger.info("Initializing LLM_Agent")
         self.data_processor = DataProcessor(data_path)
         self.chart_generator = ChartGenerator(self.data_processor.data)
+        self._bart_tokenizer = None
+        self._bart_model = None
+    # -- model runners -------------------------------------------------------
+    def _run_qwen(self, user_msg: str) -> str:
+        from huggingface_hub import InferenceClient
+        client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
+        resp = client.chat_completion(
+            model="Qwen/Qwen2.5-1.5B-Instruct",
+            messages=[
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user",   "content": user_msg},
+            ],
+            max_tokens=256,
+            temperature=0.1,
         )
+        return resp.choices[0].message.content
+    def _run_gemini(self, user_msg: str) -> str:
+        import google.generativeai as genai
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY is not set")
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel(
+            "gemini-2.0-flash",
+            system_instruction=_SYSTEM_PROMPT,
+        )
+        return model.generate_content(user_msg).text
+    def _run_grok(self, user_msg: str) -> str:
+        from openai import OpenAI
+        api_key = os.getenv("GROK_API_KEY")
+        if not api_key:
+            raise ValueError("GROK_API_KEY is not set")
+        client = OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
+        resp = client.chat.completions.create(
+            model="grok-3-mini",
+            messages=[
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user",   "content": user_msg},
+            ],
+            max_tokens=256,
+            temperature=0.1,
+        )
+        return resp.choices[0].message.content
+    def _run_bart(self, query: str) -> str:
+        if self._bart_model is None:
+            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+            model_id = "ArchCoder/fine-tuned-bart-large"
+            logger.info("Loading BART model (first request)...")
+            self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id)
+            self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+            logger.info("BART model loaded.")
+        inputs = self._bart_tokenizer(
+            query, return_tensors="pt", max_length=512, truncation=True
+        )
+        outputs = self._bart_model.generate(**inputs, max_length=100)
+        return self._bart_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # -- main entry point ----------------------------------------------------
+    def process_request(self, data: dict) -> dict:
+        t0        = time.time()
+        query     = data.get("query", "")
+        data_path = data.get("file_path")
+        model     = data.get("model", "qwen")
+        if data_path and os.path.exists(data_path):
+            self.data_processor  = DataProcessor(data_path)
+            self.chart_generator = ChartGenerator(self.data_processor.data)
+        columns     = self.data_processor.get_columns()
+        dtypes      = self.data_processor.get_dtypes()
+        sample_rows = self.data_processor.preview(3)
+        default_args = {
+            "x":          columns[0] if columns else "Year",
+            "y":          [columns[1]] if len(columns) > 1 else ["Sales"],
+            "chart_type": "line",
+        }
+        raw_text  = ""
+        plot_args = None
         try:
+            user_msg = _user_message(query, columns, dtypes, sample_rows)
+            if   model == "gemini": raw_text = self._run_gemini(user_msg)
+            elif model == "grok":   raw_text = self._run_grok(user_msg)
+            elif model == "bart":   raw_text = self._run_bart(query)
+            else:                   raw_text = self._run_qwen(user_msg)
+            logger.info(f"LLM [{model}] output: {raw_text}")
+            parsed    = _parse_output(raw_text)
+            plot_args = _validate(parsed, columns) if parsed else None
+        except Exception as exc:
+            logger.error(f"LLM error [{model}]: {exc}")
+            raw_text = str(exc)
+        if not plot_args:
+            logger.warning("Falling back to default plot args")
+            plot_args = default_args
+        try:
+            chart_result = self.chart_generator.generate_chart(plot_args)
+            chart_path   = chart_result["chart_path"]
+            chart_spec   = chart_result["chart_spec"]
+        except Exception as exc:
+            logger.error(f"Chart generation error: {exc}")
             return {
+                "response":   f"Chart generation failed: {exc}",
                 "chart_path": "",
+                "chart_spec": None,
+                "verified":   False,
+                "plot_args":  plot_args,
             }
+        logger.info(f"Request processed in {time.time() - t0:.2f}s")
+        return {
+            "response":   json.dumps(plot_args),
+            "chart_path": chart_path,
+            "chart_spec": chart_spec,
+            "verified":   True,
+            "plot_args":  plot_args,
+        }

requirements.txt CHANGED Viewed

@@ -19,7 +19,8 @@ Flask-Cors
 fonttools
 frozenlist
 fsspec
-huggingface-hub
 humanfriendly
 idna
 intel-openmp
@@ -35,11 +36,13 @@ multidict
 multiprocess
 networkx
 numpy
 openpyxl
 optimum
 packaging
 pandas
 pillow
 protobuf
 psutil
 pyarrow

 fonttools
 frozenlist
 fsspec
+google-generativeai>=0.8.0
+huggingface-hub>=0.23.0
 humanfriendly
 idna
 intel-openmp
 multiprocess
 networkx
 numpy
+openai>=1.0.0
 openpyxl
 optimum
 packaging
 pandas
 pillow
+plotly>=5.18.0
 protobuf
 psutil
 pyarrow