# sf-api / app.py — Hugging Face Space upload (commit 22e0453, "Update app.py").
# NOTE: the lines above the module docstring were Space page chrome, not Python;
# they are preserved here as a comment so the file remains importable.
"""
Sixfinger Backend API - FRONTEND UYUMLU VERSİYON
Ultra-fast AI Chat Backend with Multi-Model Support
Supports: Groq, LLM7.io, OpenRouter
"""
import os
import time
import json
import logging
from typing import Optional, Dict, Any
from datetime import datetime
from fastapi import FastAPI, HTTPException, Header, Request
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from groq import Groq
from openai import OpenAI
# ========== CONFIGURATION ==========
API_VERSION = "1.1.0"
# SECURITY FIX: a live Groq API key was previously hard-coded as the getenv()
# default, leaking the credential in source control. Keys must come from the
# environment only; when unset, the corresponding client stays disabled (None).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Accept both upper- and lower-case env var spellings for OpenRouter.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("openrouter_api_key")
# Attribution values sent to OpenRouter (see call_openrouter_api).
OPENROUTER_SITE_URL = os.getenv("OPENROUTER_SITE_URL", "https://sfapi.pythonanywhere.com")
OPENROUTER_SITE_NAME = os.getenv("OPENROUTER_SITE_NAME", "Sixfinger Backend")
# ========== API PROVIDERS ==========
# Registry of upstream chat backends. "type" selects the client implementation:
# "groq" uses the native Groq SDK; "openai_compatible" uses the OpenAI SDK
# pointed at the provider's base_url (see the client construction below).
PROVIDERS = {
    "groq": {
        "name": "Groq",
        "type": "groq",
        # Needs GROQ_API_KEY; the client is only built when the key is set.
        "requires_key": True
    },
    "llm7": {
        "name": "LLM7.io",
        "type": "openai_compatible",
        "base_url": "https://api.llm7.io/v1",
        # LLM7's free tier ignores the key, but the OpenAI SDK requires a
        # non-empty string — hence the "unused" placeholder.
        "api_key": "unused",
        "requires_key": False
    },
    "openrouter": {
        "name": "OpenRouter",
        "type": "openai_compatible",
        "base_url": "https://openrouter.ai/api/v1",
        # May be None when OPENROUTER_API_KEY is unset; the client below is
        # then not constructed at all.
        "api_key": OPENROUTER_API_KEY,
        "requires_key": True
    }
}
# ========== MODEL MAPPING ==========
# Frontend-facing model catalog. Each key is the identifier the frontend sends
# via the X-Model header; "model_id" is the provider-side model actually
# invoked; "plans" gates access per subscription tier.
#
# NOTE(review): several entries advertise one model in "display_name" but map
# "model_id" to a different underlying model (e.g. Qwen/Kimi/GPT-OSS entries
# all route to Llama models on Groq). This looks like deliberate aliasing for
# frontend stability — confirm before renaming or "fixing" the ids.
MODELS = {
    # ============ FREE PLAN MODELS ============
    # Groq Models (Free)
    "llama-8b-instant": {
        "provider": "groq",
        "model_id": "llama-3.1-8b-instant",
        "display_name": "Llama 3.1 8B Instant",
        "size": "8B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "description": "Hızlı ve hafif genel amaçlı model",
        "plans": ["free", "starter", "pro", "plus"],
        # daily_limit is informational only — nothing in this file enforces it.
        "daily_limit": 14400
    },
    "allam-2-7b": {
        "provider": "groq",
        "model_id": "allam-2-7b",
        "display_name": "Allam 2 7B",
        "size": "7B",
        "language": "Turkish/Arabic",
        "speed": "⚡⚡",
        "description": "Türkçe ve Arapça optimizeli model",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 300
    },
    "step-3.5-flash": {
        "provider": "openrouter",
        "model_id": "stepfun/step-3.5-flash:free",
        "display_name": "Step 3.5 Flash (Free)",
        "size": "Unknown",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "description": "OpenRouter uzerinden hizli StepFun modeli",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "nemotron-3-super-120b-a12b": {
        "provider": "openrouter",
        "model_id": "nvidia/nemotron-3-super-120b-a12b:free",
        "display_name": "Nemotron 3 Super 120B A12B (Free)",
        "size": "120B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "OpenRouter uzerinden NVIDIA Nemotron modeli",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 1000
    },
    # ============ STARTER PLAN MODELS ============
    # LLM7.io Models (Starter+)
    "gpt4-nano": {
        "provider": "llm7",
        "model_id": "gpt-4.1-nano-2025-04-14",
        "display_name": "GPT-4.1 Nano",
        "size": "Nano",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "description": "OpenAI GPT-4 tabanlı hızlı model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    # Groq Models (Starter+)
    "qwen3-32b": {
        "provider": "groq",
        # NOTE(review): display name says Qwen3 32B but this invokes
        # llama-3.3-70b-versatile — confirm intended aliasing.
        "model_id": "llama-3.3-70b-versatile",
        "display_name": "Qwen3 32B",
        "size": "32B",
        "language": "Turkish/Chinese",
        "speed": "⚡⚡",
        "description": "Türkçe ve Çince optimize edilmiş model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-70b": {
        "provider": "groq",
        "model_id": "llama-3.3-70b-versatile",
        "display_name": "Llama 3.3 70B",
        "size": "70B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "Güçlü ve çok yönlü büyük model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-maverick-17b": {
        "provider": "groq",
        # NOTE(review): aliased to the 8B instant model despite the 17B label.
        "model_id": "llama-3.1-8b-instant",
        "display_name": "Llama Maverick 17B",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "Deneysel maverick model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-scout-17b": {
        "provider": "groq",
        # NOTE(review): aliased to the 8B instant model despite the 17B label.
        "model_id": "llama-3.1-8b-instant",
        "display_name": "Llama Scout 17B",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "description": "Keşif odaklı hızlı model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "gpt-oss-20b": {
        "provider": "groq",
        # NOTE(review): "GPT-OSS" label, Llama backend — confirm aliasing.
        "model_id": "llama-3.1-8b-instant",
        "display_name": "GPT-OSS 20B",
        "size": "20B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "Açık kaynak GPT alternatifleri",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "glm-4.5-air": {
        "provider": "openrouter",
        "model_id": "z-ai/glm-4.5-air:free",
        "display_name": "GLM 4.5 Air (Free)",
        "size": "Unknown",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "OpenRouter uzerinden GLM 4.5 Air modeli",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "qwen3-coder": {
        "provider": "openrouter",
        "model_id": "qwen/qwen3-coder:free",
        "display_name": "Qwen3 Coder (Free)",
        "size": "Unknown",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "OpenRouter uzerinden kod odakli Qwen modeli",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "lfm-2.5-1.2b-thinking": {
        "provider": "openrouter",
        "model_id": "liquid/lfm-2.5-1.2b-thinking:free",
        "display_name": "LFM 2.5 1.2B Thinking (Free)",
        "size": "1.2B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "description": "OpenRouter uzerinden hizli dusunme odakli model",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    # ============ PRO PLAN MODELS ============
    "gpt-oss-120b": {
        "provider": "groq",
        # NOTE(review): "120B" label, 70B Llama backend — confirm aliasing.
        "model_id": "llama-3.3-70b-versatile",
        "display_name": "GPT-OSS 120B",
        "size": "120B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "description": "En büyük açık kaynak model",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    },
    "kimi-k2": {
        "provider": "groq",
        # NOTE(review): "Kimi K2" label, Llama backend — confirm aliasing.
        "model_id": "llama-3.3-70b-versatile",
        "display_name": "Kimi K2",
        "size": "Unknown",
        "language": "Chinese",
        "speed": "⚡⚡",
        "description": "Çince uzmanı güçlü model",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    }
}
# Automatic model selection per plan (used when no valid X-Model header is sent).
# NOTE(review): every default here is an OpenRouter model, so a missing
# OPENROUTER_API_KEY makes the default path fail and rely on fallback — confirm.
DEFAULT_MODELS = {
    "free": "step-3.5-flash",
    "starter": "glm-4.5-air",
    "pro": "glm-4.5-air",
    "plus": "glm-4.5-air"
}
# Fallback model to try when the primary provider fails.
# Key: primary provider name; value: model key to retry with (see _chat_candidates).
PROVIDER_FALLBACK = {
    "groq": "gpt4-nano",        # Groq fails → gpt4-nano via LLM7
    "llm7": "llama-8b-instant", # LLM7 fails → llama-8b-instant via Groq
    "openrouter": "llama-8b-instant"
}
# ========== LOGGING ==========
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
# ========== FASTAPI APP ==========
app = FastAPI(
    title="Sixfinger Backend API",
    version=API_VERSION,
    description="Ultra-fast AI Chat Backend with Multi-Provider Support",
    docs_url="/docs",
    redoc_url="/redoc"
)
# CORS
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers for credentialed requests (the CORS spec forbids a
# wildcard origin with credentials) — confirm whether credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ========== API CLIENTS ==========
# Groq Client — only constructed when a key is configured; callers must
# handle the None case (see call_groq_api).
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
# LLM7 Client — keyless free tier; the "unused" placeholder satisfies the SDK.
llm7_client = OpenAI(
    base_url=PROVIDERS["llm7"]["base_url"],
    api_key=PROVIDERS["llm7"]["api_key"]
)
# OpenRouter Client — None when OPENROUTER_API_KEY is unset.
openrouter_client = OpenAI(
    base_url=PROVIDERS["openrouter"]["base_url"],
    api_key=PROVIDERS["openrouter"]["api_key"]
) if OPENROUTER_API_KEY else None
# ========== PYDANTIC MODELS ==========
class ChatRequest(BaseModel):
    """Incoming chat payload shared by /api/chat and /api/chat/stream."""
    prompt: str = Field(..., description="User's message")
    # Generation parameters; bounds are enforced by pydantic validation.
    max_tokens: int = Field(default=100, ge=1, le=4000)
    temperature: float = Field(default=0.7, ge=0, le=2)
    top_p: float = Field(default=0.9, ge=0, le=1)
    # Optional system-prompt override; a Turkish default is injected otherwise
    # (see build_messages).
    system_prompt: Optional[str] = None
    # Prior turns as [{"role": ..., "content": ...}]; malformed items are skipped.
    history: Optional[list] = None
class ChatResponse(BaseModel):
    """
    Documented shape of the /api/chat JSON response.

    NOTE(review): this model is not wired up as a response_model, and the
    handler's actual payload has drifted (it adds "fallback_used") — confirm
    and reconcile with the frontend contract.
    """
    response: str
    model: str
    model_key: str
    model_size: str
    model_language: str
    provider: str
    attempts: int
    usage: Dict[str, int]
    parameters: Dict[str, Any]
# ========== HELPER FUNCTIONS ==========
def get_allowed_models(plan: str) -> list:
    """Return the key of every MODELS entry whose plan list contains *plan*."""
    allowed = []
    for key, config in MODELS.items():
        if plan in config["plans"]:
            allowed.append(key)
    return allowed
def select_model(plan: str, preferred_model: Optional[str] = None) -> str:
    """
    Choose a model key for *plan*.

    Honors the caller's preference when it is allowed for the plan; otherwise
    falls back to the plan default, and finally to the first allowed model.
    """
    allowed = get_allowed_models(plan)
    if preferred_model and preferred_model in allowed:
        return preferred_model
    plan_default = DEFAULT_MODELS.get(plan, "llama-8b-instant")
    if plan_default in allowed:
        return plan_default
    return allowed[0]
def build_messages(prompt: str, system_prompt: Optional[str], history: Optional[list]) -> list:
    """
    Assemble an OpenAI-style message list: system turn, prior history, then
    the new user prompt. History items missing "role" or "content" are dropped.
    """
    system_text = system_prompt if system_prompt else "Sen yardımcı bir asistansın. Adın SixFinger."
    messages = [{"role": "system", "content": system_text}]
    for turn in history or []:
        # Keep only well-formed turns; silently skip anything malformed.
        if "role" in turn and "content" in turn:
            messages.append(turn)
    messages.append({"role": "user", "content": prompt})
    return messages
def _chat_candidates(primary_key: str, primary_config: dict):
    """
    Yield (model_key, model_config, is_fallback) tuples: the primary model
    first, then at most one fallback taken from PROVIDER_FALLBACK.
    """
    yield primary_key, primary_config, False
    fallback = PROVIDER_FALLBACK.get(primary_config["provider"])
    if fallback is None or fallback == primary_key or fallback not in MODELS:
        return
    yield fallback, MODELS[fallback], True
# ========== PROVIDER-SPECIFIC API CALLS ==========
def call_groq_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """
    Issue a chat-completion request against Groq.

    Raises HTTPException(500) when no Groq key is configured or when the
    upstream call fails.
    """
    if groq_client is None:
        raise HTTPException(status_code=500, detail="Groq API key not configured")
    try:
        return groq_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream,
        )
    except Exception as exc:
        logger.error(f"Groq API error: {exc}")
        raise HTTPException(status_code=500, detail=f"Groq API error: {str(exc)}")
def call_llm7_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """
    Issue a chat-completion request against LLM7.io (keyless free tier).

    Raises HTTPException(500) when the upstream call fails.
    """
    try:
        return llm7_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream,
        )
    except Exception as exc:
        logger.error(f"LLM7 API error: {exc}")
        raise HTTPException(status_code=500, detail=f"LLM7 API error: {str(exc)}")
def call_openrouter_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """
    Issue a chat-completion request against OpenRouter.

    Raises HTTPException(500) when no OpenRouter key is configured or when
    the upstream call fails.
    """
    if not openrouter_client:
        raise HTTPException(status_code=500, detail="OpenRouter API key not configured")
    try:
        response = openrouter_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream,
            # OpenRouter app-attribution headers. BUGFIX: the documented header
            # names are "HTTP-Referer" and "X-Title"; the previous
            # "X-OpenRouter-Title" is not recognized by OpenRouter and was
            # silently ignored, so the app never got attributed.
            extra_headers={
                "HTTP-Referer": OPENROUTER_SITE_URL,
                "X-Title": OPENROUTER_SITE_NAME
            }
        )
        return response
    except Exception as e:
        logger.error(f"OpenRouter API error: {e}")
        raise HTTPException(status_code=500, detail=f"OpenRouter API error: {str(e)}")
def call_api(
    provider: str,
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """
    Universal API caller: dispatch to the provider-specific function.

    Raises HTTPException(400) for an unknown provider name.
    """
    handlers = {
        "groq": call_groq_api,
        "llm7": call_llm7_api,
        "openrouter": call_openrouter_api,
    }
    handler = handlers.get(provider)
    if handler is None:
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
    return handler(model_id, messages, max_tokens, temperature, top_p, stream)
# ========== ENDPOINTS ==========
@app.get("/health")
def health_check():
    """Liveness probe: report version, timestamp and provider key availability."""
    provider_status = {
        "groq": bool(GROQ_API_KEY),
        "llm7": True,  # keyless free tier, always available
        "openrouter": bool(OPENROUTER_API_KEY),
    }
    return {
        "status": "healthy",
        "version": API_VERSION,
        "timestamp": datetime.now().isoformat(),
        "providers": provider_status,
    }
@app.post("/api/chat")
def chat(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """
    Non-streaming chat endpoint (JSON response), frontend-compatible format.

    Selects a model from the caller's plan (optionally honoring the X-Model
    header), calls the primary provider, and transparently retries once on a
    fallback provider when the primary fails. Re-raises the last provider
    error when every candidate fails.
    """
    start_time = time.time()
    # Model selection from plan + optional client preference.
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    provider = model_config["provider"]
    logger.info(f"Chat request: plan={x_user_plan}, model={model_key}, provider={provider}")
    # Assemble system + history + user messages.
    messages = build_messages(
        request.prompt,
        request.system_prompt,
        request.history
    )
    attempts = 0
    last_error = None
    for attempt_model_key, attempt_model_config, is_fallback in _chat_candidates(model_key, model_config):
        attempts += 1
        attempt_provider = attempt_model_config["provider"]
        attempt_model_id = attempt_model_config["model_id"]
        if is_fallback:
            logger.warning(f"Primary provider failed, retrying with fallback: {attempt_model_key} via {attempt_provider}")
        try:
            response = call_api(
                provider=attempt_provider,
                model_id=attempt_model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=False
            )
            content = response.choices[0].message.content
            # getattr guards against providers that omit usage fields.
            usage = {
                "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0),
                "completion_tokens": getattr(response.usage, 'completion_tokens', 0),
                "total_tokens": getattr(response.usage, 'total_tokens', 0)
            }
            elapsed = time.time() - start_time
            logger.info(f"Chat completed: provider={attempt_provider}, model={attempt_model_key}, tokens={usage['total_tokens']}, time={elapsed:.2f}s, attempts={attempts}")
            return {
                "response": content,
                "model": attempt_model_id,
                "model_key": attempt_model_key,
                "model_size": attempt_model_config["size"],
                "model_language": attempt_model_config["language"],
                # BUGFIX: "provider" was missing from the payload even though
                # the ChatResponse schema declares it and both streaming
                # generators emit it in their "done" event.
                "provider": attempt_provider,
                "fallback_used": is_fallback,
                "attempts": attempts,
                "usage": usage,
                "parameters": {
                    "max_tokens": request.max_tokens,
                    "temperature": request.temperature,
                    "top_p": request.top_p
                }
            }
        except HTTPException as e:
            last_error = e
            logger.error(f"Provider {attempt_provider} failed (attempt {attempts}): {e.detail}")
        except Exception as e:
            last_error = HTTPException(status_code=500, detail=str(e))
            logger.error(f"Provider {attempt_provider} error (attempt {attempts}): {e}")
    # Every candidate failed: surface the last error to the exception handler.
    raise last_error or HTTPException(status_code=500, detail="All providers failed")
@app.post("/api/chat/stream")
def chat_stream(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """
    Streaming chat endpoint (SSE). Supports all providers.

    Selects a model for the caller's plan and streams provider deltas as
    Server-Sent Events: an 'info' event, then 'text' chunks, then a 'done'
    event with usage. Unlike /api/chat there is NO provider fallback here;
    a failure is reported as an {'error': ...} event inside the stream.
    """
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    provider = model_config["provider"]
    model_id = model_config["model_id"]
    logger.info(f"Stream request: plan={x_user_plan}, model={model_key}, provider={provider}")
    messages = build_messages(
        request.prompt,
        request.system_prompt,
        request.history
    )
    def generate_groq():
        """Groq streaming generator (reports real token usage from x_groq)."""
        try:
            yield f"data: {json.dumps({'info': f'Using {model_key} via Groq'})}\n\n"
            response = call_groq_api(
                model_id=model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True
            )
            total_tokens = 0
            prompt_tokens = 0
            completion_tokens = 0
            for chunk in response:
                if chunk.choices[0].delta.content:
                    text = chunk.choices[0].delta.content
                    yield f"data: {json.dumps({'text': text})}\n\n"
                # Groq attaches usage on its vendor extension of the final chunk.
                if hasattr(chunk, 'x_groq') and hasattr(chunk.x_groq, 'usage'):
                    usage_data = chunk.x_groq.usage
                    prompt_tokens = getattr(usage_data, 'prompt_tokens', 0)
                    completion_tokens = getattr(usage_data, 'completion_tokens', 0)
                    total_tokens = getattr(usage_data, 'total_tokens', 0)
            yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'groq', 'attempts': 1, 'usage': {'prompt_tokens': prompt_tokens, 'completion_tokens': completion_tokens, 'total_tokens': total_tokens}})}\n\n"
        except Exception as e:
            # Stream already started: errors must be delivered in-band.
            logger.error(f"Groq stream error: {e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
    def generate_llm7():
        """LLM7.io streaming generator (usage is approximate — counts chunks, not tokens)."""
        try:
            yield f"data: {json.dumps({'info': f'Using {model_key} via LLM7.io'})}\n\n"
            stream = llm7_client.chat.completions.create(
                model=model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True
            )
            total_completion_tokens = 0
            for chunk in stream:
                if chunk.choices[0].delta.content:
                    text = chunk.choices[0].delta.content
                    yield f"data: {json.dumps({'text': text})}\n\n"
                    # Approximation: one SSE chunk counted as one token.
                    total_completion_tokens += 1
            yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'llm7', 'attempts': 1, 'usage': {'prompt_tokens': 0, 'completion_tokens': total_completion_tokens, 'total_tokens': total_completion_tokens}})}\n\n"
        except Exception as e:
            logger.error(f"LLM7 stream error: {e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
    def generate_openrouter():
        """OpenRouter streaming generator (usage is approximate — counts chunks, not tokens)."""
        try:
            yield f"data: {json.dumps({'info': f'Using {model_key} via OpenRouter'})}\n\n"
            stream = call_openrouter_api(
                model_id=model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True
            )
            total_completion_tokens = 0
            for chunk in stream:
                # OpenRouter may emit keep-alive chunks with empty choices/delta.
                if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                    text = chunk.choices[0].delta.content
                    yield f"data: {json.dumps({'text': text})}\n\n"
                    total_completion_tokens += 1
            yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'openrouter', 'attempts': 1, 'usage': {'prompt_tokens': 0, 'completion_tokens': total_completion_tokens, 'total_tokens': total_completion_tokens}})}\n\n"
        except Exception as e:
            logger.error(f"OpenRouter stream error: {e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
    # Pick the generator matching the selected model's provider.
    if provider == "groq":
        generator = generate_groq()
    elif provider == "llm7":
        generator = generate_llm7()
    elif provider == "openrouter":
        generator = generate_openrouter()
    else:
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
    return StreamingResponse(
        generator,
        media_type="text/event-stream",
        headers={
            # Disable proxy/server buffering so deltas reach the client promptly.
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive"
        }
    )
@app.get("/api/models")
def list_models(x_user_plan: str = Header(default="free", alias="X-User-Plan")):
    """
    List every model the caller's plan may use, both flat and grouped by
    provider, plus the plan's default model.
    """
    models_info = []
    for key in get_allowed_models(x_user_plan):
        cfg = MODELS[key]
        models_info.append({
            "key": key,
            "display_name": cfg.get("display_name", key),
            "size": cfg["size"],
            "language": cfg["language"],
            "speed": cfg["speed"],
            "description": cfg.get("description", ""),
            "provider": cfg["provider"],
            "daily_limit": cfg["daily_limit"]
        })
    # Group entries by provider, preserving catalog order.
    grouped = {}
    for entry in models_info:
        grouped.setdefault(entry["provider"], []).append(entry)
    return {
        "plan": x_user_plan,
        "total_models": len(models_info),
        "models": models_info,
        "models_by_provider": grouped,
        "default_model": DEFAULT_MODELS.get(x_user_plan, "llama-8b-instant"),
        "providers": list(grouped.keys())
    }
@app.get("/api/providers")
def list_providers():
    """
    List the available API providers with their configuration status.

    Status is "active" when the provider's credentials are present (LLM7 is
    keyless and always active).
    """
    return {
        "providers": [
            {
                "id": "groq",
                "name": "Groq",
                "status": "active" if GROQ_API_KEY else "inactive",
                "description": "Ultra-fast inference with Groq LPU"
            },
            {
                "id": "llm7",
                "name": "LLM7.io",
                "status": "active",
                "description": "GPT-4 based models - Free tier available"
            },
            # BUGFIX: OpenRouter is a fully wired provider (see PROVIDERS,
            # MODELS and /health) but was missing from this listing.
            {
                "id": "openrouter",
                "name": "OpenRouter",
                "status": "active" if OPENROUTER_API_KEY else "inactive",
                "description": "Unified gateway to hosted open and frontier models"
            }
        ]
    }
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Serialize HTTPExceptions into the frontend's {error, status_code} shape."""
    payload = {
        "error": exc.detail,
        "status_code": exc.status_code
    }
    return JSONResponse(status_code=exc.status_code, content=payload)
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Last-resort handler: log the exception and return a generic 500 body."""
    logger.error(f"Unhandled exception: {exc}")
    body = {
        "error": "Internal server error",
        "detail": str(exc)
    }
    return JSONResponse(status_code=500, content=body)
# ========== STARTUP/SHUTDOWN ==========
@app.on_event("startup")
async def startup_event():
    """Log a configuration summary at boot (version, provider keys, model counts)."""
    logger.info("🚀 Sixfinger Backend API started")
    logger.info(f"📦 Version: {API_VERSION}")
    logger.info(f"🔑 Groq API: {'✅ Configured' if GROQ_API_KEY else '❌ Not configured'}")
    logger.info(f"🌐 LLM7.io: ✅ Active (Free tier)")
    logger.info(f"🤖 Total Models: {len(MODELS)}")
    # One line per plan tier with its allowed-model count.
    for plan in ("free", "starter", "pro", "plus"):
        count = len(get_allowed_models(plan))
        logger.info(f"  └─ {plan.upper()} plan: {count} models")
@app.on_event("shutdown")
async def shutdown_event():
    """Log a farewell message when the server stops."""
    logger.info("👋 Sixfinger Backend API shutting down")
if __name__ == "__main__":
    import uvicorn
    # BUGFIX: the import string must match this module's filename. The file is
    # app.py (see the header comment), so "main:app" could never resolve;
    # with reload=True uvicorn imports the app by string, so startup would
    # fail with a ModuleNotFoundError.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )