File size: 34,118 Bytes
6c51155 772d13a 6c51155 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 | import os
import pickle
import torch
import numpy as np
import ast
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModel
# --- IMPORT ETHAN'S HYBRID ENGINE (Checker 1 & 2) ---
from codesense.analyzer import analyze_code
app = Flask(__name__)
# --- CONFIG ---
MODEL_FILE = 'NEW_BRAIN_MLP.pk2'
base_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(base_dir, MODEL_FILE)
print("β³ Booting up CodeSense Triple-Checker System...")
print("β³ Loading Checker 1 & 2: AST Rule-Engine + CodeT5...")
# --- LOAD YOUR SAFETY NET (Checker 3: CodeBERT + pk2) ---
try:
print(f"π Loading Checker 3: CodeBERT Safety Net ({MODEL_FILE})...")
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
codebert = AutoModel.from_pretrained("microsoft/codebert-base")
with open(model_path, 'rb') as f:
classifier = pickle.load(f)
print("β
All 3 AI Brains Loaded! Server Ready on Port 5000.")
except Exception as e:
print(f"β WARNING: Could not load Safety Net. {e}")
classifier = None
def get_vector(code):
inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512, padding=True)
with torch.no_grad():
outputs = codebert(**inputs)
return outputs.last_hidden_state[:, 0, :].numpy().flatten()
# --- YOUR MAGIC VARIABLE EXTRACTOR & AST LOOP COUNTER ---
class CodeAnalyzer(ast.NodeVisitor):
def __init__(self):
self.max_loop_depth = 0
self.current_loop_depth = 0
self.func_name = "optimized_function"
self.args_string = "data"
self.first_arg = "data"
def visit_FunctionDef(self, node):
self.func_name = node.name
arg_names = [arg.arg for arg in node.args.args]
if arg_names:
self.args_string = ", ".join(arg_names)
self.first_arg = arg_names[0]
self.generic_visit(node)
def visit_For(self, node):
self.current_loop_depth += 1
if self.current_loop_depth > self.max_loop_depth:
self.max_loop_depth = self.current_loop_depth
self.generic_visit(node)
self.current_loop_depth -= 1
def visit_While(self, node):
self.current_loop_depth += 1
if self.current_loop_depth > self.max_loop_depth:
self.max_loop_depth = self.current_loop_depth
self.generic_visit(node)
self.current_loop_depth -= 1
# --- API ROUTE ---
@app.route('/predict', methods=['POST'])
def predict():
data = request.json
code = data.get('code', '')
if not code: return jsonify({"error": "No code provided"}), 400
try:
# 1. Extract your variables & Count Loops
analyzer = CodeAnalyzer()
try:
tree = ast.parse(code)
analyzer.visit(tree)
# --- CALCULATE TIME COMPLEXITY USING AST ---
loop_depth = analyzer.max_loop_depth
if loop_depth == 0: dynamic_comp = "O(1) (No loops)"
elif loop_depth == 1: dynamic_comp = "O(n) (Linear)"
elif loop_depth == 2: dynamic_comp = "O(n^2) (Nested loops)"
else: dynamic_comp = f"O(n^{loop_depth}) (Deeply nested)"
except Exception:
dynamic_comp = "Error parsing code structure"
# ==========================================
# π‘οΈ THE TRIPLE-CHECKER LOGIC
# ==========================================
# Run Ethan's Engine
analysis_result = analyze_code(code)
algo = analysis_result.get("pattern", "Unknown")
ethan_summary = analysis_result.get("summary", "")
t5_raw_conf = analysis_result.get("ml_insights", {}).get("confidence", 0.0)
final_conf = t5_raw_conf
safety_net_triggered = False
# Patterns that mean the first engine is slightly confused
GENERIC_PATTERNS = ["Unknown", "Nested Iterative", "Linear Iterative", "Recursive (Linear)", "Recursive (Exponential)"]
# If AST/CodeT5 is confused, trigger YOUR CodeBERT model!
if (algo in GENERIC_PATTERNS or t5_raw_conf < 0.90) and classifier is not None:
print(f"β οΈ Primary engine unsure (Confidence: {t5_raw_conf:.2f}). Triggering CodeBERT Safety Net...")
try:
vector = get_vector(code)
bert_pred = classifier.predict([vector])[0]
try:
bert_probs = classifier.predict_proba([vector])[0]
bert_conf = float(max(bert_probs))
except:
bert_conf = 0.85
if bert_conf >= 0.80:
algo = bert_pred
final_conf = bert_conf
safety_net_triggered = True
print(f"π‘οΈ Safety Net Override Successful: {algo} ({bert_conf*100:.1f}%)")
except Exception as e:
print(f"Safety Net Error: {e}")
conf_percentage = (final_conf * 100) if final_conf else 100.0
# ==========================================
# π YOUR FULL UI DICTIONARY
# ==========================================
complexity_map = {
# --- SORTING ---
"Bubble Sort": {
"space": "O(1)",
"explanation": "β οΈ CRITICAL: Bubble Sort is inefficient (O(n^2)) for large datasets. Refactor to Quick Sort or Merge Sort (O(n log n)).",
"improvements": ["Added a 'swapped' flag to exit early if sorted.", "Shrink inner loop by 'i' each pass."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n n = len([FIRST_ARG])\n for i in range(n):\n swapped = False\n for j in range(0, n - i - 1):\n if [FIRST_ARG][j] > [FIRST_ARG][j + 1]:\n [FIRST_ARG][j], [FIRST_ARG][j + 1] = [FIRST_ARG][j + 1], [FIRST_ARG][j]\n swapped = True\n if not swapped: break\n return [FIRST_ARG]",
"recommended_name": "Quick Sort OR Merge Sort (O(n log n))",
"recommended_code": "# --- OPTION 1: QUICK SORT ---\nimport random\ndef [FUNC_NAME]_quicksort([ARGS]):\n if len([FIRST_ARG]) <= 1: return [FIRST_ARG]\n pivot = random.choice([FIRST_ARG])\n L = [x for x in [FIRST_ARG] if x < pivot]\n M = [x for x in [FIRST_ARG] if x == pivot]\n R = [x for x in [FIRST_ARG] if x > pivot]\n return [FUNC_NAME]_quicksort(L) + M + [FUNC_NAME]_quicksort(R)\n\n# --- OPTION 2: BUILT-IN SORT ---\ndef [FUNC_NAME]_timsort([ARGS]):\n return sorted([FIRST_ARG])"
},
"Insertion Sort": {
"space": "O(1)",
"explanation": "β οΈ WARNING: Insertion Sort is slow (O(n^2)) for large lists. It is okay for small inputs (<50 items), but consider Quick Sort for scalability.",
"improvements": ["Your logic is correct for small datasets, but fails to scale.", "For >50 items, pivot to a divide-and-conquer approach."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n for i in range(1, len([FIRST_ARG])):\n key = [FIRST_ARG][i]\n j = i - 1\n while j >= 0 and key < [FIRST_ARG][j]:\n [FIRST_ARG][j + 1] = [FIRST_ARG][j]\n j -= 1\n [FIRST_ARG][j + 1] = key\n return [FIRST_ARG]",
"recommended_name": "Quick Sort OR Merge Sort (O(n log n))",
"recommended_code": "# --- OPTION 1: QUICK SORT ---\ndef [FUNC_NAME]_quicksort([ARGS]):\n if len([FIRST_ARG]) <= 1: return [FIRST_ARG]\n pivot = [FIRST_ARG][len([FIRST_ARG]) // 2]\n return [FUNC_NAME]_quicksort([x for x in [FIRST_ARG] if x < pivot]) + \\\n [x for x in [FIRST_ARG] if x == pivot] + \\\n [FUNC_NAME]_quicksort([x for x in [FIRST_ARG] if x > pivot])"
},
"Hash Map Lookup": {
"space": "O(n)",
"explanation": "β
GOOD: Uses a dictionary (Hash Map) for instant O(1) lookups instead of repeatedly searching through a list. This drops the time complexity from O(n^2) down to a highly efficient O(n).",
"improvements": ["You are correctly using a dictionary to map elements.", "This trades a small amount of memory O(n) for a massive speedup O(1) per lookup."],
"optimized_code": "def [FUNC_NAME]([ARGS], target):\n seen = {}\n for i, val in enumerate([FIRST_ARG]):\n needed = target - val\n if needed in seen:\n return [seen[needed], i]\n seen[val] = i\n return []"
},
"Selection Sort": {
"space": "O(1)",
"explanation": "β οΈ WARNING: Selection Sort is always O(n^2), even if the list is sorted. Switch to Insertion Sort (for small lists) or Merge Sort.",
"improvements": ["Avoid this algorithm entirely for production code.", "Replace with an algorithm that adapts to already-sorted data."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n for i in range(len([FIRST_ARG])):\n min_idx = i\n for j in range(i+1, len([FIRST_ARG])):\n if [FIRST_ARG][j] < [FIRST_ARG][min_idx]:\n min_idx = j\n [FIRST_ARG][i], [FIRST_ARG][min_idx] = [FIRST_ARG][min_idx], [FIRST_ARG][i]\n return [FIRST_ARG]",
"recommended_name": "Insertion Sort OR Merge Sort",
"recommended_code": "# --- OPTION 1: INSERTION SORT ---\ndef [FUNC_NAME]_insertion([ARGS]):\n for i in range(1, len([FIRST_ARG])):\n key = [FIRST_ARG][i]\n j = i - 1\n while j >= 0 and key < [FIRST_ARG][j]:\n [FIRST_ARG][j + 1] = [FIRST_ARG][j]\n j -= 1\n [FIRST_ARG][j + 1] = key\n return [FIRST_ARG]"
},
"Quick Sort": {
"space": "O(log n)",
"explanation": "β
EXCELLENT: Quick Sort is a standard, efficient O(n log n) algorithm. Ensure you handle the worst-case pivot selection.",
"improvements": ["Ensure your pivot is chosen randomly instead of always picking the first or last element."],
"optimized_code": "import random\ndef [FUNC_NAME]([ARGS]):\n if len([FIRST_ARG]) <= 1: return [FIRST_ARG]\n pivot = random.choice([FIRST_ARG])\n left = [x for x in [FIRST_ARG] if x < pivot]\n middle = [x for x in [FIRST_ARG] if x == pivot]\n right = [x for x in [FIRST_ARG] if x > pivot]\n return [FUNC_NAME](left) + middle + [FUNC_NAME](right)"
},
"Merge Sort": {
"space": "O(n)",
"explanation": "β
EXCELLENT: Merge Sort is stable and consistent O(n log n). Great for large datasets.",
"improvements": ["Your algorithm is optimal for time complexity.", "Be aware that Merge Sort uses O(n) extra memory."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n if len([FIRST_ARG]) > 1:\n mid = len([FIRST_ARG]) // 2\n L, R = [FIRST_ARG][:mid], [FIRST_ARG][mid:]\n [FUNC_NAME](L)\n [FUNC_NAME](R)\n i = j = k = 0\n while i < len(L) and j < len(R):\n if L[i] < R[j]:\n [FIRST_ARG][k] = L[i]\n i += 1\n else:\n [FIRST_ARG][k] = R[j]\n j += 1\n k += 1\n while i < len(L):\n [FIRST_ARG][k] = L[i]\n i, k = i + 1, k + 1\n while j < len(R):\n [FIRST_ARG][k] = R[j]\n j, k = j + 1, k + 1\n return [FIRST_ARG]"
},
"Heap Sort": {
"space": "O(1)",
"explanation": "β
GOOD: Heap Sort is memory efficient (O(1) extra space) and fast (O(n log n)).",
"improvements": ["Use Python's built-in `heapq` library for C-level optimization."],
"optimized_code": "import heapq\ndef [FUNC_NAME]([ARGS]):\n heapq.heapify([FIRST_ARG])\n return [heapq.heappop([FIRST_ARG]) for _ in range(len([FIRST_ARG]))]"
},
"Counting Sort": {
"space": "O(n + k)",
"explanation": "β
EFFICIENT: Very fast (O(n+k)) for integers with a small range. Not suitable for strings or large ranges.",
"improvements": ["Ensure your data strictly contains integers with a known maximum value."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n if not [FIRST_ARG]: return [FIRST_ARG]\n max_val = max([FIRST_ARG])\n count = [0] * (max_val + 1)\n for num in [FIRST_ARG]: count[num] += 1\n idx = 0\n for i in range(len(count)):\n while count[i] > 0:\n [FIRST_ARG][idx] = i\n idx += 1\n count[i] -= 1\n return [FIRST_ARG]"
},
# --- SEARCHING ---
"Linear Search": {
"space": "O(1)",
"explanation": "β οΈ INEFFICIENT: iterating through every item is O(n). If your data is sorted, switch to Binary Search (O(log n)) for a massive speedup.",
"improvements": ["Only checking items one by one requires O(n) time.", "If your data is sorted, use Binary Search."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n for i in range(len([FIRST_ARG])):\n if [FIRST_ARG][i] == target: return i # Target must be passed\n return -1",
"recommended_name": "Binary Search (O(log n))",
"recommended_code": "import bisect\ndef [FUNC_NAME]_binary([ARGS]):\n # Assuming [FIRST_ARG] is sorted\n idx = bisect.bisect_left([FIRST_ARG], target)\n if idx < len([FIRST_ARG]) and [FIRST_ARG][idx] == target:\n return idx\n return -1"
},
"Binary Search": {
"space": "O(1)",
"explanation": "β
PERFECT: Binary Search is the gold standard (O(log n)) for sorted arrays.",
"improvements": ["Ensure your array is sorted first.", "You can manually write it with a while loop, or use Python's built in `bisect` library."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n # Manual implementation\n left, right = 0, len([FIRST_ARG]) - 1\n while left <= right:\n mid = (left + right) // 2\n if [FIRST_ARG][mid] == target: return mid\n elif [FIRST_ARG][mid] < target: left = mid + 1\n else: right = mid - 1\n return -1"
},
"Jump Search": {
"space": "O(1)",
"explanation": "β
GOOD: Jump Search (O(βn)) is better than Linear Search, but Binary Search is still faster if random access is allowed.",
"improvements": ["If you are using standard lists, switch to Binary Search (O(log n))."],
"optimized_code": "import math\ndef [FUNC_NAME]([ARGS]):\n n = len([FIRST_ARG])\n step = int(math.sqrt(n))\n prev = 0\n while [FIRST_ARG][min(step, n)-1] < target:\n prev = step\n step += int(math.sqrt(n))\n if prev >= n: return -1\n while [FIRST_ARG][prev] < target:\n prev += 1\n if prev == min(step, n): return -1\n if [FIRST_ARG][prev] == target: return prev\n return -1",
"recommended_name": "Binary Search (O(log n))",
"recommended_code": "import bisect\ndef [FUNC_NAME]_binary([ARGS]):\n idx = bisect.bisect_left([FIRST_ARG], target)\n if idx < len([FIRST_ARG]) and [FIRST_ARG][idx] == target:\n return idx\n return -1"
},
"Nested Iterative": {
"space": "O(1)",
"explanation": "β οΈ WARNING: Nested loops usually indicate a 'brute force' O(n^2) time complexity. This scales poorly for large datasets and can often be optimized.",
"improvements": ["Check if the inner loop can be replaced with a Hash Map (dictionary) for O(1) lookups.", "If analyzing contiguous subarrays, switch to the Sliding Window technique (O(n))."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n n = len([FIRST_ARG])\n # If you MUST use nested loops, ensure the inner loop shrinks each pass (j = i + 1)\n for i in range(n):\n for j in range(i + 1, n):\n pass # Your logic here\n return [FIRST_ARG]",
"recommended_name": "Hash Map Lookup OR Sliding Window (O(n))",
"recommended_code": "# --- OPTION 1: HASH MAP (For finding pairs/duplicates) ---\ndef [FUNC_NAME]_hashmap([ARGS]):\n seen = set()\n for item in [FIRST_ARG]:\n if item in seen: return item\n seen.add(item)\n return [FIRST_ARG]\n\n# --- OPTION 2: SLIDING WINDOW (For subarrays) ---\ndef [FUNC_NAME]_window([ARGS], k=2):\n # Assumes [FIRST_ARG] is a list of numbers\n window_sum = sum([FIRST_ARG][:k])\n for i in range(len([FIRST_ARG]) - k):\n window_sum = window_sum - [FIRST_ARG][i] + [FIRST_ARG][i+k]\n return window_sum"
},
# --- RECURSION, DP, & MATH ---
"Fibonacci Sequence": {
"space": "O(n)",
"explanation": "β οΈ RECURSION ALERT: Simple recursive Fibonacci is O(2^n) (Exponential). Refactor to use Dynamic Programming (Memoization) or an Iterative Loop to make it O(n).",
"improvements": ["Recursion causes stack overflow risks."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n if [FIRST_ARG] <= 0: return 0\n if [FIRST_ARG] == 1: return 1\n return [FUNC_NAME]([FIRST_ARG]-1) + [FUNC_NAME]([FIRST_ARG]-2)",
"recommended_name": "Iterative OR Memoized Fibonacci (O(n))",
"recommended_code": "# --- OPTION 1: ITERATIVE (O(1) Space) ---\ndef [FUNC_NAME]_iterative([ARGS]):\n if [FIRST_ARG] <= 0: return 0\n if [FIRST_ARG] == 1: return 1\n a, b = 0, 1\n for _ in range(2, [FIRST_ARG] + 1):\n a, b = b, a + b\n return b\n\n# --- OPTION 2: MEMOIZATION (Caching) ---\nfrom functools import lru_cache\n@lru_cache(maxsize=None)\ndef [FUNC_NAME]_memoized([ARGS]):\n if [FIRST_ARG] <= 1: return [FIRST_ARG]\n return [FUNC_NAME]_memoized([FIRST_ARG]-1) + [FUNC_NAME]_memoized([FIRST_ARG]-2)"
},
"Memoization": {
"space": "O(n)",
"explanation": "β
EFFICIENT: You successfully applied Memoization (Top-Down DP) to prevent redundant calculations.",
"improvements": ["Caching results prevents the O(2^n) exponential blow-up of standard recursion."],
"optimized_code": "memo = {}\ndef [FUNC_NAME]([ARGS]):\n if [FIRST_ARG] in memo: return memo[[FIRST_ARG]]\n if [FIRST_ARG] <= 1: return [FIRST_ARG]\n memo[[FIRST_ARG]] = [FUNC_NAME]([FIRST_ARG]-1) + [FUNC_NAME]([FIRST_ARG]-2)\n return memo[[FIRST_ARG]]"
},
"Tabulation": {
"space": "O(n)",
"explanation": "β
EFFICIENT: You successfully applied Tabulation (Bottom-Up DP) to prevent recursion stack overflow limits.",
"improvements": ["Iteratively building the table is memory safe and prevents RecursionError."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n if [FIRST_ARG] <= 0: return 0\n dp = [0] * ([FIRST_ARG] + 1)\n dp[1] = 1\n for i in range(2, [FIRST_ARG] + 1):\n dp[i] = dp[i-1] + dp[i-2]\n return dp[[FIRST_ARG]]"
},
"Factorial (Recursive)": {
"space": "O(n)",
"explanation": "β οΈ STACK RISK: Recursive Factorial works, but large inputs will cause a `RecursionError`. Use an Iterative Loop for safety.",
"improvements": ["Use a `for` loop to accumulate the result instead of recursion.", "Better yet, use Python's built in `math.factorial()`."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n if [FIRST_ARG] == 1 or [FIRST_ARG] == 0: return 1\n return [FIRST_ARG] * [FUNC_NAME]([FIRST_ARG] - 1)",
"recommended_name": "Iterative Factorial (O(n))",
"recommended_code": "import math\ndef [FUNC_NAME]_safe([ARGS]):\n # Built-in math functions run in C and avoid RecursionErrors\n return math.factorial([FIRST_ARG])"
},
"Factorial (Iterative)": {
"space": "O(1)",
"explanation": "β
GOOD: Iterative Factorial is safe and efficient (O(n)).",
"improvements": ["Your logic is solid, but you can achieve even faster execution using the built-in math module."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n result = 1\n for i in range(2, [FIRST_ARG] + 1):\n result *= i\n return result"
},
"Prime Check": {
"space": "O(1)",
"explanation": "β
EFFICIENT: This O(βn) approach is much faster than checking all numbers up to N.",
"improvements": ["Ensure your loop stops at the square root of n.", "Handle edge cases for 1, 2, and 3 explicitly."],
"optimized_code": "import math\ndef [FUNC_NAME]([ARGS]):\n if [FIRST_ARG] <= 1: return False\n if [FIRST_ARG] in (2, 3): return True\n if [FIRST_ARG] % 2 == 0 or [FIRST_ARG] % 3 == 0: return False\n for i in range(5, int(math.sqrt([FIRST_ARG])) + 1, 6):\n if [FIRST_ARG] % i == 0 or [FIRST_ARG] % (i + 2) == 0: return False\n return True"
},
"GCD (Euclidean)": {
"space": "O(1)",
"explanation": "β
PERFECT: Euclidean algorithm is the most efficient way to find GCD.",
"improvements": ["Use Python's built-in `math.gcd` for cleaner and slightly faster code."],
"optimized_code": "def [FUNC_NAME](a, b):\n while b:\n a, b = b, a % b\n return abs(a)"
},
"Palindrome Check": {
"space": "O(1)",
"explanation": "β
GOOD: O(n) is the optimal complexity for checking palindromes.",
"improvements": ["In Python, slicing is often faster than setting up a while loop with two pointers."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n return str([FIRST_ARG]) == str([FIRST_ARG])[::-1]"
},
"Armstrong Number": {
"space": "O(log n)",
"explanation": "β
GOOD: Analyzing digits is O(log n), which is optimal.",
"improvements": ["Convert the number to a string to easily iterate over the digits."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n digits = str([FIRST_ARG])\n power = len(digits)\n return [FIRST_ARG] == sum(int(d)**power for d in digits)"
},
# --- GRAPHS, POINTERS, & DATA STRUCTURES ---
"Dijkstra's Algorithm": {
"space": "O(V)",
"explanation": "β
INDUSTRY STANDARD: Best for finding shortest paths in weighted graphs.",
"improvements": ["Ensure you are using a Priority Queue (`heapq`) to achieve O(E log V) time complexity."],
"optimized_code": "import heapq\ndef [FUNC_NAME](graph, start):\n distances = {node: float('inf') for node in graph}\n distances[start] = 0\n pq = [(0, start)]\n while pq:\n curr_dist, curr_node = heapq.heappop(pq)\n if curr_dist > distances[curr_node]: continue\n for neighbor, weight in graph[curr_node].items():\n dist = curr_dist + weight\n if dist < distances[neighbor]:\n distances[neighbor] = dist\n heapq.heappush(pq, (dist, neighbor))\n return distances"
},
"Heap-Based Algorithm": {
"space": "O(N)",
"explanation": "β
GOOD: Ideal for dynamically finding the top K elements (smallest/largest), scheduling tasks, or building priority queues without sorting the whole list.",
"improvements": ["Always use Python's built-in `heapq` module instead of manually sorting arrays with `.sort()`.", "Use `heapq.heapify()` to build your initial heap in O(N) time instead of pushing items one-by-one."],
"optimized_code": "import heapq\n\ndef [FUNC_NAME](data, k):\n # O(N) time to build the heap in-place\n heapq.heapify(data)\n # Extract the k smallest elements efficiently\n result = [heapq.heappop(data) for _ in range(k)]\n return result"
},
"Breadth-First Search": {
"space": "O(V)",
"explanation": "β
GOOD: Perfect for shortest paths in unweighted graphs or level-order traversal.",
"improvements": ["Always use `collections.deque` for the queue instead of a standard list."],
"optimized_code": "from collections import deque\ndef [FUNC_NAME](graph, start):\n visited = set([start])\n queue = deque([start])\n result = []\n while queue:\n node = queue.popleft()\n result.append(node)\n for neighbor in graph[node]:\n if neighbor not in visited:\n visited.add(neighbor)\n queue.append(neighbor)\n return result"
},
"BFS (Graph)": {
"space": "O(V)",
"explanation": "β
GOOD: Perfect for shortest paths in unweighted graphs or level-order traversal.",
"improvements": ["Always use `collections.deque` for the queue instead of a standard list."],
"optimized_code": "from collections import deque\ndef [FUNC_NAME](graph, start):\n visited = set([start])\n queue = deque([start])\n result = []\n while queue:\n node = queue.popleft()\n result.append(node)\n for neighbor in graph[node]:\n if neighbor not in visited:\n visited.add(neighbor)\n queue.append(neighbor)\n return result"
},
"Depth-First Search": {
"space": "O(V)",
"explanation": "β
GOOD: Standard for pathfinding puzzles, topological sorting, and cycle detection.",
"improvements": ["If recursion depth is a concern, switch to an iterative DFS using a stack."],
"optimized_code": "def [FUNC_NAME](graph, start, visited=None):\n if visited is None: visited = set()\n visited.add(start)\n # Process node here\n for neighbor in graph[start]:\n if neighbor not in visited:\n [FUNC_NAME](graph, neighbor, visited)\n return visited"
},
"DFS (Graph)": {
"space": "O(V)",
"explanation": "β
GOOD: Standard for pathfinding puzzles, topological sorting, and cycle detection.",
"improvements": ["If recursion depth is a concern, switch to an iterative DFS using a stack."],
"optimized_code": "def [FUNC_NAME](graph, start, visited=None):\n if visited is None: visited = set()\n visited.add(start)\n # Process node here\n for neighbor in graph[start]:\n if neighbor not in visited:\n [FUNC_NAME](graph, neighbor, visited)\n return visited"
},
"Sliding Window": {
"space": "O(1)",
"explanation": "β
EFFICIENT: Sliding window prevents duplicate work when analyzing subarrays.",
"improvements": ["Maintains a running sum/condition without needing nested O(n^2) loops."],
"optimized_code": "def [FUNC_NAME](arr, k):\n current_sum = 0\n left = 0\n for right in range(len(arr)):\n current_sum += arr[right]\n if right - left + 1 > k:\n current_sum -= arr[left]\n left += 1\n return current_sum"
},
"Two-Pointer Technique": {
"space": "O(1)",
"explanation": "β
EFFICIENT: Two pointers walking towards each other saves incredible amounts of memory.",
"improvements": ["Much faster than nested O(n^2) loops when dealing with sorted arrays."],
"optimized_code": "def [FUNC_NAME](arr, target):\n left, right = 0, len(arr) - 1\n while left < right:\n s = arr[left] + arr[right]\n if s == target: return True\n elif s < target: left += 1\n else: right -= 1\n return False"
},
"Binary Search Tree": {
"space": "O(n)",
"explanation": "βΉοΈ INFO: Operations are O(log n) on average, but can degrade to O(n) if the tree is unbalanced. Consider an AVL Tree or Red-Black Tree for guaranteed performance.",
"improvements": ["Consider just using the built-in `dict` or `set` which are highly optimized Hash Tables in Python."],
"optimized_code": "class Node:\n def __init__(self, key):\n self.left, self.right, self.val = None, None, key\n\ndef [FUNC_NAME](root, key):\n if root is None or root.val == key: return root\n if root.val < key: return [FUNC_NAME](root.right, key)\n return [FUNC_NAME](root.left, key)"
},
"Stack Operations": {
"space": "O(n)",
"explanation": "β
OPTIMAL: Push/Pop operations are O(1).",
"improvements": ["Standard Python lists are perfect for stacks. Just ensure you only use `.append()` and `.pop()`."],
"optimized_code": "def [FUNC_NAME]():\n stack = []\n stack.append('item') # Push O(1)\n item = stack.pop() # Pop O(1)\n return item"
},
"Queue (List)": {
"space": "O(n)",
"explanation": "β οΈ WARNING: Using `list.pop(0)` in Python is O(n). Use `collections.deque` for true O(1) queue operations.",
"improvements": ["Import 'deque' from the collections module.", "Use 'popleft()' instead of 'pop(0)' to achieve O(1) constant time."],
"optimized_code": "def [FUNC_NAME]([ARGS]):\n # Using list.pop(0) causes O(n) memory shifts\n queue = list([FIRST_ARG])\n queue.append('new_item')\n item = queue.pop(0) \n return item",
"recommended_name": "Collections.deque (True O(1) Queue)",
"recommended_code": "from collections import deque\ndef [FUNC_NAME]_optimized([ARGS]):\n queue = deque([FIRST_ARG])\n queue.append('new_item')\n # queue.popleft() is now O(1) instead of O(n)\n item = queue.popleft()\n return queue"
},
"Linked List (Singly)": {
"space": "O(n)",
"explanation": "βΉοΈ INFO: Insertions are O(1) if you have the pointer, but searching is O(n).",
"improvements": ["Linked lists are rarely used in standard Python because standard lists are dynamic arrays."],
"optimized_code": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n\nclass LinkedList:\n def __init__(self):\n self.head = None\n def print_list(self):\n temp = self.head\n while temp:\n print(temp.data)\n temp = temp.next"
},
"Linked List (Doubly)": {
"space": "O(n)",
"explanation": "β
FLEXIBLE: Doubly Linked Lists allow bidirectional traversal, at the cost of slightly more memory.",
"improvements": ["Python's built-in `collections.deque` is actually a Doubly Linked List under the hood! Use it for max performance."],
"optimized_code": "from collections import deque\n# deque is an optimized doubly linked list\ndef [FUNC_NAME]():\n dll = deque(['a', 'b', 'c'])\n dll.append('d') # Add to right\n dll.appendleft('z') # Add to left\n return dll"
}
}
# 4. Fallback Data
default_data = {
"space": "O(N)",
"explanation": f"CodeSense Engine Analysis: {ethan_summary}",
"improvements": ["Analysis pending for this algorithm."],
"optimized_code": "# Recognized Pattern. Specific optimization currently unavailable."
}
# Handle matching names (e.g. "Linear Search" vs "Linear Iterative" from different models)
if algo == "Linear Iterative" and "Linear Search" in complexity_map:
algo_key = "Linear Search"
elif algo == "Recursive (Exponential)" and "Factorial (Recursive)" in complexity_map:
algo_key = "Fibonacci Sequence" # General fallback for exponential recursion
else:
algo_key = algo
result_data = complexity_map.get(algo_key, default_data)
# --- YOUR MAGIC VARIABLE REPLACEMENT FOR BOTH CODES ---
# 1. Format Optimized Code
opt_code = result_data.get("optimized_code", "").replace("[FUNC_NAME]", analyzer.func_name).replace("[ARGS]", analyzer.args_string).replace("[FIRST_ARG]", analyzer.first_arg)
# 2. Format Recommended Code (if it exists)
rec_code = result_data.get("recommended_code", "")
if rec_code:
rec_code = rec_code.replace("[FUNC_NAME]", analyzer.func_name).replace("[ARGS]", analyzer.args_string).replace("[FIRST_ARG]", analyzer.first_arg)
rec_name = result_data.get("recommended_name", "")
# 5. Send EVERYTHING back to VS Code
return jsonify({
"algorithm": algo,
"confidence": f"{conf_percentage:.1f}%",
"dynamic_complexity": dynamic_comp,
"space_complexity": result_data.get("space", "O(N)"),
"explanation": result_data.get("explanation", ""),
"improvements": result_data.get("improvements", []),
"optimized_code": opt_code,
"recommended_name": rec_name,
"recommended_code": rec_code,
"safety_net_used": safety_net_triggered
})
except Exception as e:
import traceback
print(traceback.format_exc())
return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=7860) |