Spaces:

sallima
/

mcp_fetch_coding_guidelines

Sleeping

App Files Community

sallima committed on Aug 13, 2025

Commit

917b3f5

verified ·

1 Parent(s): d62e71f

claude improvement

Browse files

Files changed (1) hide show

app.py +285 -30

app.py CHANGED Viewed

@@ -1,37 +1,292 @@
 import os, hashlib, re, base64, requests, gradio as gr
 GH = "https://api.github.com"
 TOKEN = os.getenv("GITHUB_TOKEN")
-RULES_REPO = os.getenv("RULES_REPO")  # e.g. "stefanoallima/awesome-cursorrules"
 DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
-def _hdr(): return {"Authorization": f"Bearer {TOKEN}", "Accept":"application/vnd.github+json"}
-def _sha256(b): return hashlib.sha256(b).hexdigest()
-def list_rules(tech_key: str|None=None, ref: str|None=None):
-  ref = ref or DEFAULT_REF
-  r = requests.get(f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1", headers=_hdr()); r.raise_for_status()
-  out=[]
-  for e in r.json().get("tree", []):
-    if e.get("type")=="blob" and re.search(r"\.(md|mdc|cursorrules)$", e["path"], re.I):
-      tk = re.sub(r"\.(md|mdc|cursorrules)$","",e["path"].split("/")[-1].lower())
-      if not tech_key or tk==tech_key.lower():
-        out.append({"tech_key": tk, "path": e["path"], "repo": RULES_REPO, "commit_sha": ref})
-  return out
-def fetch_rule(tech_key: str, ref: str|None=None):
-  ref = ref or DEFAULT_REF
-  files = list_rules(tech_key=tech_key, ref=ref)
-  if not files: raise ValueError(f"no rule for '{tech_key}' in {RULES_REPO}@{ref}")
-  path = files[0]["path"]
-  r = requests.get(f"{GH}/repos/{RULES_REPO}/contents/{path}?ref={ref}", headers=_hdr()); r.raise_for_status()
-  j = r.json(); raw = base64.b64decode(j["content"]) if j.get("encoding")=="base64" else j["content"].encode()
-  return {"filename": path.split("/")[-1], "content": raw.decode("utf-8","replace"),
-          "repo": RULES_REPO, "commit_sha": ref, "sha256": _sha256(raw)}
-with gr.Blocks() as demo:
-  gr.Markdown("# Rules Catalog MCP (read-only)")
-  gr.api(fn=list_rules)
-  gr.api(fn=fetch_rule)
 if __name__ == "__main__":
-  demo.launch(mcp_server=True)

 import os, hashlib, re, base64, requests, gradio as gr
+from typing import List, Dict, Optional, Any
+import json
+# Base URL of the GitHub REST API; every request URL in this module is built from it.
 GH = "https://api.github.com"
+# Value of the GITHUB_TOKEN environment variable (None when unset); _hdr() puts it in the Authorization header.
 TOKEN = os.getenv("GITHUB_TOKEN")
+# Repository queried for rules, read from RULES_REPO with a built-in default; interpolated into /repos/{RULES_REPO}/ URLs.
+RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
+# Git ref used whenever a caller passes no ref (DEFAULT_REF environment variable, else "main").
 DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
+def _hdr():
+    """Build the HTTP headers sent with every GitHub API request.
+
+    Returns:
+        A dict holding the GitHub JSON ``Accept`` header, plus a bearer
+        ``Authorization`` header when a token is configured in ``TOKEN``.
+    """
+    headers = {"Accept": "application/vnd.github+json"}
+    # Only authenticate when a token exists: the literal "Bearer None" is an
+    # invalid credential, and GitHub rejects such requests even for public
+    # repositories that could be read anonymously.
+    if TOKEN:
+        headers["Authorization"] = f"Bearer {TOKEN}"
+    return headers
+def _sha256(b):
+    """Return the hexadecimal SHA-256 digest of the bytes ``b``."""
+    digest = hashlib.sha256()
+    digest.update(b)
+    return digest.hexdigest()
+def get_readme_content(ref: Optional[str] = None) -> str:
+    """Fetch the repository's top-level README.md for context.
+
+    Args:
+        ref: Branch, tag or commit to read from; DEFAULT_REF when omitted.
+
+    Returns:
+        The README decoded as UTF-8 (undecodable bytes are replaced), or a
+        string starting with "Error fetching README: " if anything fails.
+    """
+    ref = ref or DEFAULT_REF
+    try:
+        r = requests.get(
+            f"{GH}/repos/{RULES_REPO}/contents/README.md",
+            params={"ref": ref},  # let requests URL-encode the ref
+            headers=_hdr(),
+            timeout=30,  # requests waits indefinitely without an explicit timeout
+        )
+        r.raise_for_status()
+        j = r.json()
+        # The contents API normally returns base64; fall back to raw text otherwise.
+        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
+        return raw.decode("utf-8", "replace")
+    except Exception as e:
+        # Deliberately best-effort: the README is supplementary context, so
+        # failures are reported in-band rather than raised.
+        return f"Error fetching README: {e}"
+def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
+    """List the technology names found under the repository's rules/ tree.
+
+    The name is the first path component below ``rules/`` with the
+    "-cursorrules-prompt-file" suffix removed and dashes turned into spaces.
+
+    Args:
+        ref: Branch, tag or commit to inspect; DEFAULT_REF when omitted.
+
+    Returns:
+        The distinct technology names in sorted order, or a single-element
+        list ``["Error: ..."]`` when the request or parsing fails.
+    """
+    ref = ref or DEFAULT_REF
+    try:
+        r = requests.get(
+            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
+            headers=_hdr(),
+            timeout=30,  # requests waits indefinitely without an explicit timeout
+        )
+        r.raise_for_status()
+        # A set de-duplicates in constant time per file instead of rescanning a list.
+        technologies = set()
+        for item in r.json().get("tree", []):
+            if item.get("type") == "blob" and item["path"].startswith("rules/"):
+                # A path starting with "rules/" always splits into >= 2 parts.
+                tech_dir = item["path"].split("/")[1]
+                technologies.add(tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " "))
+        return sorted(technologies)
+    except Exception as e:
+        # Best-effort contract kept from the original: errors come back in-band.
+        return [f"Error: {e}"]
+def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
+    """Match requested technology names against the available ones.
+
+    An available name matches a request when, ignoring case and surrounding
+    whitespace, either string contains the other, or the available name
+    contains one of the related keywords listed for the request.
+
+    Args:
+        requested_techs: Names the caller is asking about.
+        available_techs: Names that actually exist in the rules repository.
+
+    Returns:
+        A dict mapping each requested name (as given) to the matching
+        available names, in the order they appear in ``available_techs``.
+        Blank requests map to an empty list.
+    """
+    # Built once per call rather than once per (requested, available) pair.
+    tech_mappings = {
+        'python': ['python', 'django', 'fastapi', 'flask'],
+        'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
+        'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
+        'react': ['react', 'nextjs', 'typescript'],
+        'vue': ['vue', 'vuejs', 'nuxt'],
+        'node': ['node', 'nodejs', 'javascript'],
+        'postgres': ['postgres', 'postgresql', 'database'],
+        'fastapi': ['fastapi', 'python', 'api'],
+        'nextjs': ['nextjs', 'next', 'react', 'typescript']
+    }
+    matches = {}
+    for requested in requested_techs:
+        requested_lower = requested.strip().lower()
+        matched_techs = []
+        # An empty string is a substring of every name, so a blank request
+        # would otherwise "match" the whole catalogue.
+        if requested_lower:
+            related = tech_mappings.get(requested_lower, ())
+            for available in available_techs:
+                available_lower = available.strip().lower()
+                if not available_lower:
+                    continue
+                # Equality is covered by the containment checks.
+                if (
+                    requested_lower in available_lower
+                    or available_lower in requested_lower
+                    or any(keyword in available_lower for keyword in related)
+                ):
+                    matched_techs.append(available)
+        matches[requested] = matched_techs
+    return matches
+def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
+    """List the files under rules/ together with their derived technology name.
+
+    Args:
+        tech_key: Optional case-insensitive substring filter on the technology
+            name; every file is returned when it is empty or omitted.
+        ref: Branch, tag or commit to inspect; DEFAULT_REF when omitted.
+
+    Returns:
+        One dict per matching file with the keys "tech_key", "directory",
+        "path", "repo", "commit_sha" and "url"; or ``[{"error": ...}]`` when
+        the request or parsing fails.
+    """
+    ref = ref or DEFAULT_REF
+    # Lower-case the filter once instead of once per tree entry.
+    needle = tech_key.lower() if tech_key else None
+    try:
+        r = requests.get(
+            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
+            headers=_hdr(),
+            timeout=30,  # requests waits indefinitely without an explicit timeout
+        )
+        r.raise_for_status()
+        rules = []
+        for item in r.json().get("tree", []):
+            if item.get("type") != "blob" or not item["path"].startswith("rules/"):
+                continue
+            # A path starting with "rules/" always splits into >= 2 parts.
+            tech_dir = item["path"].split("/")[1]
+            tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
+            if needle is None or needle in tech_name.lower():
+                rules.append({
+                    "tech_key": tech_name,
+                    "directory": tech_dir,
+                    "path": item["path"],
+                    "repo": RULES_REPO,
+                    "commit_sha": ref,
+                    "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
+                })
+        return rules
+    except Exception as e:
+        # Best-effort contract kept from the original: errors come back in-band.
+        return [{"error": str(e)}]
+def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
+    """Download the rule file stored in one directory below rules/.
+
+    Args:
+        tech_directory: Name of the entry below ``rules/`` to read.
+        ref: Branch, tag or commit to read from; DEFAULT_REF when omitted.
+
+    Returns:
+        A dict with "tech_key", "filename", "content", "directory", "repo",
+        "commit_sha", "sha256" and "url"; or ``{"error": ...}`` when no rule
+        file exists or a request fails.
+    """
+    ref = ref or DEFAULT_REF
+    try:
+        # Get files in the specific rule directory
+        r = requests.get(
+            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}",
+            params={"ref": ref},  # let requests URL-encode the ref
+            headers=_hdr(),
+            timeout=30,  # requests waits indefinitely without an explicit timeout
+        )
+        r.raise_for_status()
+        files = r.json()
+        # The contents API returns a single object when the path is a file.
+        if not isinstance(files, list):
+            files = [files]
+        # Prefer a real .cursorrules file over Markdown so a README in the
+        # same directory cannot shadow it; skip entries with no download URL.
+        rule_file = None
+        for suffix in ('.cursorrules', '.md'):
+            rule_file = next(
+                (entry for entry in files
+                 if entry["name"].endswith(suffix) and entry.get("download_url")),
+                None,
+            )
+            if rule_file:
+                break
+        if not rule_file:
+            return {"error": f"No rule file found in {tech_directory}"}
+        # Fetch the file content
+        content_r = requests.get(rule_file["download_url"], timeout=30)
+        content_r.raise_for_status()
+        return {
+            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
+            "filename": rule_file["name"],
+            "content": content_r.text,
+            "directory": tech_directory,
+            "repo": RULES_REPO,
+            "commit_sha": ref,
+            "sha256": _sha256(content_r.content),
+            "url": rule_file["html_url"]
+        }
+    except Exception as e:
+        # Best-effort contract kept from the original: errors come back in-band.
+        return {"error": str(e)}
+def fetch_rule(tech_key: str, ref: Optional[str] = None) -> Dict[str, Any]:
+    """Fetch the rule for one technology, falling back to semantic matching.
+
+    Args:
+        tech_key: Technology name to look up.
+        ref: Branch, tag or commit to read from; DEFAULT_REF when omitted.
+
+    Returns:
+        The dict produced by fetch_rule_content() for the first match, or
+        ``{"error": ...}`` when listing fails or nothing matches.
+    """
+    ref = ref or DEFAULT_REF
+    # First try direct match
+    rules = list_rules(tech_key=tech_key, ref=ref)
+    if rules and "error" in rules[0]:
+        # Surface the real listing failure instead of "No rule found".
+        return rules[0]
+    if rules:
+        return fetch_rule_content(rules[0]["directory"], ref)
+    # If no direct match, try semantic matching. Keep the real directory for
+    # each technology name: rebuilding it from the display name is lossy
+    # (dashes became spaces, and not every directory carries the suffix).
+    directories = {}
+    for rule in list_rules(ref=ref):
+        if "error" in rule:
+            return rule
+        directories.setdefault(rule["tech_key"], rule["directory"])
+    matches = semantic_match_technologies([tech_key], sorted(directories))
+    candidates = matches.get(tech_key) or []
+    if candidates:
+        # Return the first match
+        return fetch_rule_content(directories[candidates[0]], ref)
+    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}
+def get_guidelines_for_stack(tech_stack: List[str], ref: Optional[str] = None) -> Dict[str, Any]:
+    """Get coding guidelines for multiple technologies in a stack.
+
+    Args:
+        tech_stack: Technology names to look up.
+        ref: Branch, tag or commit to read from; DEFAULT_REF when omitted.
+
+    Returns:
+        A dict with "tech_stack", "guidelines" (up to three fetched rules per
+        requested name), "available_technologies", "matches", "repo" and
+        "commit_sha". When the listing fails, "available_technologies" is
+        ``["Error: ..."]`` and every match list is empty.
+    """
+    ref = ref or DEFAULT_REF
+    # Map each technology name to its real directory. Rebuilding the directory
+    # from the display name is lossy (dashes became spaces, and not every
+    # directory carries the "-cursorrules-prompt-file" suffix).
+    directories = {}
+    listing_error = None
+    for rule in list_rules(ref=ref):
+        if "error" in rule:
+            listing_error = rule["error"]
+            break
+        directories.setdefault(rule["tech_key"], rule["directory"])
+    if listing_error is not None:
+        # Keep the in-band error shape, but never treat it as a technology.
+        available_techs = [f"Error: {listing_error}"]
+        matches = {requested: [] for requested in tech_stack}
+    else:
+        available_techs = sorted(directories)
+        matches = semantic_match_technologies(tech_stack, available_techs)
+    guidelines = {}
+    for requested_tech, matched_techs in matches.items():
+        guidelines[requested_tech] = []
+        for matched_tech in matched_techs[:3]:  # Limit to top 3 matches
+            rule_content = fetch_rule_content(directories[matched_tech], ref)
+            if "error" not in rule_content:
+                guidelines[requested_tech].append(rule_content)
+    return {
+        "tech_stack": tech_stack,
+        "guidelines": guidelines,
+        "available_technologies": available_techs,
+        "matches": matches,
+        "repo": RULES_REPO,
+        "commit_sha": ref
+    }
+def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
+    """Split a free-form technology list and collect guidelines for each entry.
+
+    ``framework_list`` may separate names with commas and/or newlines. Returns
+    an error dict when no names remain after trimming; otherwise a dict with
+    "project_analysis", "guidelines" and a one-line "summary".
+    """
+    ref = ref or DEFAULT_REF
+    # Commas and newlines are both separators; blank entries are dropped.
+    pieces = (chunk.strip() for chunk in framework_list.replace(",", "\n").split("\n"))
+    techs = [name for name in pieces if name]
+    if not techs:
+        return {"error": "No technologies found in the provided list"}
+    # README first, then the per-technology guidelines.
+    readme_content = get_readme_content(ref)
+    stack_guidelines = get_guidelines_for_stack(techs, ref)
+    # Only the first 1000 characters of the README are echoed back.
+    if len(readme_content) > 1000:
+        readme_excerpt = readme_content[:1000] + "..."
+    else:
+        readme_excerpt = readme_content
+    covered = sum(1 for found in stack_guidelines['guidelines'].values() if found)
+    return {
+        "project_analysis": {
+            "detected_technologies": techs,
+            "readme_context": readme_excerpt,
+        },
+        "guidelines": stack_guidelines,
+        "summary": f"Found guidelines for {covered} out of {len(techs)} requested technologies"
+    }
+# Gradio Interface: three tabs for manual use, plus the functions registered
+# as API endpoints at the bottom.
+with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
+    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
+    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
+    with gr.Tab("Single Technology"):
+        with gr.Row():
+            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
+            # Pre-fill with the configured default ref rather than a hard-coded
+            # "main", so the UI agrees with what the functions fall back to.
+            ref_input = gr.Textbox(label="Git Reference", value=DEFAULT_REF, placeholder="main")
+        fetch_btn = gr.Button("Fetch Guidelines")
+        single_output = gr.JSON(label="Guidelines")
+        fetch_btn.click(
+            fn=fetch_rule,
+            inputs=[tech_input, ref_input],
+            outputs=single_output
+        )
+    with gr.Tab("Technology Stack"):
+        stack_input = gr.Textbox(
+            label="Technology Stack",
+            placeholder="python, fastapi, postgres, react, typescript",
+            lines=3
+        )
+        stack_ref_input = gr.Textbox(label="Git Reference", value=DEFAULT_REF)
+        analyze_btn = gr.Button("Analyze Stack")
+        stack_output = gr.JSON(label="Stack Analysis")
+        analyze_btn.click(
+            fn=analyze_project_stack,
+            inputs=[stack_input, stack_ref_input],
+            outputs=stack_output
+        )
+    with gr.Tab("Available Technologies"):
+        list_ref_input = gr.Textbox(label="Git Reference", value=DEFAULT_REF)
+        list_btn = gr.Button("List Available Technologies")
+        list_output = gr.JSON(label="Available Technologies")
+        list_btn.click(
+            fn=extract_available_technologies,
+            inputs=[list_ref_input],
+            outputs=list_output
+        )
+    # Register MCP API endpoints. gr.api() names the endpoint through the
+    # api_name keyword; it has no "name" parameter.
+    gr.api(fn=list_rules, api_name="list_rules")
+    gr.api(fn=fetch_rule, api_name="fetch_rule")
+    gr.api(fn=get_guidelines_for_stack, api_name="get_guidelines_for_stack")
+    gr.api(fn=analyze_project_stack, api_name="analyze_project_stack")
+    gr.api(fn=extract_available_technologies, api_name="extract_available_technologies")
 if __name__ == "__main__":
+    # Start the app only when this file is run as a script. mcp_server=True
+    # presumably also serves the registered API functions over MCP; confirm
+    # against the installed Gradio version.
+    demo.launch(mcp_server=True)