| | import os |
| | import gradio as gr |
| | import warnings |
| | import json |
| | from dotenv import load_dotenv |
| | from typing import List |
| | import time |
| | from functools import lru_cache |
| | import logging |
| |
|
| | from langchain_community.vectorstores import FAISS |
| | from langchain_community.embeddings import AzureOpenAIEmbeddings |
| | from openai import AzureOpenAI |
| |
|
| | |
# Override gradio_client's JSON-schema-to-Python-type helper with a stub that
# ignores its arguments and always answers "string".
# NOTE(review): presumably a workaround for a schema-parsing failure in some
# gradio_client versions; confirm it is still needed after upgrading Gradio.
import gradio_client.utils
gradio_client.utils.json_schema_to_python_type = lambda schema, defs=None: "string"
| |
|
| | |
# --- Environment -----------------------------------------------------------
# Pull settings from a local .env file (if any) into the process environment
# before reading them.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_LLM_DEPLOYMENT = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")

# One API version shared by the embeddings client and the chat client, so the
# two cannot drift apart.
AZURE_OPENAI_API_VERSION = "2025-01-01-preview"

# Fail fast at import time, naming exactly which settings are absent or empty.
_missing_env_vars = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", AZURE_OPENAI_API_KEY),
        ("AZURE_OPENAI_ENDPOINT", AZURE_OPENAI_ENDPOINT),
        ("AZURE_OPENAI_LLM_DEPLOYMENT", AZURE_OPENAI_LLM_DEPLOYMENT),
        ("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", AZURE_OPENAI_EMBEDDING_DEPLOYMENT),
    )
    if not value
]
if _missing_env_vars:
    raise ValueError(
        "Missing one or more Azure OpenAI environment variables: "
        + ", ".join(_missing_env_vars)
    )

# Silences every warning category process-wide from this point on.
warnings.filterwarnings("ignore")

# --- Embeddings ------------------------------------------------------------
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_key=AZURE_OPENAI_API_KEY,
    openai_api_version=AZURE_OPENAI_API_VERSION,
    chunk_size=1000
)

# --- Vector store ----------------------------------------------------------
# The index directory is resolved relative to this file, not the working
# directory, so the script can be launched from anywhere.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
FAISS_INDEX_PATH = os.path.join(SCRIPT_DIR, "faiss_index_sysml")
# SECURITY: allow_dangerous_deserialization=True lets FAISS.load_local unpickle
# the stored docstore. Only ever load an index produced by a trusted build of
# this project; a tampered index file can execute arbitrary code on load.
vectorstore = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

# --- Chat client -----------------------------------------------------------
client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)

# --- Logging ---------------------------------------------------------------
logger = logging.getLogger(__name__)
| |
|
| | |
def clean_em_dashes(text: str) -> str:
    """Replace every em dash in *text* with more natural punctuation.

    A dash directly followed by one of a few common continuation words gets a
    tailored replacement (for example ``"—which"`` becomes ``", which"`` and
    ``"—just"`` becomes ``". Just"``); any remaining dash becomes ``", "``.
    Spaces or tabs hugging a dash are dropped first, so ``"a — b"`` is handled
    exactly like ``"a—b"`` instead of producing ``"a ,  b"``.

    Args:
        text: The text to clean. ``None`` or an empty string is returned
            unchanged so callers do not have to guard the call.

    Returns:
        The text with no em dash characters left in it.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if not text:
        return text

    # Normalize "word — word" to "word—word" so the context rules below also
    # match spaced dashes and the generic fallback does not double up spaces.
    text = re.sub(r"[ \t]*—[ \t]*", "—", text)

    # Context-sensitive rules. No needle is a prefix of another, so the order
    # in which they are applied does not affect the result.
    context_rules = (
        ("—which", ", which"),
        ("—that", ", that"),
        ("—no", ". No"),
        ("—and", ", and"),
        ("—but", ", but"),
        ("—so", ", so"),
        ("—you", ". You"),
        ("—it", ". It"),
        ("—just", ". Just"),
        ("—great", ", great"),
        ("—this", ". This"),
    )
    for needle, replacement in context_rules:
        text = text.replace(needle, replacement)

    # Generic fallback for every dash the rules above did not claim.
    return text.replace("—", ", ")
| |
|
| | |
# Markers that tag a retrieved chunk as SysModeler material.
# Matched against the lowercased `source` metadata (or str(doc) without metadata).
_SYSMODELER_SOURCE_MARKERS = ('sysmodeler', 'user manual')
# Matched against the lowercased chunk text.
_SYSMODELER_LOWERCASE_CONTENT_MARKERS = (
    'sysmodeler.ai',
    'workspace.sysmodeler.ai',
    'workspace dashboard',
)
# Matched case-sensitively against the raw chunk text.
_SYSMODELER_EXACT_CONTENT_MARKERS = (
    'Create with AI',
    'Canvas Overview',
    'AI-powered',
    'voice input',
    'Canvas interface',
    'Project Creation',
    'Shape Palette',
    'AI Copilot',
    'SynthAgent',
)


def _is_sysmodeler_doc(doc) -> bool:
    """Return True when a retrieved chunk looks like SysModeler documentation."""
    if hasattr(doc, 'metadata'):
        # str() guards against a non-string `source` value in the metadata.
        doc_source = str(doc.metadata.get('source', '')).lower()
    else:
        doc_source = str(doc).lower()
    if any(marker in doc_source for marker in _SYSMODELER_SOURCE_MARKERS):
        return True

    content = doc.page_content
    content_lower = content.lower()  # lowercase once instead of per marker
    return (
        any(marker in content_lower for marker in _SYSMODELER_LOWERCASE_CONTENT_MARKERS)
        or any(marker in content for marker in _SYSMODELER_EXACT_CONTENT_MARKERS)
    )


@lru_cache(maxsize=100)
def _retrieve_sysml_context(query: str) -> str:
    """Search the vector store and return the selected chunks as one string.

    Only successful results are memoized here: any exception propagates to the
    caller, so a transient failure is never stored in the cache.

    Side effects: prints a retrieval trace to stdout and writes `source_type`,
    `weighted_score` and `original_score` into each retrieved document's
    metadata.
    """
    print(f"\n🔍 QUERY: {query}")
    print("="*80)

    results = vectorstore.similarity_search_with_score(query, k=100)
    print(f"📊 Total results retrieved: {len(results)}")

    weighted_results = []
    sysmodeler_count = 0
    other_count = 0

    for i, (doc, score) in enumerate(results):
        is_sysmodeler = _is_sysmodeler_doc(doc)

        if is_sysmodeler:
            # Scaling the score down moves the chunk earlier in the ascending
            # sort below.
            # NOTE(review): this only acts as a boost if a lower score means
            # "more similar" (distance-style scores); confirm the index metric.
            weighted_score = score * 0.6
            source_type = "SysModeler"
            sysmodeler_count += 1
        else:
            weighted_score = score
            source_type = "Other"
            other_count += 1

        if not hasattr(doc, 'metadata'):
            doc.metadata = {}
        doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
        doc.metadata['weighted_score'] = weighted_score
        doc.metadata['original_score'] = score

        weighted_results.append((doc, weighted_score, source_type))

        source_name = str(doc.metadata.get('source', 'Unknown'))[:50]
        print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")

    print(f"\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
    print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
    print(f" Other docs: {other_count} (original scores)")

    # Ascending by weighted score: smallest first.
    weighted_results.sort(key=lambda item: item[1])

    query_lower = query.lower()
    # Plain substring test, so e.g. "tools" and "toolbar" also count as "tool".
    is_tool_comparison = any(
        word in query_lower
        for word in ['tool', 'compare', 'choose', 'vs', 'versus', 'better']
    )

    if is_tool_comparison:
        print(f"\n🎯 TOOL COMPARISON QUERY DETECTED")
        print(f" Strategy: Heavy SysModeler focus + selective others")

        # Up to 8 SysModeler chunks followed by up to 4 others, each group in
        # ranked order.
        sysmodeler_docs = [doc for doc, _, type_ in weighted_results if type_ == "SysModeler"][:8]
        other_docs = [doc for doc, _, type_ in weighted_results if type_ == "Other"][:4]
        final_docs = sysmodeler_docs + other_docs
    else:
        print(f"\n🎯 GENERAL SYSML QUERY DETECTED")
        print(f" Strategy: Balanced with SysModeler preference")

        final_docs = [doc for doc, _, _ in weighted_results[:12]]

    print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
    sysmodeler_selected = 0
    other_selected = 0

    for i, doc in enumerate(final_docs):
        source_type = doc.metadata.get('source_type', 'unknown')
        source_name = doc.metadata.get('source', 'Unknown')
        weighted_score = doc.metadata.get('weighted_score', 0)

        if source_type == 'sysmodeler':
            sysmodeler_selected += 1
            type_emoji = "✅"
        else:
            other_selected += 1
            type_emoji = "📚"

        print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")

    print(f"\n📊 FINAL COMPOSITION:")
    print(f" SysModeler docs: {sysmodeler_selected}")
    print(f" Other docs: {other_selected}")
    print("="*80)

    return "\n\n".join(doc.page_content for doc in final_docs)


def sysml_retriever(query: str) -> str:
    """Return retrieved context text for *query*, or a fallback on failure.

    Successful lookups are memoized (up to 100 distinct queries). Failures are
    logged, printed, and answered with a fixed apology string, but are not
    cached, so the same query is retried against the vector store next time.

    Args:
        query: The search text.

    Returns:
        The selected chunks joined by blank lines, or
        "Unable to retrieve information at this time." if retrieval raised.
    """
    try:
        return _retrieve_sysml_context(query)
    except Exception as e:
        logger.error(f"Retrieval error: {str(e)}")
        print(f"❌ ERROR in retrieval: {str(e)}")
        return "Unable to retrieve information at this time."


# Keep the lru_cache management hooks reachable through the public name.
sysml_retriever.cache_info = _retrieve_sysml_context.cache_info
sysml_retriever.cache_clear = _retrieve_sysml_context.cache_clear
| |
|
| | |
# Tool schema passed to the chat-completions call: a single function named
# "SysMLRetriever" that takes one required string argument, `query`.
tools_definition = [
    {
        "type": "function",
        "function": {
            "name": "SysMLRetriever",
            "description": "Use this to answer questions about SysML diagrams and modeling.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "The search query to find information about SysML"}
                },
                "required": ["query"]
            }
        }
    }
]


# Maps the function name found in a model tool call to the local callable that
# serves it; the key must match the "name" declared in tools_definition.
tool_mapping = {
    "SysMLRetriever": sysml_retriever
}
| |
|
| | |
def convert_history_to_messages(history):
    """Flatten (user, assistant) pairs into a list of chat message dicts.

    Each pair yields two entries, in order: a "user" message followed by an
    "assistant" message, each shaped as {"role": ..., "content": ...}.
    """
    return [
        {"role": speaker, "content": text}
        for user_text, bot_text in history
        for speaker, text in (("user", user_text), ("assistant", bot_text))
    ]
| |
|
| | |
def count_conversation_turns(history):
    """Return the number of entries currently held in *history*."""
    completed_turns = len(history)
    return completed_turns
| |
|
| | |
def sysml_chatbot(message, history):
    """Answer one user message and record the exchange in *history*.

    Flow: build a turn-aware system prompt, make a first chat-completions call
    that is forced to call the SysMLRetriever tool, run that tool locally,
    then make a second call with the tool output to get the final answer.
    Em dashes are stripped from the answer once, just before it is stored.

    Args:
        message: The user's new message text.
        history: List of (user, assistant) tuples for the conversation so far.
            It is mutated in place: the new exchange is appended.

    Returns:
        A tuple ("", history): an empty string followed by the updated history.
        On any exception, or when the model yields no text, the stored answer
        is "Sorry, something went wrong.".
    """
    chat_messages = convert_history_to_messages(history)

    # Number of exchanges already completed; the prompt reports this + 1.
    turn_count = count_conversation_turns(history)

    should_ask_question = turn_count < 4
    # NOTE(review): this is true for turn_count 4 and 5 (reported as turns 5
    # and 6), while the prompt labels the phase "TURN 4-5" and
    # should_include_link below uses turn_count 3/4. Confirm which numbering
    # is intended before changing either.
    ask_intriguing_question = turn_count in (4, 5)

    should_include_link = (
        turn_count == 0
        or turn_count in (3, 4)
        or (turn_count >= 5 and (turn_count + 1) % 5 == 0)
    )

    full_messages = [
        {"role": "system", "content": f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.

CONVERSATION TURN: {turn_count + 1}
INCLUDE_LINK: {should_include_link}

CONVERSATION STYLE:
- Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
- After the first message, continue naturally without reintroducing yourself
- If user gives you their name, use it throughout. If not, continue naturally without asking again
- Talk like a knowledgeable colleague, not a formal bot
- CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
- NEVER EVER use the em dash character (—) under any circumstances
- When you want to add extra information, use commas or say "which means" or "and that"
- Replace any "—" with ", " or ". " or " and " or " which "
- SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
- SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
- NEVER use bullet points
- Be enthusiastic but not pushy about SysModeler.ai
- Use "you" and "your" to make it personal
- Share insights like you're having a friendly chat

QUESTION TIMING STRATEGY:
- TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
- TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
- TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
- TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}

RESPONSE APPROACH:

1. **For SysModeler-specific questions**:
Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}

2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
- Lead with SysModeler.ai capabilities and how it handles this specific need
- Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
- Then briefly acknowledge other tools exist without detailed descriptions
- Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
- Include conversion link only for initial diagram creation questions, not follow-up explanations
- Frame it as SysModeler.ai being the modern solution vs traditional manual tools

3. **For first-time greetings (hi, hello, etc.)**:
Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"

4. **For tool comparison questions ONLY**:
Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."

5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
Follow the RAG bias pattern - SysModeler.ai first and detailed:
- Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
- Then briefly mention other established tools exist
- Highlight SysML features that naturally showcase SysModeler.ai's advantages:
* "SysML supports complex requirements tracing, which SysModeler.ai automates..."
* "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
* "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
- Make the advantages feel natural, not pushy

6. **For ALL other conversations (names, general chat, non-tool questions)**:
Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.

INTERACTION GUIDELINES:
- Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
- Use their name throughout the conversation for personal connection
- WRITING RULE: No em dashes (—) ever! Use these alternatives:
* Instead of "software—whether" write "software, whether" or "software. Whether"
* Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
* Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
- Never use bullet points or formal lists
- Write in paragraphs that flow naturally
- Include personal touches like "I think you'll find..." or "What's really cool is..."
- Show genuine interest in helping the user succeed
- Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
- When the pricing or anything related to pricing is asked, the pricing or billing should be mentioned in dollars always along with the credits or plans and packages.

ENGAGING QUESTIONS TO USE STRATEGICALLY:
{"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
{"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
{"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
{"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
{"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}

CONVERSION OPPORTUNITIES:
- Include the AI creation link for these specific situations:
* First-time greetings: Include main site link and create-with-AI link in introduction
* TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
* TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
* Tool comparison questions ("What tools are available?", "SysML tool landscape")
- NEVER include the link for:
* Turns 2-3 (relationship building phase)
* Pure educational follow-ups unless at 5-message intervals
* When user is clearly not interested in trying the tool
- LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
- Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"

Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""}
    ] + chat_messages + [{"role": "user", "content": message}]

    try:
        # First pass: tool_choice forces the model to call SysMLRetriever.
        response = client.chat.completions.create(
            model=AZURE_OPENAI_LLM_DEPLOYMENT,
            messages=full_messages,
            tools=tools_definition,
            tool_choice={"type": "function", "function": {"name": "SysMLRetriever"}}
        )
        assistant_message = response.choices[0].message
        if assistant_message.tool_calls:
            # Only the first tool call is served.
            tool_call = assistant_message.tool_calls[0]
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            if function_name in tool_mapping:
                function_response = tool_mapping[function_name](**function_args)
                # Echo the assistant's tool call, then supply its result, so
                # the second request carries a complete tool exchange.
                full_messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": function_name,
                            "arguments": tool_call.function.arguments
                        }
                    }]
                })
                full_messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": function_response
                })
                # Second pass: no tools, just the final answer text.
                second_response = client.chat.completions.create(
                    model=AZURE_OPENAI_LLM_DEPLOYMENT,
                    messages=full_messages
                )
                answer = second_response.choices[0].message.content
            else:
                answer = f"I tried to use a function '{function_name}' that's not available."
        else:
            answer = assistant_message.content

        # Clean exactly once, and never store an empty/None answer: it would be
        # replayed as the assistant's content on the next turn.
        answer = clean_em_dashes(answer) if answer else "Sorry, something went wrong."
        history.append((message, answer))
        return "", history
    except Exception as e:
        # Log with traceback as well as printing, matching the retriever's
        # error handling.
        logger.exception("Error in function calling")
        print(f"Error in function calling: {str(e)}")
        history.append((message, "Sorry, something went wrong."))
        return "", history
| |
|
| | |
# Gradio UI: a chat window, a message box with a send button, and a Clear
# button. Conversation history lives in `state` and is handed to
# sysml_chatbot on every submit.
with gr.Blocks(css="""
#submit-btn {
height: 100%;
background-color: #48CAE4;
color: white;
font-size: 1.5em;
}
""") as demo:

    gr.Markdown("## SysModeler Chatbot")

    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        with gr.Column(scale=5):
            msg = gr.Textbox(
                placeholder="Ask me about SysML diagrams or concepts...",
                lines=3,
                show_label=False
            )
        with gr.Column(scale=1, min_width=50):
            submit_btn = gr.Button("➤", elem_id="submit-btn")

    clear = gr.Button("Clear")
    state = gr.State([])

    # Both the button and Enter-in-textbox send the message. sysml_chatbot
    # returns ("", history): the textbox is emptied and the chat is redrawn.
    submit_btn.click(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
    msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
    # Clear must reset the stored history too. Otherwise the old conversation
    # is still fed to the model and reappears in the chat on the next message.
    clear.click(fn=lambda: ([], "", []), inputs=None, outputs=[chatbot, msg, state])


# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()