File size: 3,198 Bytes
4205fce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37b4b2f
4205fce
37b4b2f
 
4205fce
37b4b2f
4205fce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af0aef7
 
 
4205fce
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import transformers
import torch
import gradio as gr
from datasets import load_dataset

# Model identifier on the Hugging Face Hub; loaded once at import time
# so every request reuses the same in-memory pipeline.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Build the text-generation pipeline (module global, used by
# generate_response below). bfloat16 halves memory vs. fp32.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",  # Auto-detect GPU; falls back to CPU if none is found
)

# Cisco CLI command dataset (module global, used by search_dataset below).
# NOTE(review): downstream code assumes a 'train' split whose rows carry
# "command", "description" and optionally "examples" keys — confirm schema.
dataset = load_dataset("quantumminds/cisco_cli_commands")

# Function to search the dataset for a matching command
def search_dataset(user_input):
    """Look up a known Cisco CLI command inside the user's question.

    Scans the dataset's 'train' split and returns a markdown-formatted
    answer for the first entry whose "command" string appears
    (case-insensitively) as a substring of *user_input*.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        A markdown string with command, description and first example,
        or None when no command matches.
    """
    # Lowercase once instead of on every loop iteration.
    query = user_input.lower()
    for entry in dataset['train']:  # assuming the dataset is in the 'train' split
        if entry["command"].lower() in query:
            # Guard against a missing OR empty "examples" list: the
            # original indexed examples[0] whenever the key existed,
            # which raised IndexError on an empty list.
            examples = entry.get("examples") or []
            if examples:
                example = examples[0].get("example_command", "No example available")
            else:
                example = "No example available"
            return (
                f"**Command:** {entry['command']}\n\n"
                f"**Description:** {entry['description']}\n\n"
                f"**Example:** {example}"
            )
    return None  # No dataset entry matched the input.

# Function to generate response using the dataset or fallback to the pipeline
def generate_response(user_input, chat_history):
    """Answer *user_input*, preferring an exact dataset match over the LLM.

    Appends the user turn and the assistant turn to *chat_history*
    (Gradio "messages" format: dicts with "role"/"content") in place.

    Args:
        user_input: The user's question.
        chat_history: The running list of message dicts; mutated in place.

    Returns:
        The updated chat_history.
    """
    # The user turn is recorded regardless of which backend answers,
    # so append it once up front instead of duplicating it per branch.
    chat_history.append({"role": "user", "content": user_input})

    # First, try the curated dataset — cheap compared to an LLM call.
    dataset_response = search_dataset(user_input)
    if dataset_response is not None:
        chat_history.append({"role": "assistant", "content": dataset_response})
        return chat_history  # Skip the LLM entirely on a dataset hit.

    # No dataset match: fall back to the model.
    outputs = pipeline(user_input, max_new_tokens=512)
    generated = outputs[0]["generated_text"]

    # By default the text-generation pipeline echoes the prompt at the
    # start of "generated_text"; strip it so the chat window does not
    # show the user's question twice.
    if generated.startswith(user_input):
        assistant_response = generated[len(user_input):].lstrip()
    else:
        assistant_response = generated

    chat_history.append({"role": "assistant", "content": assistant_response})
    return chat_history

# Create Gradio interface with chatbot and textbox
# Build the Gradio UI: a chat window, a text input, and two buttons.
with gr.Blocks(theme=gr.themes.Ocean()) as iface:
    gr.Markdown("<h1 style='text-align: center;'>Cisco Configuration Assistant</h1>")
    chatbot = gr.Chatbot(label="Cisco Configuration Chatbot", type="messages", height=500)
    user_input = gr.Textbox(placeholder="Enter your Cisco switch/router question here...", label="Your Input")
    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear Feed")

    def _on_submit(query, history):
        """Run one chat turn and clear the input textbox."""
        updated = generate_response(query, history)
        return updated, ""

    # Pressing Enter in the textbox and clicking Submit run the same turn.
    user_input.submit(_on_submit, [user_input, chatbot], [chatbot, user_input])
    submit_btn.click(_on_submit, [user_input, chatbot], [chatbot, user_input])
    # "Clear Feed" resets the chat history to an empty list.
    clear_btn.click(lambda: [], None, chatbot, queue=False)

# Print the dataset summary to the console as a quick sanity check
# that loading succeeded.
print(dataset)

# Launch the Gradio app (blocks until the server is stopped).
iface.launch()