File size: 4,949 Bytes
41b57f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import openai
import numpy as np
import re
from typing import List, Tuple
from config import EMBED_MODEL

def get_embedding(text: str) -> List[float]:
    """Return the embedding vector for ``text``.

    Newlines are replaced with spaces and surrounding whitespace is removed
    before the text is submitted, as a single-item batch, to the embeddings
    endpoint using the model named by ``EMBED_MODEL``.
    """
    normalized = text.replace("\n", " ").strip()
    result = openai.embeddings.create(model=EMBED_MODEL, input=[normalized])
    first_item = result.data[0]
    return first_item.embedding

def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity between two vectors.

    Args:
        a: First vector (any sequence NumPy can convert to an array).
        b: Second vector, of the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms, as a built-in ``float``. Returns ``0.0`` when either
        vector has zero norm, since the similarity is undefined there.
    """
    vec_a = np.asarray(a)
    vec_b = np.asarray(b)
    # Compute each norm once and reuse it for both the zero check and the
    # final division.
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    # Convert the NumPy scalar so callers get a plain Python float, matching
    # the annotation and the 0.0 returned above.
    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))

def clean_time(time_str: str) -> str:
    """Normalize a 12-hour clock time to ``"H:MM AM"`` / ``"H:MM PM"``.

    The first time found in ``time_str`` is used. The hour keeps the digits
    it was written with (no zero-padding is added or removed), missing
    minutes become ``"00"``, and the AM/PM marker is upper-cased.

    Args:
        time_str: Free-form text that may contain a time such as ``"7pm"``,
            ``"10:30 am"`` or ``"930 AM"``.

    Returns:
        The normalized time, the stripped input when no time is recognized,
        or ``""`` when the input is empty or ``None``.
    """
    if not time_str:
        return ""

    # Minutes, when present, must be exactly two digits so that compact forms
    # like "930 AM" split as 9:30 rather than 93:0. The lookbehind stops a
    # match from starting in the middle of a digit/colon run (for example the
    # "30:45" inside "12:30:45 PM").
    time_match = re.search(
        r'(?<![\d:])(\d{1,2})(?::?(\d{2}))?\s*(AM|PM)',
        time_str,
        re.IGNORECASE,
    )
    if time_match:
        hour = time_match.group(1)
        minute = time_match.group(2) or "00"
        ampm = time_match.group(3).upper()
        return f"{hour}:{minute} {ampm}"

    return time_str.strip()

def find_top_k_matches(user_embedding, dataset, k=3):
    """Return the ``k`` dataset entries most similar to ``user_embedding``.

    Args:
        user_embedding: Query vector passed to ``cosine_similarity``.
        dataset: Iterable of ``(entry_id, text, embedding)`` triples.
        k: Maximum number of results to return.

    Returns:
        A list of ``(score, entry_id, text)`` tuples ordered by descending
        score. Entries with equal scores keep their dataset order.
    """
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    # Sort on the score only. Comparing whole tuples would fall through to
    # entry_id/text on ties, which raises TypeError for non-orderable ids and
    # makes the ranking depend on the payload rather than the similarity.
    scored.sort(key=lambda item: item[0], reverse=True)
    return scored[:k]

def classify_intent(question: str) -> str:
    """
    Route a user question to one of two response modes.

    Returns "Mode A" (Recommendation Mode) when the model's reply contains
    "Mode A"; otherwise returns "Mode B" (Front Desk Mode). "Mode B" is also
    returned when the classification request raises any exception, in which
    case the error is printed.
    """
    instructions = f"""Classify the following user question into one of two modes:
1. "Mode A - Recommendation Mode": Use this if the user is asking about workshops, specific dates, what's available this month, asking for recommendations, or career goals (like getting an agent).
2. "Mode B - Front Desk Mode": Use this for broad introductory questions, kids classes, signing up, summit, instructor roles, auditing, online vs in-studio, general policies, or specific questions about existing classes.

User Question: "{question}"

Response must be exactly "Mode A" or "Mode B"."""

    fallback_mode = "Mode B"  # Front Desk Mode is the default
    try:
        completion = openai.chat.completions.create(
            messages=[{"role": "user", "content": instructions}],
            model="gpt-4o-mini",
            max_tokens=5,
            temperature=0,
        )
        answer = completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error in intent classification: {e}")
        return fallback_mode

    return "Mode A" if "Mode A" in answer else fallback_mode

def should_include_email(question: str) -> bool:
    """
    Determine if the contact email should be shown based on user intent.

    The question is lower-cased and searched for each entry of
    ``config.EMAIL_ONLY_KEYWORDS`` as a whole word or phrase (word-boundary
    match). Keywords are lower-cased as well, so the comparison is
    case-insensitive on both sides.

    Args:
        question: The user's question.

    Returns:
        True if any keyword occurs in the question, otherwise False.
    """
    # Imported here rather than at module top, as in the original code.
    from config import EMAIL_ONLY_KEYWORDS

    question_lower = question.lower()
    for word in EMAIL_ONLY_KEYWORDS:
        # The question is lower-cased, so a keyword containing upper-case
        # letters could never match unless it is lower-cased too.
        keyword = word.lower()
        # An empty/blank keyword would yield a pattern that matches almost
        # any question; skip it instead of always returning True.
        if not keyword.strip():
            continue
        pattern = rf'\b{re.escape(keyword)}\b'
        if re.search(pattern, question_lower):
            return True

    return False

def classify_user_type(question: str, history: List[dict] = None) -> str:
    """
    Classify the user type into:
    - new_actor
    - experienced_actor
    - parent
    - current_student
    - unknown

    Args:
        question: The user's current question.
        history: Optional list of prior chat messages (dicts with "role" and
            "content"). Only the last three are included in the prompt, each
            truncated to 100 characters.

    Returns:
        The first of the five category names found in the model's reply, or
        "unknown" when none is found or the request raises an exception (the
        error is printed).
    """
    history_str = ""
    if history:
        context_lines = []
        for m in history[-3:]:
            # Messages may lack "content" or carry None (e.g. tool-call
            # turns); treat those as empty text instead of raising outside
            # the try block below.
            role = m.get("role", "unknown")
            content = m.get("content") or ""
            context_lines.append(f"{role}: {str(content)[:100]}...")
        history_str = "\nConversation context:\n" + "\n".join(context_lines)

    prompt = f"""Classify the user into exactly one of these categories based on their question and context:
1. "new_actor": Just starting out, has no experience, or is asking how to begin.
2. "experienced_actor": Already has credits, mentions agents, looking for advanced workshops, or refers to their career progress.
3. "parent": Asking on behalf of their child, mentions "my kid", "my son", "my daughter", "teens".
4. "current_student": Refers to past/current classes at Get Scene, mentions a specific GSP membership, or asks about recurring student workshops.
5. "unknown": Not enough information yet.

User Question: "{question}"{history_str}

Response must be exactly one of: new_actor, experienced_actor, parent, current_student, unknown."""

    try:
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=10
        )
        prediction = response.choices[0].message.content.strip().lower()
        valid_types = ["new_actor", "experienced_actor", "parent", "current_student", "unknown"]
        # Substring match tolerates extra quoting/punctuation in the reply.
        for t in valid_types:
            if t in prediction:
                return t
        return "unknown"
    except Exception as e:
        print(f"Error in user type classification: {e}")
        return "unknown"