eaglelandsonce committed on
Commit
80c4f68
·
verified ·
1 Parent(s): 87c5cfb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import dspy
5
+
6
+
7
+ # -----------------------------
8
+ # DSPy Signature
9
+ # -----------------------------
10
class GenerateQA(dspy.Signature):
    """Generate a simple synthetic question-answer example."""

    # NOTE(review): DSPy uses the class docstring and each field's ``desc`` as
    # prompt text, so their wording is kept exactly as written.

    # Inputs supplied by the caller for every generation request.
    topic: str = dspy.InputField(desc="topic for the synthetic example")
    difficulty: str = dspy.InputField(desc="easy, medium, or hard")

    # Outputs the language model is asked to produce.
    question: str = dspy.OutputField(desc="a clear question about the topic")
    answer: str = dspy.OutputField(desc="a short correct answer")
16
+
17
+
18
+ # -----------------------------
19
+ # Core generator
20
+ # -----------------------------
21
def generate_synthetic_data(
    openai_api_key: str,
    topic: str,
    difficulty: str,
    num_examples: int,
) -> tuple[pd.DataFrame, str]:
    """Generate synthetic question-answer rows for a topic using DSPy.

    Args:
        openai_api_key: OpenAI API key used for this call only.
        topic: Subject the generated questions should cover.
        difficulty: Difficulty label, forwarded to the model unchanged.
        num_examples: How many examples to generate. The value is coerced
            with ``int()`` so a float coming from a slider widget is accepted.

    Returns:
        A ``(table, json_text)`` pair. On success ``table`` has one row per
        example with the columns ``topic``, ``difficulty``, ``question`` and
        ``answer``, and ``json_text`` is the same rows serialized as indented
        JSON. On invalid input or any failure while generating, both values
        carry a single ``error`` entry instead; no exception is raised.
    """
    clean_key = (openai_api_key or "").strip()
    if not clean_key:
        return (
            pd.DataFrame([{"error": "Please enter your OpenAI API key."}]),
            json.dumps({"error": "Missing OpenAI API key."}, indent=2),
        )

    clean_topic = (topic or "").strip()
    if not clean_topic:
        return (
            pd.DataFrame([{"error": "Please enter a topic."}]),
            json.dumps({"error": "Missing topic."}, indent=2),
        )

    # This function is a UI callback, so every failure is reported back to the
    # caller as an error payload rather than propagated.
    try:
        # ``range`` rejects floats; UI sliders may deliver e.g. ``5.0``.
        count = int(num_examples)

        lm = dspy.LM(
            model="openai/gpt-4o-mini",
            api_key=clean_key,
        )
        generator = dspy.Predict(GenerateQA)

        rows = []
        # Scope the LM to this call instead of ``dspy.configure``: a global
        # configure would install this caller's key as the process-wide
        # default for other requests.
        # NOTE(review): DSPy may also reject ``configure`` calls made from a
        # thread other than the one that configured first -- confirm against
        # the installed DSPy version.
        with dspy.context(lm=lm):
            for index in range(count):
                pred = generator(
                    topic=clean_topic,
                    difficulty=difficulty,
                    # A distinct rollout_id per iteration requests a fresh
                    # sample for each example.
                    config={"temperature": 1.0, "rollout_id": index + 1},
                )
                rows.append({
                    "topic": clean_topic,
                    "difficulty": difficulty,
                    "question": pred.question,
                    "answer": pred.answer,
                })

        return pd.DataFrame(rows), json.dumps(rows, indent=2)

    except Exception as e:
        error_payload = {"error": str(e)}
        return pd.DataFrame([error_payload]), json.dumps(error_payload, indent=2)
70
+
71
+
72
+ # -----------------------------
73
+ # Example loader
74
+ # -----------------------------
75
def load_example(example_topic: str) -> str:
    """Return the chosen example topic unchanged.

    Used as the click handler for the example buttons: the value it returns
    is written into the topic textbox.
    """
    chosen_topic = example_topic
    return chosen_topic
77
+
78
+
79
+ # -----------------------------
80
+ # Gradio UI
81
+ # -----------------------------
82
# Topics offered as one-click example buttons in the UI.
EXAMPLE_TOPICS = [
    "machine learning",
    "prompt engineering",
    "financial literacy",
    "cybersecurity basics",
    "project management",
]
89
+
90
# Build the page: settings on the left, example-topic buttons on the right,
# and the generated table plus raw JSON underneath.
with gr.Blocks(title="DSPy Synthetic Data Creator") as demo:
    gr.Markdown(
        """
        # DSPy Synthetic Data Creator
        Generate simple synthetic Q&A examples using DSPy + OpenAI.
        """
    )

    with gr.Row():
        # Left column: inputs that are passed to generate_synthetic_data.
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                type="password",
                label="OpenAI API Key",
                placeholder="Paste your OpenAI API key here",
            )

            topic = gr.Textbox(
                label="Topic",
                placeholder="Example: machine learning",
            )

            difficulty = gr.Dropdown(
                label="Difficulty",
                choices=["easy", "medium", "hard"],
                value="easy",
            )

            num_examples = gr.Slider(
                label="Number of Examples",
                minimum=1,
                maximum=20,
                step=1,
                value=5,
            )

            generate_btn = gr.Button("Generate Synthetic Data", variant="primary")

        # Right column: one button per example topic; clicking a button
        # writes that topic into the Topic textbox.
        with gr.Column(scale=1):
            gr.Markdown("### Example starting inputs")
            for example_topic in EXAMPLE_TOPICS:
                topic_button = gr.Button(example_topic)
                # A per-button State holds the topic text so the shared
                # handler receives the value belonging to this button.
                topic_button.click(
                    fn=load_example,
                    inputs=gr.State(example_topic),
                    outputs=topic,
                )

    gr.Markdown("### Generated Table")
    output_table = gr.Dataframe(
        headers=["topic", "difficulty", "question", "answer"],
        datatype=["str", "str", "str", "str"],
        interactive=False,
    )

    gr.Markdown("### JSON Output")
    output_json = gr.Code(label="JSON", language="json")

    # Run the generator on click and fill both output components.
    generate_btn.click(
        fn=generate_synthetic_data,
        inputs=[api_key, topic, difficulty, num_examples],
        outputs=[output_table, output_json],
    )

demo.launch()