atz21 committed on
Commit
40d9691
·
verified ·
1 Parent(s): 380638f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -156
app.py CHANGED
@@ -2,213 +2,204 @@ import os
2
  import gradio as gr
3
  import google.generativeai as genai
4
  from markdown_pdf import MarkdownPdf, Section
5
-
6
- # -------------------- CONFIG --------------------
7
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
8
 
9
  # ---------- PROMPTS ----------
10
- TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents. Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
11
- Primary Objective: Preserve the author's intended solution path while filtering out all mistakes, corrections, and extraneous marks. The final output must be perfectly formatted and easy to follow.
12
-
13
- Core Instructions:
14
- Hierarchical Structure:
15
- - Identify all questions and subquestions based on their numbering (e.g., 1. a), i)).
16
- - Use ## for main questions (e.g., ## Question 1).
17
- - Use ### for subquestions (e.g., ### a), ### i)).
18
- - If a question number appears out of its logical sequence, transcribe it with the label provided in the source.
19
-
20
- What to Exclude (Content Filtration):
21
- - Mistakes: Completely ignore and do not transcribe any number, variable, or expression that has been struck through, scribbled over, or crossed out. Transcribe only the corrected, final version.
22
- - Extraneous Marks: Do not include any doodles, underlines (unless part of a fraction), or stray marks not relevant to the solution.
23
-
24
- Crucial Distinction: Cancellations vs. Step Cuts:
25
- - Term Cancellation: This is a valid mathematical step where terms cancel each other out (e.g., +2x and -2x, or a term divided by itself).
26
- Action: Transcribe the step where the cancellation occurs. Immediately after that line, add a concise, bracketed note explaining what was cancelled.
27
- - Step Cut: This is when the author skips intermediate algebraic or arithmetic steps (e.g., jumping from 2b = 2 directly to b = 1).
28
- Action: Transcribe the steps exactly as they appear. Do not invent or add the missing steps. The logical jump in the transcribed output serves to represent the step cut.
29
-
30
- Formatting and Special Cases:
31
- - Equations: Enclose all mathematical equations and multi-line calculations in Markdown code blocks for clarity and proper rendering.
32
- - Illegibility: If a specific word or number is impossible to read, use the placeholder [illegible].
33
- - Graphs: Do not attempt to recreate graphs. Instead, describe them textually. Note the type of curve (e.g., parabola, polynomial) and list any labeled key points like intercepts, vertices, or asymptotes.
 
 
 
 
 
 
 
34
  """
35
-
36
- # Full 9-rule grading prompt (point 9 is Presentation; "Calculators" section removed)
37
- GRADING_PROMPT = """Instructions to Examiners
38
-
39
  Abbreviations:
40
  - M: Marks awarded for attempting to use a correct Method.
41
  - A: Marks awarded for an Answer or for Accuracy; often dependent on preceding M marks.
42
  - R: Marks awarded for clear Reasoning.
43
  - AG: Answer given in the question and so no marks are awarded.
44
  - FT: Follow through. The practice of awarding marks, despite candidate errors in previous parts, for their correct methods/answers using incorrect results.
45
-
46
  --------------------------------------------
47
- Using the Markscheme
48
-
49
  ## 1. General
50
  Award marks using the annotations as noted in the markscheme (e.g., M1, A2).
51
-
52
  ## 2. Method and Answer/Accuracy marks
53
- - Do not automatically award full marks for a correct answer; all working must be checked, and marks awarded according to the markscheme.
54
- - It is generally not possible to award M0 followed by A1, as A marks depend on the preceding M mark(s).
55
- - Where M and A marks are noted on the same line, e.g. M1A1, this usually means:
56
- - M1: attempt to use an appropriate method (e.g. substitution into a formula).
57
- - A1: correct values used.
58
- - Where there are two or more A marks on the same line, they may be awarded independently; so if the first value is incorrect, but the next two are correct, award A0A1A1.
59
- - Where the markscheme specifies A3, M2 etc., do not split the marks unless a note allows it.
60
- - The response to a “show that” question does not need to restate the AG line, unless a Note makes this explicit.
61
- - Once a correct answer is seen, ignore further working, even if incorrect, unless that incorrect working is then used in a later part. In such cases, award FT marks as appropriate but withhold the final A1.
62
-
63
  ## 3. Implied marks
64
- Implied marks appear in brackets, e.g. (M1), and can only be awarded if correct work is seen or implied by subsequent working/answer.
65
-
66
  ## 4. Follow through (FT) marks
67
- - FT marks are awarded where an incorrect answer from one part is used correctly in later parts.
68
- - Usually, working must be present. However, if all marks in a part are for the final answer or implied, then FT marks may be given even with no working.
69
- - Within a question part, once an error is made, no further A marks can be awarded for work using the error, but M marks may still be awarded.
70
- - If the question becomes simpler due to the error, use discretion to award fewer FT marks.
71
- - If the error gives an inappropriate value (e.g., probability > 1, non-integer where integer required), do not award the final A mark.
72
- - If the candidate’s answer clearly contradicts information given in the question, FT marks should not be awarded.
73
- - Exceptions to these rules will be explicitly noted in the markscheme.
74
-
75
  ## 5. Mis-read (MR)
76
- - If a candidate copies values incorrectly from the question, this is a misread (MR). Penalize only once for that misread.
77
- - Do not award the first mark (even if it is an M mark), but award all others as appropriate.
78
- - If the question becomes much simpler due to MR, use discretion to award fewer marks.
79
- - If MR leads to an inappropriate value (e.g., probability > 1), do not award the final A mark.
80
- - Mis-copying their own work is an error, not MR.
81
- - MR can only be applied when working is seen.
82
-
83
  ## 6. Alternative methods
84
- - Unless the question specifies a method, other correct methods should be credited.
85
- - If the command term is “Hence” (and not “Hence or otherwise”), alternative methods are not permitted unless explicitly noted.
86
- - Alternative methods for whole questions are labeled METHOD 1, METHOD 2, etc.
87
- - Alternative solutions for parts are labeled EITHER … OR.
88
-
89
  ## 7. Alternative forms
90
- - Accept equivalent forms unless the question specifies otherwise.
91
- - Accept international formats (e.g., 1.9 = 1,9 = 1·9; 1000 = 1,000 = 1.000).
92
- - Do not accept final answers in calculator notation. Intermediate working may use it if it shows the required demand.
93
- - Equivalent algebraic/numeric answers may appear in brackets in the markscheme, but examiners should use discretion for equivalence.
94
-
95
  ## 8. Format and accuracy of answers
96
- - If accuracy is specified in the question, marks depend on the answer being given to that accuracy.
97
- - Otherwise, numerical answers should be exact or correct to 3 significant figures.
98
- - If values are carried forward, candidates may use the exact value or the correct 3 s.f. version.
99
-
100
- **Simplification rules:**
101
- - Arithmetic should be simplified where possible.
102
- - Example: 25/4 should be written as 6.25.
103
- - Example: 10/5 should be written as 2.
104
- - Fractions do not need to be in lowest terms, but numerator and denominator must be integers.
105
- - Algebra should be simplified:
106
- - Example: 4e × e^x → 4e^(x+1).
107
- - Example: x² × x³ × x⁴ → x⁹.
108
- - Factorized vs. expanded forms: both acceptable unless otherwise specified.
109
- - Intermediate A marks do **not** need to be simplified.
110
-
111
  ## 9. Presentation of candidate work
112
- - Crossed-out work: do not mark unless candidate indicates it should be considered.
113
- - More than one solution: mark only the first response unless candidate specifies otherwise.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  """
 
 
 
 
 
115
 
116
- # ---------- HELPER: Save to PDF using markdown-pdf ----------
117
  def save_as_pdf(text, filename="output.pdf"):
118
  pdf = MarkdownPdf()
119
  pdf.add_section(Section(text, toc=False))
120
  pdf.save(filename)
121
  return filename
122
 
123
- # ---------- STEP 1: TRANSCRIPTION ----------
124
- def transcribe(ans_file):
125
  try:
126
- ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
127
- model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- resp = model.generate_content([TRANSCRIPTION_PROMPT, ans_uploaded])
 
 
 
130
 
131
- transcription = getattr(resp, "text", None)
132
- if not transcription and resp.candidates:
133
- transcription = resp.candidates[0].content.parts[0].text
134
 
135
- pdf_path = save_as_pdf(transcription, "transcription.pdf")
136
- return transcription, pdf_path
137
- except Exception as e:
138
- return f"❌ Error during transcription: {e}", None
 
 
 
 
 
 
139
 
140
- # ---------- STEP 2: GRADING ----------
141
- def grade(qp_file, ms_file, transcription):
142
- try:
143
- qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
144
- ms_uploaded = genai.upload_file(path=ms_file, display_name="Marking Scheme")
145
- model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
146
-
147
- # Prompt that embeds the full 9 rules and enforces a structured grading table
148
- structured_instructions = (
149
- "You are an official examiner. Use the following grading rules strictly:\n\n"
150
- f"{GRADING_PROMPT}\n\n"
151
- "OUTPUT FORMAT (use GitHub-flavored Markdown table):\n\n"
152
- "| Student wrote | Marks Awarded | Reason (reference the rules; specify error type: A : All Good , B : Silly Mistake , C : Conceptual Error , D : Hard question , E : Not Applicable) |\n"
153
- "|---|---|---|\n"
154
- "Then, after the table, provide a short 'Summary & Final Mark' section with totals and any FT usage noted.\n\n"
155
- "Guidelines:\n"
156
- "1) Apply marks exactly as per the markscheme.\n"
157
- "2) Justify each awarded or withheld mark with explicit references to the numbered rules.\n"
158
- "3) Classify all errors (Conceptual Error, Silly Mistake, Misread, or None).\n"
159
- "4) Enforce dependency between M and A marks (no A awarded if M not earned) and indicate FT when applied.\n"
160
- "5) Do not invent marks that are not present in the markscheme.\n"
161
- "6) Provide step-by-step reasoning for each mark awarded or withheld.\n"
162
- )
163
 
 
164
  response = model.generate_content([
165
- structured_instructions,
166
- qp_uploaded, # uploaded question paper
167
- ms_uploaded, # uploaded marking scheme
168
- transcription # student's transcription
169
  ])
170
-
171
  grading = getattr(response, "text", None)
172
  if not grading and response.candidates:
173
  grading = response.candidates[0].content.parts[0].text
174
 
175
- pdf_path = save_as_pdf(grading, "grading.pdf")
176
- return grading, pdf_path
 
 
 
 
177
  except Exception as e:
178
- return f"❌ Error during grading: {e}", None
179
 
180
  # ---------- GRADIO APP ----------
181
- with gr.Blocks(title="LeadIB AI Grading") as demo:
182
- gr.Markdown("## LeadIB AI Grading\nUpload exam documents to transcribe and grade student answers step by step.")
183
 
184
  with gr.Row():
185
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
186
- ms_file = gr.File(label="Upload Mark Scheme (PDF)", type="filepath")
187
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
188
 
189
- # Step 1: Transcription
190
- transcribe_btn = gr.Button("Step 1: Transcribe Answer Sheet")
191
  with gr.Row():
192
- transcription_out = gr.Textbox(label="📄 Student Transcription", lines=20)
193
- transcription_pdf = gr.File(label="⬇️ Download Transcription (PDF)")
194
 
195
- # Step 2: Grading
196
- grade_btn = gr.Button("Step 2: Grade the Student")
197
  with gr.Row():
198
- grading_out = gr.Textbox(label="✅ Grading Report (Step-by-Step)", lines=20)
199
- grading_pdf = gr.File(label="⬇️ Download Grading (PDF)")
200
-
201
- # Button Logic
202
- transcribe_btn.click(
203
- fn=transcribe,
204
- inputs=[ans_file],
205
- outputs=[transcription_out, transcription_pdf],
206
- show_progress=True
207
- )
208
- grade_btn.click(
209
- fn=grade,
210
- inputs=[qp_file, ms_file, transcription_out],
211
- outputs=[grading_out, grading_pdf],
212
  show_progress=True
213
  )
214
 
 
2
  import gradio as gr
3
  import google.generativeai as genai
4
  from markdown_pdf import MarkdownPdf, Section
5
+ import pikepdf
 
 
6
 
7
  # ---------- PROMPTS ----------
8
+ PROMPTS = {
9
+ "ALIGNMENT_PROMPT": {
10
+ "role": "system",
11
+ "content": """Your Role: You are an expert examiner and transcription specialist.
12
+ Your task is to **align three sources**:
13
+ - Question Paper (QP)
14
+ - Markscheme (MS)
15
+ - Student Answer Sheet (AS)
16
+ ### Instructions
17
+ 1. Parse all documents carefully and align them **per question and sub-question**.
18
+ 2. For each question/sub-question, produce a structured block:
19
+ ---
20
+ ## Question X (and sub-question if applicable)
21
+ **QP:** [Insert the exact question text]
22
+ **MS:** [Insert the relevant part of the markscheme]
23
+ **AS:** [Insert the student's final cleaned answer transcription]
24
+ ---
25
+ 3. Formatting Rules:
26
+ - Use `##` for main questions and `###` for sub-questions.
27
+ - Write **QP | MS | AS** exactly in that order.
28
+ - Preserve all mathematical expressions inside fenced code blocks.
29
+ - Do not re-create diagrams/graphs. Write `[Graph omitted]`.
30
+ - If part of the student's answer is unreadable, write `[illegible]`.
31
+ - If a student skipped a question, write `[No response]`.
32
+ - Keep MS annotations (M1, A1, R1, etc.) exactly as in the original.
33
+ 4. Output must be **clean, deterministic, and consistent** — so that another model can grade directly using this aligned representation.
34
+ ### Example
35
+ ## Question 1
36
+ **QP:** Expand `(1+x)^3`
37
+ **MS:** M1 for binomial expansion, A1 for coefficients, A1 for final form
38
+ **AS:**
39
  """
40
+ },
41
+ "GRADING_PROMPT": {
42
+ "role": "system",
43
+ "content": """You are an official examiner. Use the following grading rules strictly.
44
  Abbreviations:
45
  - M: Marks awarded for attempting to use a correct Method.
46
  - A: Marks awarded for an Answer or for Accuracy; often dependent on preceding M marks.
47
  - R: Marks awarded for clear Reasoning.
48
  - AG: Answer given in the question and so no marks are awarded.
49
  - FT: Follow through. The practice of awarding marks, despite candidate errors in previous parts, for their correct methods/answers using incorrect results.
 
50
  --------------------------------------------
 
 
51
  ## 1. General
52
  Award marks using the annotations as noted in the markscheme (e.g., M1, A2).
 
53
  ## 2. Method and Answer/Accuracy marks
54
+ - Do not automatically award full marks for a correct answer; all working must be checked.
55
+ - It is generally not possible to award M0 followed by A1.
56
+ - Where M and A marks are noted on the same line (M1A1), M is for method, A is for accuracy.
57
+ - Multiple A marks can be independent.
 
 
 
 
 
 
58
  ## 3. Implied marks
59
+ Implied marks (M1) can only be awarded if correct work is seen or implied.
 
60
  ## 4. Follow through (FT) marks
61
+ - Award FT if an earlier wrong answer is used consistently later.
62
+ - Do not award FT if the result contradicts the question (e.g., probability > 1).
 
 
 
 
 
 
63
  ## 5. Mis-read (MR)
64
+ - Penalize once if the candidate misreads a value.
65
+ - Award other marks as appropriate.
 
 
 
 
 
66
  ## 6. Alternative methods
67
+ - Accept valid alternatives unless "Hence" forbids it.
 
 
 
 
68
  ## 7. Alternative forms
69
+ - Accept equivalent numeric/algebraic forms unless specified otherwise.
 
 
 
 
70
  ## 8. Format and accuracy of answers
71
+ - Use correct accuracy (3 s.f. if not specified).
72
+ - Arithmetic and algebra should be simplified.
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ## 9. Presentation of candidate work
74
+ - Ignore crossed-out work unless indicated.
75
+ - Mark only the first solution unless candidate specifies otherwise.
76
+ ## 10. Graph/Diagram Questions
77
+ - If a question requires drawing or interpreting a graph/diagram, assume the student has done it correctly and award full marks for that part.
78
+ --------------------------------------------
79
+ ### OUTPUT FORMAT
80
+ Produce a GitHub-flavored Markdown table with 3 columns:
81
+ | Student wrote | Marks Awarded | Reason |
82
+ |---------------|---------------|--------|
83
+ Special Formatting Rule:
84
+ - Whenever a mark is lost (M0, A0, R0 etc.), wrap it in red using: `<span style="color:red">M0</span>`.
85
+ - Also wrap the corresponding Reason in red color.
86
+ - Keep awarded marks (M1, A1, etc.) in plain text.
87
+ - If mixed (e.g., M1A0A1), only highlight the lost marks (`A0`) and its reason.
88
+ After the table, provide:
89
+ ### Summary & Final Mark
90
+ - Total marks obtained vs total available
91
+ - Any FT (follow-through) applied
92
+ - Classification of errors (Conceptual, Silly mistake, Misread, etc.)
93
  """
94
+ }
95
+ }
96
+
97
+ # -------------------- CONFIG --------------------
98
+ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
99
 
100
+ # ---------- HELPER: Save to PDF ----------
101
  def save_as_pdf(text, filename="output.pdf"):
102
  pdf = MarkdownPdf()
103
  pdf.add_section(Section(text, toc=False))
104
  pdf.save(filename)
105
  return filename
106
 
107
+ # ---------- HELPER: PDF Compression ----------
108
def compress_pdf(input_path, output_path):
    """Write a size-optimized copy of a PDF and return the path to use.

    Args:
        input_path: Path of the PDF to compress.
        output_path: Where the rewritten PDF is saved.

    Returns:
        ``output_path`` when the rewrite succeeds; otherwise ``input_path`` so
        the caller can carry on with the original file (best effort).
    """
    try:
        # The context manager closes the file even when save() raises.
        with pikepdf.open(input_path) as pdf:
            # ``optimize_version`` is not a documented Pdf.save() keyword;
            # passing it raised TypeError, so compression never happened and
            # the except branch always returned the original file. Use the
            # documented size-reduction options instead.
            pdf.save(
                output_path,
                compress_streams=True,
                recompress_flate=True,
                object_stream_mode=pikepdf.ObjectStreamMode.generate,
            )
        return output_path
    except Exception as e:
        # Deliberately broad: compression is optional, never fatal.
        print(f"❌ Compression failed for {input_path}: {e}")
        return input_path
117
+
118
def check_and_compress(path, max_bytes=20 * 1024 * 1024):
    """Return a usable path for ``path``, compressing the file if it is too big.

    Args:
        path: Filesystem path of the uploaded PDF.
        max_bytes: Size threshold in bytes. Files strictly larger than this
            are handed to ``compress_pdf``. Defaults to 20 MB.

    Returns:
        ``path`` unchanged when the file is within the limit; otherwise the
        value returned by ``compress_pdf`` for ``<stem>_compressed.pdf``.
    """
    if os.path.getsize(path) <= max_bytes:
        return path

    stem, _ext = os.path.splitext(path)
    compressed_path = stem + "_compressed.pdf"
    # Report the limit actually applied; renders as ">20MB" for the default.
    limit_mb = max_bytes / (1024 * 1024)
    print(f"⚡ Compressing {os.path.basename(path)} (>{limit_mb:g}MB)...")
    return compress_pdf(path, compressed_path)
124
+
125
+ # ---------- HELPER: Create Model with Fallback ----------
126
def create_model():
    """Build the Gemini model object used by the pipeline.

    Constructs ``gemini-2.5-pro`` with temperature 0; if that construction
    raises, constructs ``gemini-2.5-flash`` with the same setting instead.

    NOTE(review): only exceptions raised while constructing the object reach
    the fallback; errors from later ``generate_content`` calls are not
    covered here. Presumably construction does not contact the API, in which
    case the fallback rarely fires — confirm against the SDK.
    """
    try:
        print("⚡ Using gemini-2.5-pro model")
        primary = genai.GenerativeModel(
            "gemini-2.5-pro",
            generation_config={"temperature": 0},
        )
    except Exception:
        print("⚡ Falling back to gemini-2.5-flash model")
        return genai.GenerativeModel(
            "gemini-2.5-flash",
            generation_config={"temperature": 0},
        )
    return primary
133
+
134
+ # ---------- PIPELINE: ALIGN + GRADE ----------
135
def _response_text(response):
    """Return the text of a ``generate_content`` response, or None if empty."""
    try:
        text = getattr(response, "text", None)
    except ValueError:
        # NOTE(review): the SDK's ``text`` accessor is understood to raise
        # ValueError when the reply has no usable parts — confirm.
        text = None
    if text:
        return text

    candidates = getattr(response, "candidates", None)
    if not candidates:
        return None
    parts = candidates[0].content.parts
    return parts[0].text if parts else None


def align_and_grade(qp_file, ms_file, ans_file):
    """Align the three uploaded PDFs, grade the result, and save both as PDFs.

    Args:
        qp_file: Path to the question paper PDF.
        ms_file: Path to the markscheme PDF.
        ans_file: Path to the student's answer sheet PDF.

    Returns:
        A 4-tuple ``(aligned_text, aligned_pdf_path, grading, grading_pdf_path)``.
        On any failure the tuple is ``("❌ Error: ...", None, None, None)``;
        this function does not raise.
    """
    uploads = {
        "Question Paper": qp_file,
        "Markscheme": ms_file,
        "Answer Sheet": ans_file,
    }
    # Fail early with a readable message instead of a TypeError from
    # os.path.getsize(None) when a file was not provided.
    missing = [label for label, path in uploads.items() if not path]
    if missing:
        return f"❌ Error: missing upload(s): {', '.join(missing)}", None, None, None

    try:
        # Name the report after the file the user uploaded. Taking it after
        # compression would yield "<name>_compressed_graded.pdf".
        base_name = os.path.splitext(os.path.basename(ans_file))[0]

        # Compress everything first, then upload, in QP / MS / AS order.
        prepared = {label: check_and_compress(path) for label, path in uploads.items()}
        uploaded = [
            genai.upload_file(path=path, display_name=label)
            for label, path in prepared.items()
        ]

        model = create_model()

        # Step 1: Alignment
        aligned_text = _response_text(
            model.generate_content([PROMPTS["ALIGNMENT_PROMPT"]["content"], *uploaded])
        )
        if not aligned_text:
            raise RuntimeError("alignment step returned no text")
        aligned_pdf_path = save_as_pdf(aligned_text, "aligned_qp_ms_as.pdf")

        # Step 2: Grading (automatic), using only the aligned text
        grading = _response_text(
            model.generate_content([PROMPTS["GRADING_PROMPT"]["content"], aligned_text])
        )
        if not grading:
            raise RuntimeError("grading step returned no text")
        grading_pdf_path = save_as_pdf(grading, f"{base_name}_graded.pdf")

        return aligned_text, aligned_pdf_path, grading, grading_pdf_path

    except Exception as e:
        # UI boundary: surface the failure in the first output box.
        return f"❌ Error: {e}", None, None, None
179
 
180
  # ---------- GRADIO APP ----------
181
+ with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading)") as demo:
182
+ gr.Markdown("## LeadIB AI Grading\nUpload Question Paper, Markscheme, and Student Answer Sheet.\nThe system will align and grade automatically.")
183
 
184
  with gr.Row():
185
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
186
+ ms_file = gr.File(label="Upload Markscheme (PDF)", type="filepath")
187
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
188
 
189
+ run_btn = gr.Button("Start Alignment + Auto-Grading")
190
+
191
  with gr.Row():
192
+ aligned_out = gr.Textbox(label="📄 Aligned QP | MS | AS", lines=20)
193
+ aligned_pdf = gr.File(label="⬇️ Download Aligned (PDF)")
194
 
 
 
195
  with gr.Row():
196
+ grading_out = gr.Textbox(label="✅ Grading Report", lines=20)
197
+ grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
198
+
199
+ run_btn.click(
200
+ fn=align_and_grade,
201
+ inputs=[qp_file, ms_file, ans_file],
202
+ outputs=[aligned_out, aligned_pdf, grading_out, grading_pdf],
 
 
 
 
 
 
 
203
  show_progress=True
204
  )
205