prithivMLmods commited on
Commit
df399cc
·
verified ·
1 Parent(s): 8cbe68c

update app

Browse files
Files changed (1) hide show
  1. app.py +298 -249
app.py CHANGED
@@ -67,7 +67,6 @@ MODEL_MAP = {
67
  "Captioner-7B-Qwen2.5VL": (processor_z, model_z),
68
  "visionOCR-3B": (processor_v, model_v),
69
  }
70
-
71
  MODEL_CHOICES = list(MODEL_MAP.keys())
72
 
73
  image_examples = [
@@ -76,6 +75,12 @@ image_examples = [
76
  ]
77
 
78
 
 
 
 
 
 
 
79
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
80
  buf = BytesIO()
81
  img.save(buf, format=fmt)
@@ -134,22 +139,25 @@ EXAMPLE_CARDS_HTML = build_example_cards_html()
134
 
135
  def load_example_data(idx_str):
136
  try:
137
- idx = int(float(idx_str))
138
  except Exception:
139
- return json.dumps({"status": "error", "message": "Invalid example index"})
 
140
  if idx < 0 or idx >= len(image_examples):
141
- return json.dumps({"status": "error", "message": "Example index out of range"})
 
142
  ex = image_examples[idx]
143
  media_b64 = file_to_data_url(ex["media"])
144
  if not media_b64:
145
- return json.dumps({"status": "error", "message": "Could not load example image"})
146
- return json.dumps({
 
147
  "status": "ok",
148
  "query": ex["query"],
149
  "media": media_b64,
150
  "model": ex["model"],
151
  "name": os.path.basename(ex["media"]),
152
- })
153
 
154
 
155
  def b64_to_pil(b64_str):
@@ -166,88 +174,132 @@ def b64_to_pil(b64_str):
166
  return None
167
 
168
 
169
- def calc_timeout_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty, gpu_timeout):
 
 
 
170
  try:
171
  return int(gpu_timeout)
172
  except Exception:
173
  return 60
174
 
175
 
176
- @spaces.GPU(duration=calc_timeout_image)
177
  def generate_image(model_name, text, image, max_new_tokens=1024, temperature=0.6, top_p=0.9, top_k=50, repetition_penalty=1.2, gpu_timeout=60):
178
- if not model_name or model_name not in MODEL_MAP:
179
- raise gr.Error("Please select a valid model.")
180
- if image is None:
181
- raise gr.Error("Please upload an image.")
182
- if not text or not str(text).strip():
183
- raise gr.Error("Please enter your instruction.")
184
- if len(str(text)) > MAX_INPUT_TOKEN_LENGTH * 8:
185
- raise gr.Error("Query is too long. Please shorten your input.")
186
-
187
- processor, model = MODEL_MAP[model_name]
188
-
189
- messages = [{
190
- "role": "user",
191
- "content": [
192
- {"type": "image"},
193
- {"type": "text", "text": text},
194
- ]
195
- }]
196
-
197
- prompt_full = processor.apply_chat_template(
198
- messages,
199
- tokenize=False,
200
- add_generation_prompt=True
201
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- inputs = processor(
204
- text=[prompt_full],
205
- images=[image],
206
- return_tensors="pt",
207
- padding=True,
208
- truncation=True,
209
- max_length=MAX_INPUT_TOKEN_LENGTH
210
- ).to(device)
211
-
212
- streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
213
- generation_kwargs = {
214
- **inputs,
215
- "streamer": streamer,
216
- "max_new_tokens": int(max_new_tokens),
217
- "do_sample": True,
218
- "temperature": float(temperature),
219
- "top_p": float(top_p),
220
- "top_k": int(top_k),
221
- "repetition_penalty": float(repetition_penalty),
222
- }
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
225
- thread.start()
226
-
227
- buffer = ""
228
- for new_text in streamer:
229
- buffer += new_text.replace("<|im_end|>", "")
230
- time.sleep(0.01)
231
- yield buffer
232
-
233
- gc.collect()
234
- if torch.cuda.is_available():
235
- torch.cuda.empty_cache()
236
-
237
-
238
- def run_inference(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
239
- image = b64_to_pil(image_b64)
240
- yield from generate_image(
241
- model_name=model_name,
242
- text=text,
243
- image=image,
244
- max_new_tokens=max_new_tokens_v,
245
- temperature=temperature_v,
246
- top_p=top_p_v,
247
- top_k=top_k_v,
248
- repetition_penalty=repetition_penalty_v,
249
- gpu_timeout=gpu_timeout_v,
250
- )
251
 
252
 
253
  def noop():
@@ -275,7 +327,7 @@ footer{display:none!important}
275
 
276
  .app-shell{
277
  background:#18181b;border:1px solid #27272a;border-radius:16px;
278
- margin:12px auto;max-width:1400px;overflow:hidden;
279
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
280
  }
281
  .app-header{
@@ -315,23 +367,19 @@ footer{display:none!important}
315
 
316
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
317
  .app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
318
- .app-main-right{width:470px;display:flex;flex-direction:column;flex-shrink:0;background:#18181b}
319
 
320
  #media-drop-zone{
321
- position:relative;background:#09090b;height:440px;min-height:440px;max-height:440px;
322
- overflow:hidden;
323
  }
324
  #media-drop-zone.drag-over{outline:2px solid #FF1493;outline-offset:-2px;background:rgba(255,20,147,.04)}
325
  .upload-prompt-modern{
326
- position:absolute;inset:0;display:flex;align-items:center;justify-content:center;
327
- padding:20px;z-index:20;overflow:hidden;
328
  }
329
  .upload-click-area{
330
- display:flex;flex-direction:column;align-items:center;justify-content:center;
331
- cursor:pointer;padding:28px 36px;max-width:92%;max-height:92%;
332
- border:2px dashed #3f3f46;border-radius:16px;
333
- background:rgba(255,20,147,.03);transition:all .2s ease;gap:8px;text-align:center;
334
- overflow:hidden;
335
  }
336
  .upload-click-area:hover{background:rgba(255,20,147,.08);border-color:#FF1493;transform:scale(1.02)}
337
  .upload-click-area:active{background:rgba(255,20,147,.12);transform:scale(.99)}
@@ -340,26 +388,21 @@ footer{display:none!important}
340
  .upload-sub-text{color:#71717a;font-size:12px}
341
 
342
  .single-preview-wrap{
343
- width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;
344
- overflow:hidden;
345
  }
346
  .single-preview-card{
347
- width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;
348
- overflow:hidden;border:1px solid #27272a;background:#111114;
349
  display:flex;align-items:center;justify-content:center;position:relative;
350
  }
351
  .single-preview-card img{
352
- width:100%;height:100%;max-width:100%;max-height:100%;
353
- object-fit:contain;display:block;background:#000;
354
  }
355
  .preview-overlay-actions{
356
  position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5;
357
  }
358
  .preview-action-btn{
359
- display:inline-flex;align-items:center;justify-content:center;
360
- min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
361
- border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;
362
- color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
363
  }
364
  .preview-action-btn:hover{background:#FF1493;border-color:#FF1493}
365
 
@@ -369,14 +412,13 @@ footer{display:none!important}
369
  }
370
  .hint-bar b{color:#ff7ac7;font-weight:600}
371
  .hint-bar kbd{
372
- display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;
373
- border-radius:4px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
374
  }
375
 
376
  .examples-section{border-top:1px solid #27272a;padding:12px 16px}
377
  .examples-title{
378
- font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;
379
- letter-spacing:.8px;margin-bottom:10px;
380
  }
381
  .examples-scroll{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}
382
  .examples-scroll::-webkit-scrollbar{height:6px}
@@ -384,43 +426,36 @@ footer{display:none!important}
384
  .examples-scroll::-webkit-scrollbar-thumb{background:#27272a;border-radius:3px}
385
  .examples-scroll::-webkit-scrollbar-thumb:hover{background:#3f3f46}
386
  .example-card{
387
- position:relative;
388
- flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;
389
- border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
390
  }
391
  .example-card:hover{border-color:#FF1493;transform:translateY(-2px);box-shadow:0 4px 12px rgba(255,20,147,.15)}
392
  .example-card.loading{opacity:.5;pointer-events:none}
393
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b;position:relative}
394
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
395
  .example-media-chip{
396
- position:absolute;top:8px;left:8px;
397
- display:inline-flex;padding:3px 7px;background:rgba(0,0,0,.7);border:1px solid rgba(255,255,255,.12);
398
  border-radius:999px;font-size:10px;font-weight:700;color:#fff;letter-spacing:.5px;
399
  }
400
  .example-thumb-placeholder{
401
- width:100%;height:100%;display:flex;align-items:center;justify-content:center;
402
- background:#18181b;color:#3f3f46;font-size:11px;
403
  }
404
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
405
  .example-badge{
406
- display:inline-flex;padding:2px 7px;background:rgba(255,20,147,.12);border-radius:4px;
407
- font-size:10px;font-weight:600;color:#ff7ac7;font-family:'JetBrains Mono',monospace;white-space:nowrap;
408
  }
409
  .example-prompt-text{
410
- padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;
411
- display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden;
412
  }
413
 
414
  .panel-card{border-bottom:1px solid #27272a}
415
  .panel-card-title{
416
- padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;
417
- text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
418
  }
419
  .panel-card-body{padding:16px 20px;display:flex;flex-direction:column;gap:8px}
420
  .modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
421
  .modern-textarea{
422
- width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;
423
- padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
424
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
425
  }
426
  .modern-textarea:focus{border-color:#FF1493;box-shadow:0 0 0 3px rgba(255,20,147,.15)}
@@ -431,10 +466,8 @@ footer{display:none!important}
431
  @keyframes shake{0%,100%{transform:translateX(0)}20%,60%{transform:translateX(-4px)}40%,80%{transform:translateX(4px)}}
432
 
433
  .toast-notification{
434
- position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);
435
- z-index:9999;padding:10px 24px;border-radius:10px;font-family:'Inter',sans-serif;
436
- font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;
437
- box-shadow:0 8px 24px rgba(0,0,0,.5);
438
  transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
439
  }
440
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
@@ -445,16 +478,12 @@ footer{display:none!important}
445
  .toast-notification .toast-text{line-height:1.3}
446
 
447
  .btn-run{
448
- display:flex;align-items:center;justify-content:center;gap:8px;width:100%;
449
- background:linear-gradient(135deg,#FF1493,#D10073);border:none;border-radius:10px;
450
- padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'Inter',sans-serif;
451
- color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
452
- transition:all .2s ease;letter-spacing:-.2px;
453
- box-shadow:0 4px 16px rgba(255,20,147,.3),inset 0 1px 0 rgba(255,255,255,.1);
454
  }
455
  .btn-run:hover{
456
- background:linear-gradient(135deg,#ff4db2,#FF1493);transform:translateY(-1px);
457
- box-shadow:0 6px 24px rgba(255,20,147,.45),inset 0 1px 0 rgba(255,255,255,.15);
458
  }
459
  .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(255,20,147,.3)}
460
  #custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run,.btn-run *{
@@ -462,97 +491,74 @@ footer{display:none!important}
462
  }
463
 
464
  .output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}
465
- .output-frame .out-title,
466
- .output-frame .out-title *,
467
- #output-title-label{
468
- color:#ffffff!important;
469
- -webkit-text-fill-color:#ffffff!important;
470
  }
471
  .output-frame .out-title{
472
- padding:10px 20px;font-size:13px;font-weight:700;
473
- text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
474
  display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
475
  }
476
  .out-title-right{display:flex;gap:8px;align-items:center}
477
  .out-action-btn{
478
- display:inline-flex;align-items:center;justify-content:center;background:rgba(255,20,147,.1);
479
- border:1px solid rgba(255,20,147,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
480
  font-size:11px;font-weight:500;color:#ff7ac7!important;gap:4px;height:24px;transition:all .15s;
481
  }
482
  .out-action-btn:hover{background:rgba(255,20,147,.2);border-color:rgba(255,20,147,.35);color:#ffffff!important}
483
  .out-action-btn svg{width:12px;height:12px;fill:#ff7ac7}
484
  .output-frame .out-body{
485
- flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;
486
- overflow:hidden;min-height:320px;position:relative;
487
- }
488
- .output-scroll-wrap{
489
- width:100%;height:100%;padding:0;overflow:hidden;
490
  }
 
491
  .output-textarea{
492
- width:100%;height:320px;min-height:320px;max-height:320px;background:#09090b;color:#e4e4e7;
493
- border:none;outline:none;padding:16px 18px;font-size:13px;line-height:1.6;
494
  font-family:'JetBrains Mono',monospace;overflow:auto;resize:none;white-space:pre-wrap;
495
  }
496
  .output-textarea::placeholder{color:#52525b}
497
- .output-textarea.error-flash{
498
- box-shadow:inset 0 0 0 2px rgba(239,68,68,.6);
499
- }
500
  .modern-loader{
501
- display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);
502
- z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
503
  }
504
  .modern-loader.active{display:flex}
505
  .modern-loader .loader-spinner{
506
- width:36px;height:36px;border:3px solid #27272a;border-top-color:#FF1493;
507
- border-radius:50%;animation:spin .8s linear infinite;
508
  }
509
  @keyframes spin{to{transform:rotate(360deg)}}
510
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
511
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
512
  .loader-bar-fill{
513
- height:100%;background:linear-gradient(90deg,#FF1493,#FF69C8,#FF1493);
514
- background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
515
  }
516
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
517
 
518
  .settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}
519
  .settings-group-title{
520
- font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;
521
- padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5);
522
  }
523
  .settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
524
  .slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
525
  .slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}
526
  .slider-row input[type="range"]{
527
- flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;
528
- border-radius:3px;outline:none;min-width:0;
529
  }
530
  .slider-row input[type="range"]::-webkit-slider-thumb{
531
- -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#FF1493,#D10073);
532
- border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(255,20,147,.4);transition:transform .15s;
533
  }
534
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
535
  .slider-row input[type="range"]::-moz-range-thumb{
536
- width:16px;height:16px;background:linear-gradient(135deg,#FF1493,#D10073);
537
- border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(255,20,147,.4);
538
  }
539
  .slider-row .slider-val{
540
- min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;
541
- font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;
542
- border-radius:6px;color:#a1a1aa;flex-shrink:0;
543
  }
544
 
545
  .app-statusbar{
546
- background:#18181b;border-top:1px solid #27272a;padding:6px 20px;
547
- display:flex;gap:12px;height:34px;align-items:center;font-size:12px;
548
  }
549
  .app-statusbar .sb-section{
550
- padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;
551
- font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap;
552
  }
553
  .app-statusbar .sb-section.sb-fixed{
554
- flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;
555
- padding:3px 12px;background:rgba(255,20,147,.08);border-radius:6px;color:#ff7ac7;font-weight:500;
556
  }
557
 
558
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
@@ -574,7 +580,7 @@ footer{display:none!important}
574
  gallery_js = r"""
575
  () => {
576
  function init() {
577
- if (window.__docScopeInitDone) return;
578
 
579
  const dropZone = document.getElementById('media-drop-zone');
580
  const uploadPrompt = document.getElementById('upload-prompt');
@@ -594,7 +600,7 @@ function init() {
594
  return;
595
  }
596
 
597
- window.__docScopeInitDone = true;
598
  let mediaState = null;
599
  let toastTimer = null;
600
  let examplePoller = null;
@@ -621,7 +627,6 @@ function init() {
621
  toast.classList.add('visible');
622
  toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
623
  }
624
- window.__showToast = showToast;
625
 
626
  function showLoader() {
627
  const l = document.getElementById('output-loader');
@@ -635,8 +640,16 @@ function init() {
635
  const sb = document.getElementById('sb-run-state');
636
  if (sb) sb.textContent = 'Done';
637
  }
638
- window.__showLoader = showLoader;
 
 
 
 
 
 
639
  window.__hideLoader = hideLoader;
 
 
640
 
641
  function flashPromptError() {
642
  promptInput.classList.add('error-flash');
@@ -659,23 +672,23 @@ function init() {
659
 
660
  function setGradioValue(containerId, value) {
661
  const container = document.getElementById(containerId);
662
- if (!container) return;
663
- container.querySelectorAll('input, textarea').forEach(el => {
664
- if (el.type === 'file' || el.type === 'range' || el.type === 'checkbox') return;
665
- const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
666
- const ns = Object.getOwnPropertyDescriptor(proto, 'value');
667
- if (ns && ns.set) {
668
- ns.set.call(el, value);
669
- el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
670
- el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
671
- }
672
- });
 
673
  }
674
 
675
  function syncImageToGradio() {
676
  setGradioValue('hidden-image-b64', mediaState ? mediaState.b64 : '');
677
- const txt = mediaState ? '1 image uploaded' : 'No image uploaded';
678
- if (mediaStatus) mediaStatus.textContent = txt;
679
  }
680
 
681
  function syncPromptToGradio() {
@@ -696,18 +709,17 @@ function init() {
696
  return;
697
  }
698
 
699
- previewImg.src = mediaState.b64;
700
- previewImg.style.display = 'block';
701
  previewWrap.style.display = 'flex';
702
  if (uploadPrompt) uploadPrompt.style.display = 'none';
 
 
703
  syncImageToGradio();
704
  }
705
 
706
- function setPreview(b64, name) {
707
- mediaState = {b64, name: name || 'file'};
708
  renderPreview();
709
  }
710
- window.__setPreview = setPreview;
711
 
712
  function clearPreview() {
713
  mediaState = null;
@@ -722,7 +734,7 @@ function init() {
722
  return;
723
  }
724
  const reader = new FileReader();
725
- reader.onload = (e) => setPreview(e.target.result, file.name);
726
  reader.readAsDataURL(file);
727
  }
728
 
@@ -758,6 +770,7 @@ function init() {
758
  });
759
  syncModelToGradio(name);
760
  }
 
761
  window.__activateModelTab = activateModelTab;
762
 
763
  document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
@@ -794,8 +807,8 @@ function init() {
794
 
795
  function validateBeforeRun() {
796
  const promptVal = promptInput.value.trim();
797
- if (!mediaState && !promptVal) {
798
- showToast('Please upload an image and enter your instruction', 'error');
799
  flashPromptError();
800
  return false;
801
  }
@@ -803,11 +816,6 @@ function init() {
803
  showToast('Please upload an image', 'error');
804
  return false;
805
  }
806
- if (!promptVal) {
807
- showToast('Please enter your instruction', 'warning');
808
- flashPromptError();
809
- return false;
810
- }
811
  const currentModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
812
  if (!currentModel) {
813
  showToast('Please select a model', 'error');
@@ -826,7 +834,12 @@ function init() {
826
  showLoader();
827
  setTimeout(() => {
828
  const gradioBtn = document.getElementById('gradio-run-btn');
829
- if (!gradioBtn) return;
 
 
 
 
 
830
  const btn = gradioBtn.querySelector('button');
831
  if (btn) btn.click(); else gradioBtn.click();
832
  }, 180);
@@ -878,22 +891,26 @@ function init() {
878
  function applyExamplePayload(raw) {
879
  try {
880
  const data = JSON.parse(raw);
881
- if (data.status === 'ok') {
882
- if (data.media) setPreview(data.media, data.name || 'example_file');
883
- if (data.query) {
884
- promptInput.value = data.query;
885
- syncPromptToGradio();
886
- }
887
- if (data.model) activateModelTab(data.model);
888
- document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
889
- showToast('Example loaded', 'info');
890
- } else if (data.status === 'error') {
891
- document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
892
- showToast(data.message || 'Failed to load example', 'error');
893
  }
 
 
 
 
 
 
 
 
 
 
 
894
  } catch (e) {
895
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
896
- showToast('Failed to parse example data', 'error');
897
  }
898
  }
899
 
@@ -910,33 +927,54 @@ function init() {
910
  applyExamplePayload(current);
911
  return;
912
  }
913
- if (attempts >= 80) {
914
  clearInterval(examplePoller);
915
  examplePoller = null;
916
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
917
  showToast('Example load timed out', 'error');
918
  }
919
- }, 150);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  }
921
 
922
  document.querySelectorAll('.example-card[data-idx]').forEach(card => {
923
  card.addEventListener('click', () => {
924
  const idx = card.getAttribute('data-idx');
 
925
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
926
  card.classList.add('loading');
927
  showToast('Loading example...', 'info');
928
-
929
- setGradioValue('example-result-data', '');
930
- setGradioValue('example-idx-input', idx);
931
-
932
- setTimeout(() => {
933
- const btn = document.getElementById('example-load-btn');
934
- if (btn) {
935
- const b = btn.querySelector('button');
936
- if (b) b.click(); else btn.click();
937
- }
938
- startExamplePolling();
939
- }, 220);
940
  });
941
  });
942
 
@@ -944,14 +982,13 @@ function init() {
944
  if (observerTarget) {
945
  const obs = new MutationObserver(() => {
946
  const current = getValueFromContainer('example-result-data');
947
- if (current && current !== lastSeenExamplePayload) {
948
- lastSeenExamplePayload = current;
949
- if (examplePoller) {
950
- clearInterval(examplePoller);
951
- examplePoller = null;
952
- }
953
- applyExamplePayload(current);
954
  }
 
955
  });
956
  obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
957
  }
@@ -974,6 +1011,10 @@ function watchOutputs() {
974
 
975
  let lastText = '';
976
 
 
 
 
 
977
  function syncOutput() {
978
  const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
979
  if (!el) return;
@@ -982,7 +1023,15 @@ function watchOutputs() {
982
  lastText = val;
983
  outArea.value = val;
984
  outArea.scrollTop = outArea.scrollHeight;
985
- if (window.__hideLoader && val.trim()) window.__hideLoader();
 
 
 
 
 
 
 
 
986
  }
987
  }
988
 
@@ -1057,7 +1106,7 @@ with gr.Blocks() as demo:
1057
  <div id="upload-click-area" class="upload-click-area">
1058
  {UPLOAD_PREVIEW_SVG}
1059
  <span id="upload-main-text" class="upload-main-text">Click or drag an image here</span>
1060
- <span id="upload-sub-text" class="upload-sub-text">Upload one document, page, screenshot, receipt, or scene image for OCR and reasoning</span>
1061
  </div>
1062
  </div>
1063
 
@@ -1075,8 +1124,8 @@ with gr.Blocks() as demo:
1075
  </div>
1076
 
1077
  <div class="hint-bar">
1078
- <b>Upload:</b> Click or drag an image into the panel &nbsp;&middot;&nbsp;
1079
- <b>Model:</b> Change models from the header &nbsp;&middot;&nbsp;
1080
  <kbd>Clear</kbd> removes the current image
1081
  </div>
1082
 
@@ -1090,10 +1139,10 @@ with gr.Blocks() as demo:
1090
 
1091
  <div class="app-main-right">
1092
  <div class="panel-card">
1093
- <div class="panel-card-title">Vision / OCR Instruction</div>
1094
  <div class="panel-card-body">
1095
- <label class="modern-label" for="custom-query-input">Query Input</label>
1096
- <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., perform OCR on this image, describe the document, identify visible text, analyze the scene..."></textarea>
1097
  </div>
1098
  </div>
1099
 
@@ -1162,7 +1211,7 @@ with gr.Blocks() as demo:
1162
  </div>
1163
 
1164
  <div class="exp-note">
1165
- Experimental document vision suite &middot; Open on <a href="https://github.com/PRITHIVSAKTHIUR/DocScope-R1" target="_blank">GitHub</a>
1166
  </div>
1167
 
1168
  <div class="app-statusbar">
@@ -1178,7 +1227,7 @@ with gr.Blocks() as demo:
1178
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1179
 
1180
  run_btn.click(
1181
- fn=run_inference,
1182
  inputs=[
1183
  hidden_model_name,
1184
  prompt,
@@ -1216,7 +1265,7 @@ with gr.Blocks() as demo:
1216
  )
1217
 
1218
  if __name__ == "__main__":
1219
- demo.queue(max_size=30).launch(
1220
  css=css,
1221
  mcp_server=True,
1222
  ssr_mode=False,
 
67
  "Captioner-7B-Qwen2.5VL": (processor_z, model_z),
68
  "visionOCR-3B": (processor_v, model_v),
69
  }
 
70
  MODEL_CHOICES = list(MODEL_MAP.keys())
71
 
72
  image_examples = [
 
75
  ]
76
 
77
 
78
+ def select_model(model_name: str):
79
+ if model_name not in MODEL_MAP:
80
+ raise ValueError("Invalid model selected.")
81
+ return MODEL_MAP[model_name]
82
+
83
+
84
  def pil_to_data_url(img: Image.Image, fmt="PNG"):
85
  buf = BytesIO()
86
  img.save(buf, format=fmt)
 
139
 
140
  def load_example_data(idx_str):
141
  try:
142
+ idx = int(str(idx_str).strip())
143
  except Exception:
144
+ return gr.update(value="")
145
+
146
  if idx < 0 or idx >= len(image_examples):
147
+ return gr.update(value="")
148
+
149
  ex = image_examples[idx]
150
  media_b64 = file_to_data_url(ex["media"])
151
  if not media_b64:
152
+ return gr.update(value=json.dumps({"status": "error", "message": "Could not load example image"}))
153
+
154
+ return gr.update(value=json.dumps({
155
  "status": "ok",
156
  "query": ex["query"],
157
  "media": media_b64,
158
  "model": ex["model"],
159
  "name": os.path.basename(ex["media"]),
160
+ }))
161
 
162
 
163
  def b64_to_pil(b64_str):
 
174
  return None
175
 
176
 
177
+ def calc_timeout_generic(*args, **kwargs):
178
+ gpu_timeout = kwargs.get("gpu_timeout", None)
179
+ if gpu_timeout is None and args:
180
+ gpu_timeout = args[-1]
181
  try:
182
  return int(gpu_timeout)
183
  except Exception:
184
  return 60
185
 
186
 
187
+ @spaces.GPU(duration=calc_timeout_generic)
188
  def generate_image(model_name, text, image, max_new_tokens=1024, temperature=0.6, top_p=0.9, top_k=50, repetition_penalty=1.2, gpu_timeout=60):
189
+ try:
190
+ if not model_name or model_name not in MODEL_MAP:
191
+ yield "[ERROR] Please select a valid model."
192
+ return
193
+ if image is None:
194
+ yield "[ERROR] Please upload an image."
195
+ return
196
+ if not text or not str(text).strip():
197
+ yield "[ERROR] Please enter your instruction."
198
+ return
199
+ if len(str(text)) > MAX_INPUT_TOKEN_LENGTH * 8:
200
+ yield "[ERROR] Query is too long. Please shorten your input."
201
+ return
202
+
203
+ processor, model = select_model(model_name)
204
+
205
+ messages = [{
206
+ "role": "user",
207
+ "content": [
208
+ {"type": "image"},
209
+ {"type": "text", "text": text},
210
+ ]
211
+ }]
212
+
213
+ prompt_full = processor.apply_chat_template(
214
+ messages,
215
+ tokenize=False,
216
+ add_generation_prompt=True
217
+ )
218
+
219
+ inputs = processor(
220
+ text=[prompt_full],
221
+ images=[image],
222
+ return_tensors="pt",
223
+ padding=True,
224
+ truncation=True,
225
+ max_length=MAX_INPUT_TOKEN_LENGTH
226
+ ).to(device)
227
+
228
+ streamer = TextIteratorStreamer(
229
+ processor.tokenizer if hasattr(processor, "tokenizer") else processor,
230
+ skip_prompt=True,
231
+ skip_special_tokens=True
232
+ )
233
+
234
+ generation_error = {"error": None}
235
+
236
+ generation_kwargs = {
237
+ **inputs,
238
+ "streamer": streamer,
239
+ "max_new_tokens": int(max_new_tokens),
240
+ "do_sample": True,
241
+ "temperature": float(temperature),
242
+ "top_p": float(top_p),
243
+ "top_k": int(top_k),
244
+ "repetition_penalty": float(repetition_penalty),
245
+ }
246
 
247
+ def _run_generation():
248
+ try:
249
+ model.generate(**generation_kwargs)
250
+ except Exception as e:
251
+ generation_error["error"] = e
252
+ try:
253
+ streamer.end()
254
+ except Exception:
255
+ pass
256
+
257
+ thread = Thread(target=_run_generation, daemon=True)
258
+ thread.start()
259
+
260
+ buffer = ""
261
+ for new_text in streamer:
262
+ buffer += new_text.replace("<|im_end|>", "")
263
+ time.sleep(0.01)
264
+ yield buffer
265
+
266
+ thread.join(timeout=1.0)
267
+
268
+ if generation_error["error"] is not None:
269
+ err_msg = f"[ERROR] Inference failed: {str(generation_error['error'])}"
270
+ if buffer.strip():
271
+ yield buffer + "\n\n" + err_msg
272
+ else:
273
+ yield err_msg
274
+ return
275
+
276
+ if not buffer.strip():
277
+ yield "[ERROR] No output was generated."
278
 
279
+ except Exception as e:
280
+ yield f"[ERROR] {str(e)}"
281
+ finally:
282
+ gc.collect()
283
+ if torch.cuda.is_available():
284
+ torch.cuda.empty_cache()
285
+
286
+
287
+ def run_router(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
288
+ try:
289
+ image = b64_to_pil(image_b64)
290
+ yield from generate_image(
291
+ model_name=model_name,
292
+ text=text,
293
+ image=image,
294
+ max_new_tokens=max_new_tokens_v,
295
+ temperature=temperature_v,
296
+ top_p=top_p_v,
297
+ top_k=top_k_v,
298
+ repetition_penalty=repetition_penalty_v,
299
+ gpu_timeout=gpu_timeout_v,
300
+ )
301
+ except Exception as e:
302
+ yield f"[ERROR] {str(e)}"
 
 
 
303
 
304
 
305
  def noop():
 
327
 
328
  .app-shell{
329
  background:#18181b;border:1px solid #27272a;border-radius:16px;
330
+ margin:12px auto;max-width:1450px;overflow:hidden;
331
  box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
332
  }
333
  .app-header{
 
367
 
368
  .app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
369
  .app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
370
+ .app-main-right{width:500px;display:flex;flex-direction:column;flex-shrink:0;background:#18181b}
371
 
372
  #media-drop-zone{
373
+ position:relative;background:#09090b;height:440px;min-height:440px;max-height:440px;overflow:hidden;
 
374
  }
375
  #media-drop-zone.drag-over{outline:2px solid #FF1493;outline-offset:-2px;background:rgba(255,20,147,.04)}
376
  .upload-prompt-modern{
377
+ position:absolute;inset:0;display:flex;align-items:center;justify-content:center;padding:20px;z-index:20;overflow:hidden;
 
378
  }
379
  .upload-click-area{
380
+ display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;
381
+ padding:28px 36px;max-width:92%;max-height:92%;border:2px dashed #3f3f46;border-radius:16px;
382
+ background:rgba(255,20,147,.03);transition:all .2s ease;gap:8px;text-align:center;overflow:hidden;
 
 
383
  }
384
  .upload-click-area:hover{background:rgba(255,20,147,.08);border-color:#FF1493;transform:scale(1.02)}
385
  .upload-click-area:active{background:rgba(255,20,147,.12);transform:scale(.99)}
 
388
  .upload-sub-text{color:#71717a;font-size:12px}
389
 
390
  .single-preview-wrap{
391
+ width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;overflow:hidden;
 
392
  }
393
  .single-preview-card{
394
+ width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;overflow:hidden;border:1px solid #27272a;background:#111114;
 
395
  display:flex;align-items:center;justify-content:center;position:relative;
396
  }
397
  .single-preview-card img{
398
+ width:100%;height:100%;max-width:100%;max-height:100%;object-fit:contain;display:block;background:#000;border:none;
 
399
  }
400
  .preview-overlay-actions{
401
  position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5;
402
  }
403
  .preview-action-btn{
404
+ display:inline-flex;align-items:center;justify-content:center;min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
405
+ border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
 
 
406
  }
407
  .preview-action-btn:hover{background:#FF1493;border-color:#FF1493}
408
 
 
412
  }
413
  .hint-bar b{color:#ff7ac7;font-weight:600}
414
  .hint-bar kbd{
415
+ display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;border-radius:4px;
416
+ font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
417
  }
418
 
419
  .examples-section{border-top:1px solid #27272a;padding:12px 16px}
420
  .examples-title{
421
+ font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;margin-bottom:10px;
 
422
  }
423
  .examples-scroll{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}
424
  .examples-scroll::-webkit-scrollbar{height:6px}
 
426
  .examples-scroll::-webkit-scrollbar-thumb{background:#27272a;border-radius:3px}
427
  .examples-scroll::-webkit-scrollbar-thumb:hover{background:#3f3f46}
428
  .example-card{
429
+ position:relative;flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
 
 
430
  }
431
  .example-card:hover{border-color:#FF1493;transform:translateY(-2px);box-shadow:0 4px 12px rgba(255,20,147,.15)}
432
  .example-card.loading{opacity:.5;pointer-events:none}
433
  .example-thumb-wrap{height:120px;overflow:hidden;background:#18181b;position:relative}
434
  .example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
435
  .example-media-chip{
436
+ position:absolute;top:8px;left:8px;display:inline-flex;padding:3px 7px;background:rgba(0,0,0,.7);border:1px solid rgba(255,255,255,.12);
 
437
  border-radius:999px;font-size:10px;font-weight:700;color:#fff;letter-spacing:.5px;
438
  }
439
  .example-thumb-placeholder{
440
+ width:100%;height:100%;display:flex;align-items:center;justify-content:center;background:#18181b;color:#3f3f46;font-size:11px;
 
441
  }
442
  .example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
443
  .example-badge{
444
+ display:inline-flex;padding:2px 7px;background:rgba(255,20,147,.12);border-radius:4px;font-size:10px;font-weight:600;color:#ff7ac7;
445
+ font-family:'JetBrains Mono',monospace;white-space:nowrap;
446
  }
447
  .example-prompt-text{
448
+ padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden;
 
449
  }
450
 
451
  .panel-card{border-bottom:1px solid #27272a}
452
  .panel-card-title{
453
+ padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
 
454
  }
455
  .panel-card-body{padding:16px 20px;display:flex;flex-direction:column;gap:8px}
456
  .modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
457
  .modern-textarea{
458
+ width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
 
459
  resize:none;outline:none;min-height:100px;transition:border-color .2s;
460
  }
461
  .modern-textarea:focus{border-color:#FF1493;box-shadow:0 0 0 3px rgba(255,20,147,.15)}
 
466
  @keyframes shake{0%,100%{transform:translateX(0)}20%,60%{transform:translateX(-4px)}40%,80%{transform:translateX(4px)}}
467
 
468
  .toast-notification{
469
+ position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);z-index:9999;padding:10px 24px;border-radius:10px;
470
+ font-family:'Inter',sans-serif;font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;box-shadow:0 8px 24px rgba(0,0,0,.5);
 
 
471
  transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
472
  }
473
  .toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
 
478
  .toast-notification .toast-text{line-height:1.3}
479
 
480
  .btn-run{
481
+ display:flex;align-items:center;justify-content:center;gap:8px;width:100%;background:linear-gradient(135deg,#FF1493,#D10073);border:none;border-radius:10px;
482
+ padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'Inter',sans-serif;color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
483
+ transition:all .2s ease;letter-spacing:-.2px;box-shadow:0 4px 16px rgba(255,20,147,.3),inset 0 1px 0 rgba(255,255,255,.1);
 
 
 
484
  }
485
  .btn-run:hover{
486
+ background:linear-gradient(135deg,#ff4db2,#FF1493);transform:translateY(-1px);box-shadow:0 6px 24px rgba(255,20,147,.45),inset 0 1px 0 rgba(255,255,255,.15);
 
487
  }
488
  .btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(255,20,147,.3)}
489
  #custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run,.btn-run *{
 
491
  }
492
 
493
  .output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}
494
+ .output-frame .out-title,.output-frame .out-title *,#output-title-label{
495
+ color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
 
 
 
496
  }
497
  .output-frame .out-title{
498
+ padding:10px 20px;font-size:13px;font-weight:700;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
 
499
  display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
500
  }
501
  .out-title-right{display:flex;gap:8px;align-items:center}
502
  .out-action-btn{
503
+ display:inline-flex;align-items:center;justify-content:center;background:rgba(255,20,147,.1);border:1px solid rgba(255,20,147,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
 
504
  font-size:11px;font-weight:500;color:#ff7ac7!important;gap:4px;height:24px;transition:all .15s;
505
  }
506
  .out-action-btn:hover{background:rgba(255,20,147,.2);border-color:rgba(255,20,147,.35);color:#ffffff!important}
507
  .out-action-btn svg{width:12px;height:12px;fill:#ff7ac7}
508
  .output-frame .out-body{
509
+ flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;overflow:hidden;min-height:320px;position:relative;
 
 
 
 
510
  }
511
+ .output-scroll-wrap{width:100%;height:100%;padding:0;overflow:hidden}
512
  .output-textarea{
513
+ width:100%;height:320px;min-height:320px;max-height:320px;background:#09090b;color:#e4e4e7;border:none;outline:none;padding:16px 18px;font-size:13px;line-height:1.6;
 
514
  font-family:'JetBrains Mono',monospace;overflow:auto;resize:none;white-space:pre-wrap;
515
  }
516
  .output-textarea::placeholder{color:#52525b}
517
+ .output-textarea.error-flash{box-shadow:inset 0 0 0 2px rgba(239,68,68,.6)}
 
 
518
  .modern-loader{
519
+ display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
 
520
  }
521
  .modern-loader.active{display:flex}
522
  .modern-loader .loader-spinner{
523
+ width:36px;height:36px;border:3px solid #27272a;border-top-color:#FF1493;border-radius:50%;animation:spin .8s linear infinite;
 
524
  }
525
  @keyframes spin{to{transform:rotate(360deg)}}
526
  .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
527
  .loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
528
  .loader-bar-fill{
529
+ height:100%;background:linear-gradient(90deg,#FF1493,#FF69C8,#FF1493);background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
 
530
  }
531
  @keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
532
 
533
  .settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}
534
  .settings-group-title{
535
+ font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5);
 
536
  }
537
  .settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
538
  .slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
539
  .slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}
540
  .slider-row input[type="range"]{
541
+ flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;border-radius:3px;outline:none;min-width:0;
 
542
  }
543
  .slider-row input[type="range"]::-webkit-slider-thumb{
544
+ -webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#FF1493,#D10073);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(255,20,147,.4);transition:transform .15s;
 
545
  }
546
  .slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
547
  .slider-row input[type="range"]::-moz-range-thumb{
548
+ width:16px;height:16px;background:linear-gradient(135deg,#FF1493,#D10073);border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(255,20,147,.4);
 
549
  }
550
  .slider-row .slider-val{
551
+ min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa;flex-shrink:0;
 
 
552
  }
553
 
554
  .app-statusbar{
555
+ background:#18181b;border-top:1px solid #27272a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center;font-size:12px;
 
556
  }
557
  .app-statusbar .sb-section{
558
+ padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap;
 
559
  }
560
  .app-statusbar .sb-section.sb-fixed{
561
+ flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;padding:3px 12px;background:rgba(255,20,147,.08);border-radius:6px;color:#ff7ac7;font-weight:500;
 
562
  }
563
 
564
  .exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
 
580
  gallery_js = r"""
581
  () => {
582
  function init() {
583
+ if (window.__outpostInitDone) return;
584
 
585
  const dropZone = document.getElementById('media-drop-zone');
586
  const uploadPrompt = document.getElementById('upload-prompt');
 
600
  return;
601
  }
602
 
603
+ window.__outpostInitDone = true;
604
  let mediaState = null;
605
  let toastTimer = null;
606
  let examplePoller = null;
 
627
  toast.classList.add('visible');
628
  toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
629
  }
 
630
 
631
  function showLoader() {
632
  const l = document.getElementById('output-loader');
 
640
  const sb = document.getElementById('sb-run-state');
641
  if (sb) sb.textContent = 'Done';
642
  }
643
+ function setRunErrorState() {
644
+ const l = document.getElementById('output-loader');
645
+ if (l) l.classList.remove('active');
646
+ const sb = document.getElementById('sb-run-state');
647
+ if (sb) sb.textContent = 'Error';
648
+ }
649
+
650
  window.__hideLoader = hideLoader;
651
+ window.__setRunErrorState = setRunErrorState;
652
+ window.__showToast = showToast;
653
 
654
  function flashPromptError() {
655
  promptInput.classList.add('error-flash');
 
672
 
673
  function setGradioValue(containerId, value) {
674
  const container = document.getElementById(containerId);
675
+ if (!container) return false;
676
+ const el = container.querySelector('textarea, input');
677
+ if (!el) return false;
678
+ const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
679
+ const ns = Object.getOwnPropertyDescriptor(proto, 'value');
680
+ if (ns && ns.set) {
681
+ ns.set.call(el, value);
682
+ el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
683
+ el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
684
+ return true;
685
+ }
686
+ return false;
687
  }
688
 
689
  function syncImageToGradio() {
690
  setGradioValue('hidden-image-b64', mediaState ? mediaState.b64 : '');
691
+ if (mediaStatus) mediaStatus.textContent = mediaState ? '1 image uploaded' : 'No image uploaded';
 
692
  }
693
 
694
  function syncPromptToGradio() {
 
709
  return;
710
  }
711
 
 
 
712
  previewWrap.style.display = 'flex';
713
  if (uploadPrompt) uploadPrompt.style.display = 'none';
714
+ previewImg.src = mediaState.preview || mediaState.b64;
715
+ previewImg.style.display = 'block';
716
  syncImageToGradio();
717
  }
718
 
719
+ function setPreviewFromFileReader(b64, name) {
720
+ mediaState = {b64, name: name || 'file', mode: 'image'};
721
  renderPreview();
722
  }
 
723
 
724
  function clearPreview() {
725
  mediaState = null;
 
734
  return;
735
  }
736
  const reader = new FileReader();
737
+ reader.onload = (e) => setPreviewFromFileReader(e.target.result, file.name);
738
  reader.readAsDataURL(file);
739
  }
740
 
 
770
  });
771
  syncModelToGradio(name);
772
  }
773
+
774
  window.__activateModelTab = activateModelTab;
775
 
776
  document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
 
807
 
808
  function validateBeforeRun() {
809
  const promptVal = promptInput.value.trim();
810
+ if (!promptVal) {
811
+ showToast('Please enter your instruction', 'warning');
812
  flashPromptError();
813
  return false;
814
  }
 
816
  showToast('Please upload an image', 'error');
817
  return false;
818
  }
 
 
 
 
 
819
  const currentModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
820
  if (!currentModel) {
821
  showToast('Please select a model', 'error');
 
834
  showLoader();
835
  setTimeout(() => {
836
  const gradioBtn = document.getElementById('gradio-run-btn');
837
+ if (!gradioBtn) {
838
+ setRunErrorState();
839
+ if (outputArea) outputArea.value = '[ERROR] Run button not found.';
840
+ showToast('Run button not found', 'error');
841
+ return;
842
+ }
843
  const btn = gradioBtn.querySelector('button');
844
  if (btn) btn.click(); else gradioBtn.click();
845
  }, 180);
 
891
  function applyExamplePayload(raw) {
892
  try {
893
  const data = JSON.parse(raw);
894
+ if (data.status !== 'ok') return;
895
+
896
+ if (data.model) activateModelTab(data.model);
897
+ if (data.query) {
898
+ promptInput.value = data.query;
899
+ syncPromptToGradio();
 
 
 
 
 
 
900
  }
901
+
902
+ mediaState = {
903
+ b64: data.media || '',
904
+ preview: data.media || '',
905
+ name: data.name || 'example_file',
906
+ mode: 'image'
907
+ };
908
+ renderPreview();
909
+
910
+ document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
911
+ showToast('Example loaded', 'info');
912
  } catch (e) {
913
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
 
914
  }
915
  }
916
 
 
927
  applyExamplePayload(current);
928
  return;
929
  }
930
+ if (attempts >= 100) {
931
  clearInterval(examplePoller);
932
  examplePoller = null;
933
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
934
  showToast('Example load timed out', 'error');
935
  }
936
+ }, 120);
937
+ }
938
+
939
+ function triggerExampleLoad(idx) {
940
+ const btnWrap = document.getElementById('example-load-btn');
941
+ const btn = btnWrap ? (btnWrap.querySelector('button') || btnWrap) : null;
942
+ if (!btn) return;
943
+
944
+ let attempts = 0;
945
+
946
+ function writeIdxAndClick() {
947
+ attempts += 1;
948
+
949
+ const ok1 = setGradioValue('example-idx-input', String(idx));
950
+ setGradioValue('example-result-data', '');
951
+ const currentVal = getValueFromContainer('example-idx-input');
952
+
953
+ if (ok1 && currentVal === String(idx)) {
954
+ btn.click();
955
+ startExamplePolling();
956
+ return;
957
+ }
958
+
959
+ if (attempts < 30) {
960
+ setTimeout(writeIdxAndClick, 100);
961
+ } else {
962
+ document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
963
+ showToast('Failed to initialize example loader', 'error');
964
+ }
965
+ }
966
+
967
+ writeIdxAndClick();
968
  }
969
 
970
  document.querySelectorAll('.example-card[data-idx]').forEach(card => {
971
  card.addEventListener('click', () => {
972
  const idx = card.getAttribute('data-idx');
973
+ if (idx === null || idx === undefined || idx === '') return;
974
  document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
975
  card.classList.add('loading');
976
  showToast('Loading example...', 'info');
977
+ triggerExampleLoad(idx);
 
 
 
 
 
 
 
 
 
 
 
978
  });
979
  });
980
 
 
982
  if (observerTarget) {
983
  const obs = new MutationObserver(() => {
984
  const current = getValueFromContainer('example-result-data');
985
+ if (!current || current === lastSeenExamplePayload) return;
986
+ lastSeenExamplePayload = current;
987
+ if (examplePoller) {
988
+ clearInterval(examplePoller);
989
+ examplePoller = null;
 
 
990
  }
991
+ applyExamplePayload(current);
992
  });
993
  obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
994
  }
 
1011
 
1012
  let lastText = '';
1013
 
1014
+ function isErrorText(val) {
1015
+ return typeof val === 'string' && val.trim().startsWith('[ERROR]');
1016
+ }
1017
+
1018
  function syncOutput() {
1019
  const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
1020
  if (!el) return;
 
1023
  lastText = val;
1024
  outArea.value = val;
1025
  outArea.scrollTop = outArea.scrollHeight;
1026
+
1027
+ if (val.trim()) {
1028
+ if (isErrorText(val)) {
1029
+ if (window.__setRunErrorState) window.__setRunErrorState();
1030
+ if (window.__showToast) window.__showToast('Inference failed', 'error');
1031
+ } else {
1032
+ if (window.__hideLoader) window.__hideLoader();
1033
+ }
1034
+ }
1035
  }
1036
  }
1037
 
 
1106
  <div id="upload-click-area" class="upload-click-area">
1107
  {UPLOAD_PREVIEW_SVG}
1108
  <span id="upload-main-text" class="upload-main-text">Click or drag an image here</span>
1109
+ <span id="upload-sub-text" class="upload-sub-text">Upload one image for OCR and multimodal reasoning</span>
1110
  </div>
1111
  </div>
1112
 
 
1124
  </div>
1125
 
1126
  <div class="hint-bar">
1127
+ <b>Mode:</b> Image inference only &nbsp;&middot;&nbsp;
1128
+ <b>Model:</b> Switch between OCR, captioning, and reasoning variants &nbsp;&middot;&nbsp;
1129
  <kbd>Clear</kbd> removes the current image
1130
  </div>
1131
 
 
1139
 
1140
  <div class="app-main-right">
1141
  <div class="panel-card">
1142
+ <div id="instruction-title" class="panel-card-title">Vision / OCR Instruction</div>
1143
  <div class="panel-card-body">
1144
+ <label id="query-label" class="modern-label" for="custom-query-input">Query Input</label>
1145
+ <textarea id="custom-query-input" class="modern-textarea" rows="4" placeholder="e.g., perform OCR, describe the image, analyze the scene, extract visible text..."></textarea>
1146
  </div>
1147
  </div>
1148
 
 
1211
  </div>
1212
 
1213
  <div class="exp-note">
1214
+ Experimental document vision workspace
1215
  </div>
1216
 
1217
  <div class="app-statusbar">
 
1227
  demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
1228
 
1229
  run_btn.click(
1230
+ fn=run_router,
1231
  inputs=[
1232
  hidden_model_name,
1233
  prompt,
 
1265
  )
1266
 
1267
  if __name__ == "__main__":
1268
+ demo.queue(max_size=50).launch(
1269
  css=css,
1270
  mcp_server=True,
1271
  ssr_mode=False,