Update index.html
Browse files- index.html +912 -18
index.html
CHANGED
|
@@ -1,19 +1,913 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</html>
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
+
<title>vLLM deployment advisor</title>
|
| 7 |
+
<link rel="preconnect" href="https://huggingface.co" />
|
| 8 |
+
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--bg: #0f1419;
|
| 11 |
+
--surface: #1a2332;
|
| 12 |
+
--surface2: #243044;
|
| 13 |
+
--border: #334155;
|
| 14 |
+
--text: #e2e8f0;
|
| 15 |
+
--muted: #94a3b8;
|
| 16 |
+
--accent: #38bdf8;
|
| 17 |
+
--accent2: #a78bfa;
|
| 18 |
+
--good: #34d399;
|
| 19 |
+
--warn: #fbbf24;
|
| 20 |
+
}
|
| 21 |
+
* { box-sizing: border-box; }
|
| 22 |
+
body {
|
| 23 |
+
margin: 0;
|
| 24 |
+
font-family: "Segoe UI", system-ui, sans-serif;
|
| 25 |
+
background: var(--bg);
|
| 26 |
+
color: var(--text);
|
| 27 |
+
line-height: 1.5;
|
| 28 |
+
min-height: 100vh;
|
| 29 |
+
}
|
| 30 |
+
.wrap {
|
| 31 |
+
max-width: 1100px;
|
| 32 |
+
margin: 0 auto;
|
| 33 |
+
padding: 1.5rem 1.25rem 3rem;
|
| 34 |
+
}
|
| 35 |
+
h1 {
|
| 36 |
+
font-size: 1.35rem;
|
| 37 |
+
font-weight: 600;
|
| 38 |
+
margin: 0 0 0.25rem;
|
| 39 |
+
letter-spacing: -0.02em;
|
| 40 |
+
}
|
| 41 |
+
.sub {
|
| 42 |
+
color: var(--muted);
|
| 43 |
+
font-size: 0.9rem;
|
| 44 |
+
margin-bottom: 1.5rem;
|
| 45 |
+
}
|
| 46 |
+
label {
|
| 47 |
+
display: block;
|
| 48 |
+
font-size: 0.8rem;
|
| 49 |
+
color: var(--muted);
|
| 50 |
+
margin-bottom: 0.35rem;
|
| 51 |
+
}
|
| 52 |
+
input[type="text"], input[type="number"], select {
|
| 53 |
+
width: 100%;
|
| 54 |
+
padding: 0.6rem 0.75rem;
|
| 55 |
+
border: 1px solid var(--border);
|
| 56 |
+
border-radius: 8px;
|
| 57 |
+
background: var(--surface);
|
| 58 |
+
color: var(--text);
|
| 59 |
+
font-size: 0.95rem;
|
| 60 |
+
}
|
| 61 |
+
input:focus, select:focus {
|
| 62 |
+
outline: 2px solid var(--accent);
|
| 63 |
+
outline-offset: 1px;
|
| 64 |
+
}
|
| 65 |
+
.row {
|
| 66 |
+
display: grid;
|
| 67 |
+
gap: 1rem;
|
| 68 |
+
margin-bottom: 1rem;
|
| 69 |
+
}
|
| 70 |
+
@media (min-width: 640px) {
|
| 71 |
+
.row.cols-2 { grid-template-columns: 1fr 1fr; }
|
| 72 |
+
.row.cols-3 { grid-template-columns: repeat(3, 1fr); }
|
| 73 |
+
}
|
| 74 |
+
button.primary {
|
| 75 |
+
padding: 0.65rem 1.25rem;
|
| 76 |
+
background: linear-gradient(135deg, #0ea5e9, #6366f1);
|
| 77 |
+
color: #fff;
|
| 78 |
+
border: none;
|
| 79 |
+
border-radius: 8px;
|
| 80 |
+
font-weight: 600;
|
| 81 |
+
cursor: pointer;
|
| 82 |
+
font-size: 0.95rem;
|
| 83 |
+
}
|
| 84 |
+
button.primary:hover { filter: brightness(1.08); }
|
| 85 |
+
button.primary:disabled { opacity: 0.5; cursor: not-allowed; }
|
| 86 |
+
.card {
|
| 87 |
+
background: var(--surface);
|
| 88 |
+
border: 1px solid var(--border);
|
| 89 |
+
border-radius: 12px;
|
| 90 |
+
padding: 1.1rem 1.25rem;
|
| 91 |
+
margin-top: 1rem;
|
| 92 |
+
}
|
| 93 |
+
.card h2 {
|
| 94 |
+
font-size: 1rem;
|
| 95 |
+
margin: 0 0 0.75rem;
|
| 96 |
+
color: var(--accent);
|
| 97 |
+
}
|
| 98 |
+
.err { color: #f87171; font-size: 0.9rem; margin-top: 0.5rem; }
|
| 99 |
+
.ok { color: var(--good); font-size: 0.9rem; }
|
| 100 |
+
table {
|
| 101 |
+
width: 100%;
|
| 102 |
+
border-collapse: collapse;
|
| 103 |
+
font-size: 0.85rem;
|
| 104 |
+
}
|
| 105 |
+
th, td {
|
| 106 |
+
text-align: left;
|
| 107 |
+
padding: 0.45rem 0.5rem;
|
| 108 |
+
border-bottom: 1px solid var(--border);
|
| 109 |
+
}
|
| 110 |
+
th { color: var(--muted); font-weight: 500; }
|
| 111 |
+
.gpu-grid {
|
| 112 |
+
display: grid;
|
| 113 |
+
gap: 0.65rem;
|
| 114 |
+
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
| 115 |
+
}
|
| 116 |
+
.gpu-card {
|
| 117 |
+
background: var(--surface2);
|
| 118 |
+
border: 1px solid var(--border);
|
| 119 |
+
border-radius: 10px;
|
| 120 |
+
padding: 0.75rem 0.9rem;
|
| 121 |
+
cursor: pointer;
|
| 122 |
+
transition: border-color 0.15s, box-shadow 0.15s;
|
| 123 |
+
}
|
| 124 |
+
.gpu-card:hover, .gpu-card.selected {
|
| 125 |
+
border-color: var(--accent);
|
| 126 |
+
box-shadow: 0 0 0 1px var(--accent);
|
| 127 |
+
}
|
| 128 |
+
.gpu-card .name { font-weight: 600; font-size: 0.9rem; }
|
| 129 |
+
.gpu-card .vram { color: var(--muted); font-size: 0.8rem; }
|
| 130 |
+
.gpu-detail {
|
| 131 |
+
margin-top: 1rem;
|
| 132 |
+
padding: 1rem;
|
| 133 |
+
background: var(--bg);
|
| 134 |
+
border-radius: 8px;
|
| 135 |
+
border: 1px solid var(--border);
|
| 136 |
+
font-size: 0.88rem;
|
| 137 |
+
}
|
| 138 |
+
.gpu-detail dl {
|
| 139 |
+
display: grid;
|
| 140 |
+
grid-template-columns: auto 1fr;
|
| 141 |
+
gap: 0.35rem 1rem;
|
| 142 |
+
margin: 0;
|
| 143 |
+
}
|
| 144 |
+
.gpu-detail dt { color: var(--muted); }
|
| 145 |
+
.gpu-detail dd { margin: 0; }
|
| 146 |
+
pre.cmd {
|
| 147 |
+
background: #0c1220;
|
| 148 |
+
border: 1px solid var(--border);
|
| 149 |
+
border-radius: 8px;
|
| 150 |
+
padding: 1rem;
|
| 151 |
+
overflow-x: auto;
|
| 152 |
+
font-size: 0.78rem;
|
| 153 |
+
line-height: 1.45;
|
| 154 |
+
white-space: pre-wrap;
|
| 155 |
+
word-break: break-all;
|
| 156 |
+
}
|
| 157 |
+
.badge {
|
| 158 |
+
display: inline-block;
|
| 159 |
+
padding: 0.15rem 0.45rem;
|
| 160 |
+
border-radius: 4px;
|
| 161 |
+
font-size: 0.75rem;
|
| 162 |
+
background: var(--surface2);
|
| 163 |
+
color: var(--muted);
|
| 164 |
+
}
|
| 165 |
+
.hint { font-size: 0.8rem; color: var(--muted); margin-top: 0.75rem; }
|
| 166 |
+
.spinner { display: inline-block; width: 1rem; height: 1rem; border: 2px solid var(--border); border-top-color: var(--accent); border-radius: 50%; animation: spin 0.7s linear infinite; vertical-align: middle; margin-right: 0.35rem; }
|
| 167 |
+
@keyframes spin { to { transform: rotate(360deg); } }
|
| 168 |
+
.model-row {
|
| 169 |
+
display: grid;
|
| 170 |
+
grid-template-columns: 1fr auto;
|
| 171 |
+
gap: 0.5rem;
|
| 172 |
+
align-items: end;
|
| 173 |
+
margin-bottom: 0.65rem;
|
| 174 |
+
}
|
| 175 |
+
.model-row .model-id-input { margin: 0; }
|
| 176 |
+
button.btn-ghost {
|
| 177 |
+
padding: 0.55rem 0.85rem;
|
| 178 |
+
background: var(--surface2);
|
| 179 |
+
color: var(--text);
|
| 180 |
+
border: 1px solid var(--border);
|
| 181 |
+
border-radius: 8px;
|
| 182 |
+
cursor: pointer;
|
| 183 |
+
font-size: 0.85rem;
|
| 184 |
+
}
|
| 185 |
+
button.btn-ghost:hover { border-color: var(--accent); }
|
| 186 |
+
button.btn-ghost.danger:hover { border-color: #f87171; color: #f87171; }
|
| 187 |
+
.gpu-card.preferred { border-color: var(--accent2); box-shadow: 0 0 0 1px var(--accent2); }
|
| 188 |
+
.gpu-card.commands-target { outline: 1px dashed var(--good); outline-offset: 2px; }
|
| 189 |
+
details.model-block { margin-bottom: 1rem; border: 1px solid var(--border); border-radius: 8px; padding: 0.5rem 0.75rem; background: var(--bg); }
|
| 190 |
+
details.model-block summary { cursor: pointer; font-weight: 600; color: var(--accent); }
|
| 191 |
+
</style>
|
| 192 |
+
</head>
|
| 193 |
+
<body>
|
| 194 |
+
<div class="wrap">
|
| 195 |
+
<h1>vLLM deployment advisor</h1>
|
| 196 |
+
<p class="sub">Pulls weight sizes from Hugging Face, estimates KV memory, and suggests tensor parallelism and <code style="color:var(--accent2)">vllm serve</code> commands. Add several models to estimate total GPUs on your preferred GPU type (separate vLLM instances). Estimates are heuristic — validate on your hardware.</p>
|
| 197 |
+
|
| 198 |
+
<div class="card" style="margin-top:0">
|
| 199 |
+
<label>Hugging Face models (one per serving endpoint)</label>
|
| 200 |
+
<p class="hint" style="margin-top:0">Each model is a separate <code>vllm serve</code> process. Planning assumes tensor-parallel groups do not share GPUs with another model unless you colocate manually.</p>
|
| 201 |
+
<div id="modelListContainer"></div>
|
| 202 |
+
<button type="button" class="btn-ghost" id="btnAddModel" style="margin-bottom:1rem">+ Add model</button>
|
| 203 |
+
<div class="row cols-2">
|
| 204 |
+
<div>
|
| 205 |
+
<label for="hfToken">HF token (optional, for gated/private)</label>
|
| 206 |
+
<input type="text" id="hfToken" placeholder="hf_..." autocomplete="off" />
|
| 207 |
+
</div>
|
| 208 |
+
<div>
|
| 209 |
+
<label for="preferredGpu">Preferred GPU (for TP & totals)</label>
|
| 210 |
+
<select id="preferredGpu"></select>
|
| 211 |
+
</div>
|
| 212 |
+
</div>
|
| 213 |
+
<div class="row cols-3">
|
| 214 |
+
<div>
|
| 215 |
+
<label for="weightDtype">Weight memory (dtype)</label>
|
| 216 |
+
<select id="weightDtype">
|
| 217 |
+
<option value="bf16" selected>BF16 / FP16 (2 bytes/param)</option>
|
| 218 |
+
<option value="fp8">FP8 weights (~1 byte/param, if supported)</option>
|
| 219 |
+
</select>
|
| 220 |
+
</div>
|
| 221 |
+
<div>
|
| 222 |
+
<label for="kvDtype">KV cache dtype</label>
|
| 223 |
+
<select id="kvDtype">
|
| 224 |
+
<option value="auto">auto</option>
|
| 225 |
+
<option value="fp8" selected>fp8 (half KV vs fp16)</option>
|
| 226 |
+
<option value="fp16">fp16</option>
|
| 227 |
+
</select>
|
| 228 |
+
</div>
|
| 229 |
+
<div>
|
| 230 |
+
<label for="maxModelLen">Max model length (tokens)</label>
|
| 231 |
+
<input type="number" id="maxModelLen" value="8192" min="256" step="256" />
|
| 232 |
+
</div>
|
| 233 |
+
</div>
|
| 234 |
+
<div class="row cols-3">
|
| 235 |
+
<div>
|
| 236 |
+
<label for="gpuUtil">Target GPU memory utilization</label>
|
| 237 |
+
<input type="number" id="gpuUtil" value="0.90" min="0.5" max="0.98" step="0.01" />
|
| 238 |
+
</div>
|
| 239 |
+
<div>
|
| 240 |
+
<label for="batchHint">Concurrent sequences per model (KV hint)</label>
|
| 241 |
+
<input type="number" id="batchHint" value="8" min="1" max="512" step="1" />
|
| 242 |
+
</div>
|
| 243 |
+
<div style="display:flex;align-items:flex-end">
|
| 244 |
+
<button type="button" class="primary" id="btnFetch" style="width:100%">Fetch all & compute</button>
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
<div id="fetchError" class="err" hidden></div>
|
| 248 |
+
</div>
|
| 249 |
+
|
| 250 |
+
<div id="results" hidden>
|
| 251 |
+
<div class="card">
|
| 252 |
+
<h2>Multi-model deployment (preferred GPU)</h2>
|
| 253 |
+
<div id="multiDeployment"></div>
|
| 254 |
+
</div>
|
| 255 |
+
|
| 256 |
+
<div class="card">
|
| 257 |
+
<h2>Models & shards (from Hub)</h2>
|
| 258 |
+
<div id="modelSummary"></div>
|
| 259 |
+
</div>
|
| 260 |
+
|
| 261 |
+
<div class="card">
|
| 262 |
+
<h2>Memory breakdown</h2>
|
| 263 |
+
<div id="memBreakdown"></div>
|
| 264 |
+
</div>
|
| 265 |
+
|
| 266 |
+
<div class="card">
|
| 267 |
+
<h2>GPU catalog</h2>
|
| 268 |
+
<p class="sub" style="margin:0 0 0.75rem">Click a GPU for full specs. Your <strong>preferred</strong> choice is highlighted for multi-model totals above.</p>
|
| 269 |
+
<div id="gpuGrid" class="gpu-grid"></div>
|
| 270 |
+
<div id="gpuDetailPanel" hidden></div>
|
| 271 |
+
</div>
|
| 272 |
+
|
| 273 |
+
<div class="card">
|
| 274 |
+
<h2>vLLM commands</h2>
|
| 275 |
+
<p id="commandGpuHint" class="hint" style="margin-top:0"></p>
|
| 276 |
+
<pre class="cmd" id="vllmCmd"></pre>
|
| 277 |
+
<p class="hint">Use a different <code>--port</code> per model when running on the same host. Adjust <code>--tensor-parallel-size</code> if your cluster differs. See <a href="https://docs.vllm.ai" style="color:var(--accent)" target="_blank" rel="noopener">vLLM docs</a>.</p>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
|
| 282 |
+
<script>
|
| 283 |
+
const HF_API = "https://huggingface.co/api";
|
| 284 |
+
|
| 285 |
+
/**
 * Build the URL path for a Hugging Face repo id (`org/name`).
 * Encoding the whole id would turn `/` into `%2F` and break
 * `/api/models/...` (400), so each path segment is encoded on its own.
 */
function hfRepoPath(repoId) {
  const segments = repoId.trim().split("/").filter(Boolean);
  return segments.map(encodeURIComponent).join("/");
}
|
| 294 |
+
|
| 295 |
+
/**
 * Accept pasted browser URLs and reduce them to a repo id,
 * e.g. https://huggingface.co/Qwen/Qwen3-30B-A3B → Qwen/Qwen3-30B-A3B.
 * Anything that is not a huggingface.co / hf.co model URL is returned as-is.
 */
function normalizeHfModelInput(raw) {
  const text = String(raw).trim();
  if (!text) return text;
  // Plain ids (no scheme) pass straight through.
  if (!/^https?:\/\//i.test(text)) return text;
  try {
    const url = new URL(text);
    const host = url.hostname.replace(/^www\./i, "").toLowerCase();
    if (host !== "huggingface.co" && host !== "hf.co") return text;
    const segments = url.pathname.split("/").filter(Boolean);
    // Dataset/space URLs are not model repos — leave them untouched.
    if (segments[0] === "datasets" || segments[0] === "spaces") return text;
    if (segments.length >= 2) {
      return `${decodeURIComponent(segments[0])}/${decodeURIComponent(segments[1])}`;
    }
  } catch {
    /* not a parsable URL — fall through and return the trimmed input */
  }
  return text;
}
|
| 314 |
+
|
| 315 |
+
/**
 * Reference GPU spec table used for TP sizing and the catalog UI.
 * `vramGb` is the only field used in memory math; bandwidth/TFLOPS/TDP/PCIe
 * are display-only reference numbers (approximate — see each entry's notes).
 */
const GPU_CATALOG = [
  { id: "h100-sxm", name: "NVIDIA H100 SXM", vramGb: 80, memBandwidthGbps: 3350, tdpW: 700, fp16Tflops: 989, pcie: "PCIe 5.0 x16", notes: "Datacenter flagship; best for large TP." },
  { id: "h100-pcie", name: "NVIDIA H100 PCIe", vramGb: 80, memBandwidthGbps: 2000, tdpW: 350, fp16Tflops: 756, pcie: "PCIe 5.0 x16", notes: "Slightly lower BW than SXM." },
  { id: "h200", name: "NVIDIA H200", vramGb: 141, memBandwidthGbps: 4800, tdpW: 700, fp16Tflops: 989, pcie: "PCIe 5.0 x16", notes: "More HBM than H100." },
  { id: "b200", name: "NVIDIA B200", vramGb: 192, memBandwidthGbps: 8000, tdpW: 1000, fp16Tflops: 2250, pcie: "NVLink / rack", notes: "Blackwell; approximate specs." },
  { id: "a100-80", name: "NVIDIA A100 80GB", vramGb: 80, memBandwidthGbps: 2039, tdpW: 400, fp16Tflops: 312, pcie: "PCIe 4.0", notes: "Common in clouds." },
  { id: "a100-40", name: "NVIDIA A100 40GB", vramGb: 40, memBandwidthGbps: 1555, tdpW: 400, fp16Tflops: 312, pcie: "PCIe 4.0", notes: "" },
  { id: "l40s", name: "NVIDIA L40S", vramGb: 48, memBandwidthGbps: 864, tdpW: 350, fp16Tflops: 362, pcie: "PCIe 4.0 x16", notes: "Inference-oriented Ada." },
  { id: "l40", name: "NVIDIA L40", vramGb: 48, memBandwidthGbps: 864, tdpW: 300, fp16Tflops: 181, pcie: "PCIe 4.0 x16", notes: "Legacy Ada datacenter; predecessor to L40S." },
  { id: "a30", name: "NVIDIA A30", vramGb: 24, memBandwidthGbps: 933, tdpW: 165, fp16Tflops: 165, pcie: "PCIe 4.0 x16", notes: "Legacy Ampere; compact inference." },
  { id: "a10", name: "NVIDIA A10", vramGb: 24, memBandwidthGbps: 600, tdpW: 150, fp16Tflops: 125, pcie: "PCIe 4.0 x16", notes: "Legacy Ampere single-slot cloud GPU." },
  { id: "a10g", name: "NVIDIA A10G", vramGb: 24, memBandwidthGbps: 600, tdpW: 300, fp16Tflops: 125, pcie: "PCIe 4.0 x16", notes: "A10-class (e.g. AWS G5); ref. specs." },
  { id: "l4", name: "NVIDIA L4", vramGb: 24, memBandwidthGbps: 300, tdpW: 72, fp16Tflops: 120, pcie: "PCIe 4.0 x16", notes: "Legacy Ada low-power inference." },
  { id: "t4", name: "NVIDIA T4", vramGb: 16, memBandwidthGbps: 320, tdpW: 70, fp16Tflops: 65, pcie: "PCIe 3.0 x16", notes: "Legacy Turing inference." },
  { id: "v100-32", name: "NVIDIA V100 32GB", vramGb: 32, memBandwidthGbps: 1134, tdpW: 300, fp16Tflops: 125, pcie: "PCIe 3.0 / SXM2", notes: "Legacy Volta; still common in older clusters." },
  { id: "v100-16", name: "NVIDIA V100 16GB", vramGb: 16, memBandwidthGbps: 900, tdpW: 250, fp16Tflops: 125, pcie: "PCIe 3.0 / SXM2", notes: "Legacy Volta 16 GB SKU." },
  { id: "p100-16", name: "NVIDIA P100 16GB", vramGb: 16, memBandwidthGbps: 732, tdpW: 250, fp16Tflops: 19, pcie: "PCIe 3.0", notes: "Legacy Pascal; very dated for LLMs." },
  { id: "a6000", name: "NVIDIA RTX A6000", vramGb: 48, memBandwidthGbps: 768, tdpW: 300, fp16Tflops: 155, pcie: "PCIe 4.0 x16", notes: "Workstation." },
  { id: "3090", name: "NVIDIA GeForce RTX 3090", vramGb: 24, memBandwidthGbps: 936, tdpW: 350, fp16Tflops: 160, pcie: "PCIe 4.0 x16", notes: "Legacy Ampere consumer; 24 GB." },
  { id: "4090", name: "NVIDIA GeForce RTX 4090", vramGb: 24, memBandwidthGbps: 1008, tdpW: 450, fp16Tflops: 330, pcie: "PCIe 4.0 x16", notes: "High BW consumer card." },
  { id: "4080", name: "NVIDIA GeForce RTX 4080", vramGb: 16, memBandwidthGbps: 717, tdpW: 320, fp16Tflops: 195, pcie: "PCIe 4.0 x16", notes: "" },
  { id: "5090", name: "NVIDIA GeForce RTX 5090", vramGb: 32, memBandwidthGbps: 1792, tdpW: 575, fp16Tflops: 420, pcie: "PCIe 5.0 x16", notes: "Approximate consumer flagship." },
  { id: "mi300x", name: "AMD MI300X", vramGb: 192, memBandwidthGbps: 5300, tdpW: 750, fp16Tflops: 1300, pcie: "OAM", notes: "Approximate; check ROCm/vLLM support." },
];
|
| 339 |
+
|
| 340 |
+
/** Authorization header for Hub requests when the user pasted an HF token; empty object otherwise. */
function authHeaders() {
  const token = document.getElementById("hfToken").value.trim();
  if (!token) return {};
  return { Authorization: `Bearer ${token}` };
}
|
| 344 |
+
|
| 345 |
+
/** fetch() wrapper that attaches the optional HF token and throws on non-2xx responses. */
async function hfFetch(url) {
  const response = await fetch(url, { headers: { ...authHeaders() } });
  if (!response.ok) {
    throw new Error(`${response.status} ${response.statusText} — ${url}`);
  }
  return response;
}
|
| 350 |
+
|
| 351 |
+
/** GET a Hub URL and parse the response body as JSON. */
async function hfJson(url) {
  const response = await hfFetch(url);
  return response.json();
}
|
| 355 |
+
|
| 356 |
+
/** GET a Hub URL and return the raw response body as text. */
async function hfText(url) {
  const response = await hfFetch(url);
  return response.text();
}
|
| 360 |
+
|
| 361 |
+
/**
 * Sum sizes of weight files from a Hub tree API listing.
 * Keeps *.safetensors plus torch *.bin checkpoints, skipping optimizer /
 * training / non-torch artifacts. Returns shards sorted largest-first.
 */
function analyzeTreeFiles(tree) {
  const ignoredKeywords = ["training_args", "optimizer", "scheduler", "tf_model", "flax_model", "rust_model"];
  const isWeightFile = (entry) => {
    if (entry.type !== "blob" && entry.type !== "file") return false;
    if (typeof entry.size !== "number") return false;
    const lower = entry.path.toLowerCase();
    if (ignoredKeywords.some((kw) => lower.includes(kw))) return false;
    if (lower.endsWith(".safetensors")) return true;
    if (!lower.endsWith(".bin")) return false;
    return (
      lower.endsWith("pytorch_model.bin") ||
      /model-\d+-of-\d+\.bin$/.test(lower) ||
      lower.includes("pytorch_model-")
    );
  };
  let totalBytes = 0;
  const files = [];
  for (const entry of tree) {
    if (!isWeightFile(entry)) continue;
    totalBytes += entry.size;
    files.push({ path: entry.path, sizeBytes: entry.size, sizeGb: entry.size / 1e9 });
  }
  files.sort((a, b) => b.sizeBytes - a.sizeBytes);
  const maxShardBytes = files.length ? files[0].sizeBytes : 0;
  return { files, totalBytes, maxShardBytes };
}
|
| 384 |
+
|
| 385 |
+
/** Parse a config.json body; returns null instead of throwing on malformed JSON. */
function parseConfigJson(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
| 392 |
+
|
| 393 |
+
/**
 * Rough parameter count from a Llama-like config.
 * Uses cfg.num_parameters when present; otherwise sums embedding, attention
 * (GQA-aware via num_key_value_heads), gated MLP, norms, and the LM head.
 * Returns null when required fields are missing.
 */
function estimateParamsFromConfig(cfg) {
  if (!cfg) return null;
  if (typeof cfg.num_parameters === "number") return cfg.num_parameters;
  const hidden = cfg.hidden_size;
  const layers = cfg.num_hidden_layers;
  const vocab = cfg.vocab_size;
  const inter = cfg.intermediate_size;
  const heads = cfg.num_attention_heads;
  const kvHeads = cfg.num_key_value_heads ?? heads;
  if (!hidden || !layers || !vocab || !inter || !heads) return null;
  const headDim = hidden / heads;
  const embedParams = vocab * hidden;
  // Q + O projections (h×h each) plus K + V projections (kv_heads × head_dim × h each).
  const attnPerLayer = 2 * (hidden * hidden) + 2 * (kvHeads * headDim * hidden);
  // Gated MLP: gate, up, down projections.
  const mlpPerLayer = 3 * hidden * inter;
  const normParams = 2 * hidden * layers * 2;
  const lmHead = hidden * vocab;
  return embedParams + layers * (attnPerLayer + mlpPerLayer) + normParams + lmHead;
}
|
| 412 |
+
|
| 413 |
+
/**
 * KV-cache bytes for one token across all layers.
 * K and V each store num_kv_heads * head_dim elements per layer, so
 * per token: layers * 2 (K+V) * num_kv_heads * head_dim * bytes-per-element.
 * Returns 0 when the config is missing or lacks the needed fields.
 */
function kvBytesPerToken(cfg, kvBytesPerEl) {
  if (!cfg) return 0;
  const hidden = cfg.hidden_size;
  const layers = cfg.num_hidden_layers;
  const heads = cfg.num_attention_heads;
  const kvHeads = cfg.num_key_value_heads ?? heads;
  if (!hidden || !layers || !heads) return 0;
  const headDim = hidden / heads;
  return layers * 2 * kvHeads * headDim * kvBytesPerEl;
}
|
| 427 |
+
|
| 428 |
+
/** Bytes per weight parameter: 1 for fp8, 2 for everything else (bf16/fp16). */
function bytesPerParamWeight(dtype) {
  if (dtype === "fp8") return 1;
  return 2;
}
|
| 431 |
+
|
| 432 |
+
/** VRAM (GB) actually available to vLLM after the utilization headroom factor. */
function usableVramGb(vramGb, util) {
  const usable = vramGb * util;
  return usable;
}
|
| 435 |
+
|
| 436 |
+
/**
 * Minimum tensor-parallel size so weights + KV fit.
 * With TP, weights and standard attention KV are sharded across ranks, so
 * per-GPU ≈ (weightGb + kvTotalGb) / tp — we need tp ≥ ceil(combined / usable).
 * Largest on-disk shard is handled separately (load-time peak varies by loader).
 * Returns Infinity when no usable VRAM is available.
 */
function minTpForWeightsAndKv(totalWeightGb, kvTotalGb, usablePerGpuGb) {
  if (usablePerGpuGb <= 0) return Infinity;
  const ranksNeeded = Math.ceil((totalWeightGb + kvTotalGb) / usablePerGpuGb);
  return Math.max(1, ranksNeeded);
}
|
| 446 |
+
|
| 447 |
+
/**
 * Minimum TP size so the largest on-disk shard fits on one GPU.
 * Unknown/zero shard size counts as "fits" (1); zero usable VRAM → Infinity.
 */
function minTpForLargestShard(maxShardGb, usablePerGpuGb) {
  if (!maxShardGb || maxShardGb <= 0) return 1;
  if (usablePerGpuGb <= 0) return Infinity;
  const ranksNeeded = Math.ceil(maxShardGb / usablePerGpuGb);
  return Math.max(1, ranksNeeded);
}
|
| 452 |
+
|
| 453 |
+
/** Render the clicked GPU's full spec sheet into the detail panel and unhide it. */
function renderGpuDetail(gpu) {
  const panel = document.getElementById("gpuDetailPanel");
  const specRows = `
        <dt>VRAM</dt><dd>${gpu.vramGb} GB</dd>
        <dt>Memory bandwidth (ref.)</dt><dd>~${gpu.memBandwidthGbps} GB/s</dd>
        <dt>FP16 TFLOPS (ref.)</dt><dd>~${gpu.fp16Tflops}</dd>
        <dt>TDP (ref.)</dt><dd>${gpu.tdpW} W</dd>
        <dt>PCIe</dt><dd>${gpu.pcie}</dd>
        <dt>Notes</dt><dd>${gpu.notes || "—"}</dd>`;
  panel.innerHTML = `
    <div class="gpu-detail">
      <strong style="color:var(--accent)">${gpu.name}</strong>
      <dl style="margin-top:0.75rem">${specRows}
      </dl>
      <p class="hint" style="margin-bottom:0">Published specs vary by SKU and firmware; use vendor datasheets for procurement.</p>
    </div>
  `;
  panel.hidden = false;
}
|
| 471 |
+
|
| 472 |
+
// Id of the GPU card last clicked in the catalog grid (null until a click).
let selectedGpuId = null;
// Result of the most recent "fetch all" run; null before the first fetch.
/** @type {{ models: object[] } | null} */
let lastFetchCtx = null;
// Monotonic counter used to mint unique data-row-id values for model input rows.
let rowIdSeq = 0;
|
| 476 |
+
|
| 477 |
+
/** Fill the "preferred GPU" dropdown from the catalog; no-op if already populated. */
function populatePreferredGpuSelect() {
  const select = document.getElementById("preferredGpu");
  if (!select || select.options.length) return;
  for (const gpu of GPU_CATALOG) {
    const option = document.createElement("option");
    option.value = gpu.id;
    option.textContent = `${gpu.name} (${gpu.vramGb} GB)`;
    select.appendChild(option);
  }
  // Default to the datacenter flagship.
  select.value = "h100-sxm";
}
|
| 488 |
+
|
| 489 |
+
/** Collect non-empty, normalized model ids from all model-input rows. */
function getModelIdsFromInputs() {
  const ids = [];
  for (const input of document.querySelectorAll(".model-id-input")) {
    const normalized = normalizeHfModelInput(input.value.trim());
    if (normalized) ids.push(normalized);
  }
  return ids;
}
|
| 494 |
+
|
| 495 |
+
/** Rewrite each model input in place with its normalized form (URL → repo id). */
function syncInputValuesFromNormalized() {
  for (const input of document.querySelectorAll(".model-id-input")) {
    const trimmed = input.value.trim();
    const normalized = normalizeHfModelInput(trimmed);
    if (normalized && normalized !== trimmed) input.value = normalized;
  }
}
|
| 502 |
+
|
| 503 |
+
function addModelRow(initial = "") {
|
| 504 |
+
const container = document.getElementById("modelListContainer");
|
| 505 |
+
const id = `mr-${++rowIdSeq}`;
|
| 506 |
+
const wrap = document.createElement("div");
|
| 507 |
+
wrap.className = "model-row";
|
| 508 |
+
wrap.dataset.rowId = id;
|
| 509 |
+
wrap.innerHTML = `
|
| 510 |
+
<div>
|
| 511 |
+
<label class="model-row-label" style="font-size:0.8rem;color:var(--muted)">Model id or URL</label>
|
| 512 |
+
<input type="text" class="model-id-input" placeholder="org/model or https://huggingface.co/…" autocomplete="off" />
|
| 513 |
+
</div>
|
| 514 |
+
<button type="button" class="btn-ghost danger btn-remove-model" title="Remove">Remove</button>`;
|
| 515 |
+
wrap.querySelector(".model-id-input").value = initial;
|
| 516 |
+
container.appendChild(wrap);
|
| 517 |
+
wrap.querySelector(".btn-remove-model").addEventListener("click", () => {
|
| 518 |
+
if (document.querySelectorAll(".model-row").length <= 1) return;
|
| 519 |
+
wrap.remove();
|
| 520 |
+
});
|
| 521 |
+
}
|
| 522 |
+
|
| 523 |
+
/**
 * Fetch everything needed to size one model: repo metadata, the file tree
 * (for weight shard sizes), config.json (for param/KV estimates), and — as a
 * fallback for total size — the safetensors/bin index metadata.
 * Network calls go through hfJson/hfText, which throw on non-2xx; config and
 * index fetches are best-effort and degrade to null.
 */
async function fetchOneModel(modelId) {
  const meta = await hfJson(`${HF_API}/models/${hfRepoPath(modelId)}`);
  // Pin the revision to the resolved commit sha when available.
  const ref = meta.sha || "main";
  const treeUrl = `${HF_API}/models/${hfRepoPath(modelId)}/tree/${encodeURIComponent(ref)}?recursive=true`;
  const tree = await hfJson(treeUrl);
  const analysis = analyzeTreeFiles(Array.isArray(tree) ? tree : []);

  // config.json is optional (e.g. gated repos without token) — keep going without it.
  let config = null;
  try {
    const cfgUrl = `https://huggingface.co/${hfRepoPath(modelId)}/resolve/${encodeURIComponent(ref)}/config.json`;
    const cfgText = await hfText(cfgUrl);
    config = parseConfigJson(cfgText);
  } catch {
    config = null;
  }

  // Shard index metadata gives total_size when the tree API reported no sizes.
  let indexMeta = null;
  try {
    const idxCandidates = [
      `https://huggingface.co/${hfRepoPath(modelId)}/resolve/${encodeURIComponent(ref)}/model.safetensors.index.json`,
      `https://huggingface.co/${hfRepoPath(modelId)}/resolve/${encodeURIComponent(ref)}/pytorch_model.bin.index.json`,
    ];
    for (const u of idxCandidates) {
      try {
        const j = await hfJson(u);
        if (j.metadata && j.metadata.total_size != null) {
          indexMeta = j.metadata;
          break;
        }
      } catch { /* try next */ }
    }
  } catch { /* optional */ }
  const totalBytesFromIndex = indexMeta && indexMeta.total_size ? Number(indexMeta.total_size) : null;

  // Prefer summed tree sizes; fall back to the index total (may still be null).
  const totalBytes = analysis.totalBytes > 0 ? analysis.totalBytes : totalBytesFromIndex;
  const totalGbDisk = totalBytes != null ? totalBytes / 1e9 : null;
  // With no per-file sizes, treat the whole checkpoint as one shard (conservative).
  const maxShardGb = analysis.maxShardBytes > 0 ? analysis.maxShardBytes / 1e9 : (totalGbDisk || 0);
  const estParams = estimateParamsFromConfig(config);

  return { modelId, meta, analysis, config, totalGbDisk, maxShardGb, estParams };
}
|
| 564 |
+
|
| 565 |
+
/**
 * Compute per-model memory figures from the current UI controls.
 * @param {object} ctx - model ctx from fetchOneModel().
 * @returns {{weightGb: ?number, kvTotalGb: number, kvPerToken: number,
 *            weightDtype: string, kvSel: string, maxLen: number, batchHint: number}}
 */
function metricsForCtx(ctx) {
  const weightDtype = document.getElementById("weightDtype").value;
  const bPerParam = bytesPerParamWeight(weightDtype);

  // Weight estimate: scale on-disk size by dtype ratio (checkpoint assumed to
  // be 2 bytes/param — TODO confirm for non-fp16/bf16 checkpoints); without a
  // disk size, fall back to the config-derived parameter count.
  let weightGb = null;
  if (ctx.totalGbDisk != null) {
    weightGb = ctx.totalGbDisk * (bPerParam / 2);
  } else if (ctx.estParams != null) {
    weightGb = (ctx.estParams * bPerParam) / 1e9;
  }

  // KV cache: fp8 stores 1 byte per element, everything else 2.
  const kvSel = document.getElementById("kvDtype").value;
  const kvBytesPerEl = kvSel === "fp8" ? 1 : 2;
  // Clamp user inputs to sane planning floors.
  const maxLen = Math.max(256, parseInt(document.getElementById("maxModelLen").value, 10) || 8192);
  const batchHint = Math.max(1, parseInt(document.getElementById("batchHint").value, 10) || 1);

  const kvPerToken = kvBytesPerToken(ctx.config, kvBytesPerEl);
  const kvTotalGb = (kvPerToken * maxLen * batchHint) / 1e9;

  return { weightGb, kvTotalGb, kvPerToken, weightDtype, kvSel, maxLen, batchHint };
}
|
| 580 |
+
|
| 581 |
+
/**
 * Minimum tensor-parallel size for one model on one GPU type.
 * Takes the stricter of the memory constraint (weights + KV must fit across
 * the TP group) and the shard constraint (largest shard must fit on one GPU).
 * @returns {?number} min TP, or null when no weight estimate exists.
 */
function tpForModelOnGpu(ctx, weightGb, kvTotalGb, gpu, util) {
  const usable = usableVramGb(gpu.vramGb, util);
  if (weightGb == null) return null;
  const byMemory = minTpForWeightsAndKv(weightGb, kvTotalGb, usable);
  const byShard = minTpForLargestShard(ctx.maxShardGb, usable);
  return Math.max(byMemory, byShard);
}
|
| 588 |
+
|
| 589 |
+
/**
 * GPU that the generated vLLM commands target: a clicked catalog card
 * (selectedGpuId) wins; otherwise the Preferred GPU dropdown; otherwise the
 * first catalog entry as a last resort.
 * @returns {object} an entry from GPU_CATALOG.
 */
function gpuForCommands() {
  const lookup = (id) => GPU_CATALOG.find((g) => g.id === id);
  if (selectedGpuId) {
    const clicked = lookup(selectedGpuId);
    if (clicked) return clicked;
  }
  return lookup(document.getElementById("preferredGpu").value) || GPU_CATALOG[0];
}
|
| 598 |
+
|
| 599 |
+
/**
 * Render copy-pasteable `vllm serve` commands — one independent server per
 * model — sized for the GPU returned by gpuForCommands().
 * No-op when there are no models or the output element is missing.
 * @param {object[]} models - fetch contexts from fetchOneModel().
 */
function renderVllmCommands(models) {
  const hintEl = document.getElementById("commandGpuHint");
  const cmdEl = document.getElementById("vllmCmd");
  if (!models || !models.length || !cmdEl) return;

  // Utilization clamped to [0.5, 0.98]; 0.9 default when the field is empty/invalid.
  const util = Math.min(0.98, Math.max(0.5, parseFloat(document.getElementById("gpuUtil").value) || 0.9));
  const cmdGpu = gpuForCommands();
  const usableCmd = usableVramGb(cmdGpu.vramGb, util);

  // Read each control exactly once (the original re-queried kvDtype per ternary branch).
  const kvSel = document.getElementById("kvDtype").value;
  const kvFlag = kvSel === "fp8" ? "fp8_e5m2" : kvSel === "fp16" ? "fp16" : "auto";
  const dtypeFlag = document.getElementById("weightDtype").value === "fp8" ? "float8_e4m3fn" : "bfloat16";
  const maxLen = Math.max(256, parseInt(document.getElementById("maxModelLen").value, 10) || 8192);

  // Per-model TP on the command GPU; unknown estimates fall back to TP=1.
  let totalCmd = 0;
  const blocks = models.map((ctx) => {
    const m = metricsForCtx(ctx);
    const tp = tpForModelOnGpu(ctx, m.weightGb, m.kvTotalGb, cmdGpu, util);
    const tpUse = typeof tp === "number" && !Number.isNaN(tp) ? tp : 1;
    totalCmd += tpUse;
    return { ctx, tpUse };
  });

  const lines = [
    `# Total GPUs (separate vLLM servers, ${cmdGpu.name}): ${totalCmd}`,
    `# ~${usableCmd.toFixed(1)} GB usable per GPU @ ${(util * 100).toFixed(0)}% of ${cmdGpu.vramGb} GB VRAM`,
    `# Assign disjoint CUDA_VISIBLE_DEVICES per server on the same host.`,
    "",
  ];
  blocks.forEach((b, i) => {
    const port = 8000 + i; // sequential ports, one server per model
    lines.push(
      `# --- ${b.ctx.modelId} ---`,
      `vllm serve "${b.ctx.modelId}" \\`,
      `  --dtype ${dtypeFlag} \\`,
      `  --tensor-parallel-size ${b.tpUse} \\`,
      `  --max-model-len ${maxLen} \\`,
      `  --gpu-memory-utilization ${util} \\`,
      `  --kv-cache-dtype ${kvFlag} \\`,
      `  --port ${port}`,
      ""
    );
  });
  cmdEl.textContent = lines.join("\n").trimEnd();

  if (hintEl) {
    const src = selectedGpuId ? "GPU catalog (clicked card)" : "Preferred GPU dropdown";
    hintEl.textContent = `Tensor parallelism in the commands below uses ${cmdGpu.name} (~${usableCmd.toFixed(1)} GB usable per GPU). Source: ${src}. Click a GPU card to override the dropdown; change the dropdown to clear the override.`;
  }
}
|
| 654 |
+
|
| 655 |
+
/**
 * Rebuild the GPU catalog cards from precomputed per-GPU state.
 * Each card shows usable VRAM, how many copies of the largest shard (across
 * all fetched models) fit on one GPU, and the highest per-model min TP.
 * Clicking a card makes that GPU the override target for the vLLM commands.
 * @param {{usablePerGpuByGpu: Object, shardsFit: Object, tp: Object,
 *          util: number, preferredGpuId: string}} state
 */
function buildGpuGrid(state) {
  const grid = document.getElementById("gpuGrid");
  grid.innerHTML = "";
  const { usablePerGpuByGpu, shardsFit, tp, util, preferredGpuId } = state;
  // Resolved once: the GPU the commands section currently targets, so its
  // card can get the dashed "commands-target" outline.
  const cmdGpuId = gpuForCommands().id;

  GPU_CATALOG.forEach((gpu) => {
    const usable = usablePerGpuByGpu[gpu.id];
    const fit = shardsFit[gpu.id];
    const isPref = preferredGpuId && gpu.id === preferredGpuId;
    const isCmdTarget = gpu.id === cmdGpuId;
    const card = document.createElement("div");
    card.className =
      "gpu-card" +
      (selectedGpuId === gpu.id ? " selected" : "") +
      (isPref ? " preferred" : "") +
      (isCmdTarget ? " commands-target" : "");
    // NOTE(review): values interpolated here come from the local GPU_CATALOG,
    // not user input, so innerHTML is safe as long as the catalog stays
    // hard-coded — revisit if GPUs ever become user-editable.
    card.innerHTML = `
      <div class="name">${isPref ? '<span style="float:right;font-size:0.65rem;color:var(--accent2);text-transform:uppercase">preferred</span>' : ""}${gpu.name}</div>
      <div class="vram">${gpu.vramGb} GB VRAM · ~${usable.toFixed(1)} GB usable @ ${(util * 100).toFixed(0)}%</div>
      <div style="margin-top:0.4rem;font-size:0.78rem;color:var(--muted)">
        Shards fit (largest shard across models) / GPU: <strong style="color:var(--text)">${fit}</strong>
        ${tp[gpu.id] != null ? ` · max TP any model: <strong style="color:var(--good)">${tp[gpu.id]}</strong>` : ""}
      </div>
    `;
    card.addEventListener("click", () => {
      // Select this GPU as the commands override, restyle cards, and
      // recompute from the cached fetch so commands/tables follow the click.
      selectedGpuId = gpu.id;
      document.querySelectorAll(".gpu-card").forEach((c) => c.classList.remove("selected"));
      card.classList.add("selected");
      renderGpuDetail(gpu);
      if (lastFetchCtx && lastFetchCtx.models) computeAndRenderMulti(lastFetchCtx.models);
    });
    grid.appendChild(card);
  });
}
|
| 690 |
+
|
| 691 |
+
/**
 * Main render pass: given fetched model contexts, compute memory/TP figures
 * and (re)render the per-model summaries, memory breakdown, GPU grid,
 * multi-deployment table, and vLLM commands.
 * @param {object[]} models — array of Hub fetch ctx from fetchOneModel()
 */
function computeAndRenderMulti(models) {
  // Utilization clamped to [0.5, 0.98]; 0.9 default on empty/invalid input.
  const util = Math.min(0.98, Math.max(0.5, parseFloat(document.getElementById("gpuUtil").value) || 0.9));
  const preferredGpuId = document.getElementById("preferredGpu").value;
  const prefGpu = GPU_CATALOG.find((g) => g.id === preferredGpuId) || GPU_CATALOG[0];

  // Per-model metrics plus min TP (and per-GPU share) on the preferred GPU.
  const perModel = models.map((ctx) => {
    const m = metricsForCtx(ctx);
    const tpPref = tpForModelOnGpu(ctx, m.weightGb, m.kvTotalGb, prefGpu, util);
    const perGpuPref =
      m.weightGb != null && tpPref != null ? (m.weightGb + m.kvTotalGb) / tpPref : null;
    return { ctx, ...m, tpOnPreferred: tpPref, perGpuOnPreferred: perGpuPref };
  });

  // Worst-case single shard across all models — drives "shards fit / GPU".
  const maxShardAll = Math.max(0, ...models.map((c) => c.maxShardGb || 0));

  // Build the per-model summary <details> blocks and memory tables as HTML
  // strings; only the first model's block starts open.
  let summaryHtml = "";
  let memHtml = "";
  perModel.forEach((row, idx) => {
    const { ctx, weightGb, kvTotalGb, kvPerToken, weightDtype, kvSel, maxLen, batchHint } = row;
    // Show at most 12 shard rows; collapse the rest into a "… N more" row.
    const shardRows = ctx.analysis.files.slice(0, 12).map((f) =>
      `<tr><td>${escapeHtml(f.path)}</td><td>${f.sizeGb.toFixed(2)}</td></tr>`
    ).join("");
    const moreShards =
      ctx.analysis.files.length > 12
        ? `<tr><td colspan="2">… ${ctx.analysis.files.length - 12} more</td></tr>`
        : "";

    summaryHtml += `
      <details class="model-block" ${idx === 0 ? "open" : ""}>
        <summary>${escapeHtml(ctx.modelId)}</summary>
        <p style="margin:0.5rem 0;font-size:0.85rem;color:var(--muted)">${escapeHtml(ctx.meta.pipeline_tag || ctx.meta.library_name || "model")}</p>
        <table>
          <tr><th>Metric</th><th>Value</th></tr>
          <tr><td>Weight files total</td><td>${ctx.totalGbDisk != null ? ctx.totalGbDisk.toFixed(2) + " GB" : "unknown"}</td></tr>
          <tr><td>Largest shard</td><td>${ctx.maxShardGb > 0 ? ctx.maxShardGb.toFixed(2) + " GB" : "—"}</td></tr>
          <tr><td>Est. weight (${weightDtype})</td><td>${weightGb != null ? weightGb.toFixed(2) + " GB" : "—"}</td></tr>
        </table>
        ${ctx.analysis.files.length ? `<table style="margin-top:0.5rem"><tr><th>File</th><th>GB</th></tr>${shardRows}${moreShards}</table>` : ""}
      </details>`;

    memHtml += `
      <h3 style="font-size:0.9rem;margin:0.75rem 0 0.4rem;color:var(--accent)">${escapeHtml(ctx.modelId)}</h3>
      <table>
        <tr><th>Component</th><th>Estimate</th></tr>
        <tr><td>Weights</td><td>${weightGb != null ? weightGb.toFixed(2) + " GB" : "—"}</td></tr>
        <tr><td>KV (${kvSel}, ${maxLen} × ${batchHint} seqs)</td><td>${kvTotalGb.toFixed(3)} GB</td></tr>
        <tr><td>KV / token</td><td>${(kvPerToken / 1024).toFixed(2)} KiB</td></tr>
      </table>`;
  });

  document.getElementById("modelSummary").innerHTML = summaryHtml || "<p class='hint'>No models.</p>";
  document.getElementById("memBreakdown").innerHTML =
    memHtml + `<p class="hint">KV is a planning upper bound; vLLM paging changes real usage.</p>`;

  // Per-GPU stats for the catalog grid: usable VRAM, shards-fit count, and
  // the *largest* per-model min TP on that GPU (null when no model has one).
  const usablePerGpuByGpu = {};
  const shardsFit = {};
  const minTp = {};
  for (const gpu of GPU_CATALOG) {
    const usable = usableVramGb(gpu.vramGb, util);
    usablePerGpuByGpu[gpu.id] = usable;
    shardsFit[gpu.id] =
      maxShardAll > 0 ? Math.floor(usable / maxShardAll) : 0;
    let maxTp = 0;
    for (const row of perModel) {
      if (row.weightGb == null) continue; // no estimate → cannot size TP
      const t = tpForModelOnGpu(row.ctx, row.weightGb, row.kvTotalGb, gpu, util);
      if (t != null && t > maxTp) maxTp = t;
    }
    minTp[gpu.id] = maxTp || null;
  }

  buildGpuGrid({ util, usablePerGpuByGpu, shardsFit, tp: minTp, preferredGpuId });

  // Totals for the deployment table: GPUs needed when every model gets its
  // own TP group, and whether everything could share one preferred GPU
  // (sum fits in usable VRAM AND every model's min TP is 1).
  const totalGpusSeparate = perModel.reduce(
    (s, r) => s + (typeof r.tpOnPreferred === "number" && !Number.isNaN(r.tpOnPreferred) ? r.tpOnPreferred : 0),
    0
  );
  const sumMemOneGpu = perModel.reduce((s, r) => s + (r.weightGb || 0) + r.kvTotalGb, 0);
  const usablePref = usableVramGb(prefGpu.vramGb, util);
  const eachTpOne = perModel.every((r) => r.tpOnPreferred === 1);
  const fitsAllOnSingleGpu = sumMemOneGpu <= usablePref && eachTpOne;

  let multiHtml = `
    <p class="hint" style="margin-bottom:0.75rem">This table uses the <strong>Preferred GPU</strong> dropdown only. The <strong>vLLM commands</strong> section uses that same GPU until you click a GPU in the catalog — then commands switch to the clicked GPU (dashed outline). Changing the dropdown clears the click override.</p>
    <p><strong>Preferred GPU:</strong> ${escapeHtml(prefGpu.name)} — ~${usablePref.toFixed(1)} GB usable @ ${(util * 100).toFixed(0)}%</p>
    <table>
      <tr><th>Model</th><th>Weights+KV (est.)</th><th>Min TP on preferred</th><th>GPUs (dedicated group)</th></tr>
      ${perModel
        .map((r) => {
          const sum = r.weightGb != null ? r.weightGb + r.kvTotalGb : r.kvTotalGb;
          const tp = r.tpOnPreferred ?? "—";
          const gpus = r.tpOnPreferred ?? "—";
          return `<tr>
            <td>${escapeHtml(r.ctx.modelId)}</td>
            <td>${sum.toFixed(2)} GB</td>
            <td>${tp}</td>
            <td>${gpus}</td>
          </tr>`;
        })
        .join("")}
      <tr style="font-weight:600;border-top:2px solid var(--border)">
        <td>Total (separate instances)</td>
        <td>—</td>
        <td>—</td>
        <td>${totalGpusSeparate || "—"} GPUs</td>
      </tr>
    </table>
    <p class="hint" style="margin-top:0.75rem">
      <strong>Separate instances:</strong> each model uses its own tensor-parallel group; total accelerator count ≈ <strong>${totalGpusSeparate}</strong> × ${escapeHtml(prefGpu.name)} (no GPU sharing between models).
    </p>
    <p class="hint">
      <strong>Single GPU, multiple models:</strong> needs sum(weights+KV) ≤ usable VRAM on one GPU <em>and</em> each model’s min TP = 1 on that GPU.
      Here sum ≈ <strong>${sumMemOneGpu.toFixed(2)} GB</strong> vs <strong>${usablePref.toFixed(2)} GB</strong> usable —
      ${fitsAllOnSingleGpu ? '<span style="color:var(--good)">may fit in theory (still not recommended for large models — VRAM fragmentation & two processes).</span>' : '<span style="color:#f87171">does not fit on one GPU of this type at current settings.</span>'}
    </p>
    <p class="hint"><strong>Max configuration on preferred GPU:</strong> at these dtype / max-model-len / batch settings, the table above is the minimum TP per model; you cannot lower TP without reducing context, batch, quantization, or choosing a larger GPU.</p>
  `;
  document.getElementById("multiDeployment").innerHTML = multiHtml;

  renderVllmCommands(models);
}
|
| 815 |
+
|
| 816 |
+
/**
 * Re-render from the cached fetch, but only when results are on screen and
 * the input rows still match the model set fetched last time (same ids,
 * same order). Otherwise the user must press Fetch again.
 */
function tryRecomputeFromCache() {
  if (!lastFetchCtx || !lastFetchCtx.models || document.getElementById("results").hidden) return;
  const current = getModelIdsFromInputs();
  const cached = lastFetchCtx.models.map((m) => m.modelId);
  const unchanged =
    current.length === cached.length && current.every((id, i) => id === cached[i]);
  if (!unchanged) return;
  computeAndRenderMulti(lastFetchCtx.models);
}
|
| 823 |
+
|
| 824 |
+
// "Add model" button appends a fresh, empty model-id row.
document
  .getElementById("btnAddModel")
  .addEventListener("click", () => addModelRow());
|
| 825 |
+
|
| 826 |
+
// Fetch button: resolve every listed model on the Hub, then compute & render.
// Per-model failures are collected and shown without blocking the successes.
document.getElementById("btnFetch").addEventListener("click", async () => {
  syncInputValuesFromNormalized();
  const ids = getModelIdsFromInputs();
  const errEl = document.getElementById("fetchError");
  const results = document.getElementById("results");
  errEl.hidden = true;
  results.hidden = true;

  if (ids.length === 0) {
    errEl.textContent = "Add at least one Hugging Face model id or URL.";
    errEl.hidden = false;
    return;
  }

  const btn = document.getElementById("btnFetch");
  btn.disabled = true;
  btn.innerHTML = '<span class="spinner"></span>Loading…';

  try {
    // The per-model fetches are independent — run them in parallel instead of
    // awaiting one at a time. allSettled preserves input order, so successes
    // and error messages stay in the same order the user listed the models.
    const settled = await Promise.allSettled(ids.map((id) => fetchOneModel(id)));
    const models = [];
    const errors = [];
    settled.forEach((res, i) => {
      if (res.status === "fulfilled") {
        models.push(res.value);
      } else {
        const e = res.reason;
        errors.push(`${ids[i]}: ${(e && e.message) || e}`);
      }
    });

    // All models failed: show errors and bail without rendering.
    if (errors.length && models.length === 0) {
      errEl.textContent = errors.join("\n");
      errEl.hidden = false;
      lastFetchCtx = null;
      return;
    }
    // Partial failure: render what succeeded, but surface the failures too.
    if (errors.length) {
      errEl.textContent = "Some models failed:\n" + errors.join("\n");
      errEl.hidden = false;
    }

    // Cache for live recompute, reset any clicked-GPU override, and render.
    lastFetchCtx = { models };
    document.getElementById("gpuDetailPanel").hidden = true;
    selectedGpuId = null;
    computeAndRenderMulti(models);

    results.hidden = false;
  } catch (e) {
    errEl.textContent = e.message || String(e);
    errEl.hidden = false;
    lastFetchCtx = null;
  } finally {
    // Always restore the button, even on failure.
    btn.disabled = false;
    btn.textContent = "Fetch all & compute";
  }
});
|
| 880 |
+
|
| 881 |
+
/**
 * Escape a value for safe interpolation into HTML markup.
 * Improvement over the DOM textContent/innerHTML round-trip: also escapes
 * quotes (so output is safe inside attribute values), needs no throwaway DOM
 * node per call, and tolerates null/undefined.
 * @param {unknown} s - value to escape; null/undefined become "".
 * @returns {string} HTML-escaped text.
 */
function escapeHtml(s) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(s ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
| 886 |
+
|
| 887 |
+
/**
 * Collapse rapid calls into a single invocation of `fn`, fired `ms`
 * milliseconds after the most recent call. The last call's arguments win;
 * earlier pending invocations are cancelled.
 * @param {Function} fn - callback to debounce.
 * @param {number} ms - quiet period in milliseconds.
 * @returns {Function} debounced wrapper.
 */
function debounce(fn, ms) {
  let pending;
  return (...args) => {
    if (pending !== undefined) clearTimeout(pending);
    pending = setTimeout(() => fn(...args), ms);
  };
}
|
| 894 |
+
|
| 895 |
+
// Live-recompute wiring: any settings change re-renders from the cached
// fetch; numeric fields additionally recompute (debounced) while typing.
const debouncedRecompute = debounce(tryRecomputeFromCache, 350);

for (const id of ["maxModelLen", "batchHint", "gpuUtil", "weightDtype", "kvDtype"]) {
  const control = document.getElementById(id);
  if (!control) continue;
  control.addEventListener("change", tryRecomputeFromCache);
  if (control.type === "number") control.addEventListener("input", debouncedRecompute);
}

// Changing the Preferred GPU dropdown clears any clicked-card override.
document.getElementById("preferredGpu").addEventListener("change", () => {
  selectedGpuId = null;
  tryRecomputeFromCache();
});

// Initial UI state: fill the GPU dropdown and show one empty model row.
populatePreferredGpuSelect();
addModelRow();
|
| 911 |
+
</script>
|
| 912 |
+
</body>
|
| 913 |
</html>
|