| import init, { Model } from "./build/m.js"; |
|
|
/**
 * Fetch a URL as bytes, backed by the Cache Storage API.
 *
 * On a cache hit the cached body is returned directly; on a miss the
 * response is fetched, stored in the cache, and returned.
 *
 * @param {string} url - URL of the resource to fetch.
 * @returns {Promise<Uint8Array>} The response body as raw bytes.
 * @throws {Error} If the network response is not OK (so error pages are
 *   never cached or returned as model data).
 */
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache";
  const cache = await caches.open(cacheName);
  const cachedResponse = await cache.match(url);
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer();
    return new Uint8Array(data);
  }
  const res = await fetch(url, { cache: "force-cache" });
  // Do not cache (or return) failed responses: a cached 404/500 body would
  // poison the cache and be served as model data on every later load.
  if (!res.ok) {
    throw new Error(`fetch of ${url} failed: ${res.status} ${res.statusText}`);
  }
  // Await the write so failures surface here instead of as an
  // unhandled promise rejection.
  await cache.put(url, res.clone());
  return new Uint8Array(await res.arrayBuffer());
}
/**
 * Lazy, per-modelID singleton holder for WASM `Model` instances.
 *
 * `instance` maps modelID -> Promise resolving to the loaded `Model`.
 * Storing the in-flight promise (rather than the resolved model) closes a
 * check-then-act race: two interleaved requests for the same modelID share
 * one load instead of initializing the WASM module and weights twice.
 */
class Phi {
  static instance = {};

  /**
   * Get (loading on first use) the `Model` for `modelID`.
   *
   * @param {string} weightsURL - URL of the model weights.
   * @param {string} modelID - Cache key identifying the model.
   * @param {string} tokenizerURL - URL of the tokenizer file.
   * @param {string} configURL - URL of the model config file.
   * @param {boolean} quantized - Whether the weights are quantized.
   * @returns {Promise<Model>} The loaded model.
   */
  static async getInstance(
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized
  ) {
    if (!this.instance[modelID]) {
      // Register the promise synchronously so concurrent callers reuse it.
      this.instance[modelID] = (async () => {
        await init();

        self.postMessage({ status: "loading", message: "Loading Model" });

        // Fetch weights, tokenizer, and config in parallel.
        const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
          await Promise.all([
            fetchArrayBuffer(weightsURL),
            fetchArrayBuffer(tokenizerURL),
            fetchArrayBuffer(configURL),
          ]);

        return new Model(
          weightsArrayU8,
          tokenizerArrayU8,
          configArrayU8,
          quantized
        );
      })();
      // Evict a failed load so a transient error is not cached forever.
      this.instance[modelID].catch(() => {
        delete this.instance[modelID];
      });
    }
    return this.instance[modelID];
  }
}
|
|
// AbortController for the in-flight generation; null until the first "start".
let controller = null;

// Worker command dispatch: "start" kicks off generation, "abort" cancels it.
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController();
    generate(event.data);
  } else if (event.data.command === "abort") {
    // Optional chaining: an "abort" arriving before any "start" must not
    // throw on the still-null controller.
    controller?.abort();
  }
});
|
|
/**
 * Run token generation for one "start" command and stream progress back to
 * the main thread via `postMessage`.
 *
 * Statuses posted: "loading", "generating" (one per token), then exactly one
 * of "aborted" / "complete", or `{ error }` on failure.
 *
 * Fixes a hang in the previous version: the abort and end-of-text branches
 * lived inside a `new Promise(async (resolve) => …)` executor and returned
 * without calling `resolve`, so the surrounding `await` never settled and
 * the worker stalled forever after finishing or aborting. The loop is now
 * flat and exits with `return`/`break` directly.
 *
 * @param {object} data - Payload of the "start" message (URLs, sampling
 *   params, prompt, seed, optional maxSeqLen).
 */
async function generate(data) {
  const {
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized,
    prompt,
    temp,
    top_p,
    repeatPenalty,
    seed,
    maxSeqLen,
  } = data;
  try {
    self.postMessage({ status: "loading", message: "Starting Phi" });
    const model = await Phi.getInstance(
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      quantized
    );

    self.postMessage({ status: "loading", message: "Initializing model" });
    const firstToken = model.init_with_prompt(
      prompt,
      temp,
      top_p,
      repeatPenalty,
      64,
      BigInt(seed)
    );
    const seq_len = 2048;

    let sentence = firstToken;
    const maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
    const startTime = performance.now();
    let tokensCount = 0;
    while (tokensCount < maxTokens) {
      if (controller && controller.signal.aborted) {
        self.postMessage({
          status: "aborted",
          message: "Aborted",
          output: prompt + sentence,
        });
        return;
      }
      const token = await model.next_token();
      // End-of-text: fall through to the single "complete" post below.
      if (token === "<|endoftext|>") {
        break;
      }
      const tokensSec =
        ((tokensCount + 1) / (performance.now() - startTime)) * 1000;

      sentence += token;
      self.postMessage({
        status: "generating",
        message: "Generating token",
        token: token,
        sentence: sentence,
        totalTime: performance.now() - startTime,
        tokensSec,
        prompt: prompt,
      });
      // Yield to the event loop so a pending "abort" message can be handled
      // before the next token is generated.
      await new Promise((resolve) => setTimeout(resolve, 0));
      tokensCount++;
    }
    self.postMessage({
      status: "complete",
      message: "complete",
      output: prompt + sentence,
    });
  } catch (e) {
    self.postMessage({ error: e });
  }
}
|
|