liuw15 commited on
Commit
1141c00
·
1 Parent(s): c801c74

拆分index.js路由,分为claude、gemini、openai,并且支持生图

Browse files
src/routes/claude.js ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Claude API routes.
 * Mounts the /v1/messages endpoint and forwards requests to the Claude handler.
 */

import { Router } from 'express';
import { handleClaudeRequest } from '../server/handlers/claude.js';

const router = Router();

/**
 * POST /v1/messages
 * Dispatches a Claude message request; streaming is enabled only when the
 * client explicitly sends `stream: true` in the JSON body.
 */
router.post('/messages', (req, res) => {
  handleClaudeRequest(req, res, req.body.stream === true);
});

export default router;
src/routes/gemini.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Gemini API routes.
 * Mounts the /v1beta/models/* endpoints and forwards to the Gemini handlers.
 */

import { Router } from 'express';
import { handleGeminiModelsList, handleGeminiModelDetail, handleGeminiRequest } from '../server/handlers/gemini.js';

const router = Router();

/**
 * GET /v1beta/models
 * List available models in Gemini format.
 */
router.get('/models', handleGeminiModelsList);

/**
 * GET /v1beta/models/:model
 * Detail for a single model in Gemini format.
 */
router.get('/models/:model', handleGeminiModelDetail);

/**
 * POST /v1beta/models/:model:streamGenerateContent
 * Streaming content generation (the literal `:` in the path is escaped for Express).
 */
router.post('/models/:model\\:streamGenerateContent', (req, res) => {
  handleGeminiRequest(req, res, req.params.model, true);
});

/**
 * POST /v1beta/models/:model:generateContent
 * Non-streaming generation; `?alt=sse` upgrades the response to SSE streaming.
 */
router.post('/models/:model\\:generateContent', (req, res) => {
  handleGeminiRequest(req, res, req.params.model, req.query.alt === 'sse');
});

export default router;
src/routes/openai.js ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * OpenAI API routes.
 * Mounts /v1/chat/completions and /v1/models.
 */

import { Router } from 'express';
import { getAvailableModels } from '../api/client.js';
import { handleOpenAIRequest } from '../server/handlers/openai.js';
import logger from '../utils/logger.js';

const router = Router();

/**
 * GET /v1/models
 * Returns the list of available models; upstream failures map to a 500
 * with the error message in the body.
 */
router.get('/models', async (req, res) => {
  try {
    res.json(await getAvailableModels());
  } catch (error) {
    logger.error('获取模型列表失败:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /v1/chat/completions
 * Chat completion requests (streaming decided inside the handler).
 */
router.post('/chat/completions', handleOpenAIRequest);

export default router;
src/server/handlers/claude.js ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Claude 格式处理器
3
+ * 处理 /v1/messages 请求,支持流式和非流式响应
4
+ */
5
+
6
+ import { generateAssistantResponse, generateAssistantResponseNoStream } from '../../api/client.js';
7
+ import { generateClaudeRequestBody, prepareImageRequest } from '../../utils/utils.js';
8
+ import { normalizeClaudeParameters } from '../../utils/parameterNormalizer.js';
9
+ import { buildClaudeErrorPayload } from '../../utils/errors.js';
10
+ import logger from '../../utils/logger.js';
11
+ import config from '../../config/config.js';
12
+ import tokenManager from '../../auth/token_manager.js';
13
+ import {
14
+ setStreamHeaders,
15
+ createHeartbeat,
16
+ with429Retry
17
+ } from '../stream.js';
18
+
19
/**
 * Serialize one Claude SSE event frame.
 * @param {string} eventType - SSE event name (e.g. "message_start")
 * @param {Object} data - payload, JSON-encoded into the data field
 * @returns {string} `event: <type>\ndata: <json>\n\n`
 */
export const createClaudeStreamEvent = (eventType, data) => {
  const payload = JSON.stringify(data);
  // Two trailing empty entries produce the blank line that terminates an SSE frame.
  return [`event: ${eventType}`, `data: ${payload}`, '', ''].join('\n');
};
28
+
29
/**
 * Build a Claude non-streaming message response.
 *
 * Fix: the original dropped `thoughtSignature` on a tool call whenever its
 * arguments failed to JSON-parse; the signature is now attached consistently
 * regardless of parse success.
 *
 * @param {string} id - message id
 * @param {string} model - model name
 * @param {string|null} content - text content
 * @param {string|null} reasoning - chain-of-thought content
 * @param {string|null} reasoningSignature - chain-of-thought signature
 * @param {Array|null} toolCalls - tool calls (OpenAI-style shape)
 * @param {string} stopReason - stop reason ("end_turn" / "tool_use" / ...)
 * @param {Object|null} usage - usage stats (OpenAI-style keys)
 * @returns {Object} Claude Messages API response object
 */
export const createClaudeResponse = (id, model, content, reasoning, reasoningSignature, toolCalls, stopReason, usage) => {
  const contentBlocks = [];

  // Chain-of-thought first: Claude represents it as a "thinking" block.
  if (reasoning) {
    const thinkingBlock = {
      type: "thinking",
      thinking: reasoning
    };
    if (reasoningSignature && config.passSignatureToClient) {
      thinkingBlock.signature = reasoningSignature;
    }
    contentBlocks.push(thinkingBlock);
  }

  // Plain text content.
  if (content) {
    contentBlocks.push({
      type: "text",
      text: content
    });
  }

  // Tool calls: arguments arrive as a JSON string; unparseable input degrades
  // to an empty object instead of failing the whole response.
  if (toolCalls && toolCalls.length > 0) {
    for (const tc of toolCalls) {
      const toolBlock = {
        type: "tool_use",
        id: tc.id,
        name: tc.function.name,
        input: {}
      };
      try {
        toolBlock.input = JSON.parse(tc.function.arguments);
      } catch (e) {
        // Malformed arguments: keep the empty input object.
      }
      if (tc.thoughtSignature && config.passSignatureToClient) {
        toolBlock.signature = tc.thoughtSignature;
      }
      contentBlocks.push(toolBlock);
    }
  }

  return {
    id: id,
    type: "message",
    role: "assistant",
    content: contentBlocks,
    model: model,
    stop_reason: stopReason,
    stop_sequence: null,
    // Map OpenAI-style usage keys onto Claude's input/output token fields.
    usage: usage ? {
      input_tokens: usage.prompt_tokens || 0,
      output_tokens: usage.completion_tokens || 0
    } : { input_tokens: 0, output_tokens: 0 }
  };
};
104
+
105
/**
 * Handle a Claude-format chat request (/v1/messages).
 *
 * Streaming mode emits the Claude SSE event sequence:
 * message_start -> content_block_start/delta/stop (repeated) ->
 * message_delta -> message_stop. Content blocks are indexed by
 * `contentIndex`; `currentBlockType` tracks the open block so thinking/text
 * transitions close the previous block first. Image models ("-image") are
 * always fetched non-streaming and replayed as a single text block.
 *
 * @param {Request} req - Express request object
 * @param {Response} res - Express response object
 * @param {boolean} isStream - whether to respond as an SSE stream
 */
export const handleClaudeRequest = async (req, res, isStream) => {
  const { messages, model, system, tools, ...rawParams } = req.body;

  try {
    if (!messages) {
      return res.status(400).json(buildClaudeErrorPayload({ message: 'messages is required' }, 400));
    }

    const token = await tokenManager.getToken();
    if (!token) {
      throw new Error('没有可用的token,请运行 npm run login 获取token');
    }

    // Normalize Claude-format sampling parameters through the shared module.
    const parameters = normalizeClaudeParameters(rawParams);

    const isImageModel = model.includes('-image');
    const requestBody = generateClaudeRequestBody(messages, model, parameters, tools, system, token);

    if (isImageModel) {
      prepareImageRequest(requestBody);
    }

    const msgId = `msg_${Date.now()}`;
    // config.retryTimes may be missing or non-numeric; clamp to a non-negative integer.
    const maxRetries = Number(config.retryTimes || 0);
    const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;

    if (isStream) {
      setStreamHeaders(res);
      const heartbeatTimer = createHeartbeat(res);

      try {
        let contentIndex = 0;          // index of the currently open content block
        let usageData = null;          // usage reported by the upstream, if any
        let hasToolCall = false;       // decides the final stop_reason
        let currentBlockType = null;   // 'thinking' | 'text' | null (no open block)
        let reasoningSent = false;     // whether the thinking block was started

        // Emit message_start with an empty message skeleton.
        res.write(createClaudeStreamEvent('message_start', {
          type: "message_start",
          message: {
            id: msgId,
            type: "message",
            role: "assistant",
            content: [],
            model: model,
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 0, output_tokens: 0 }
          }
        }));

        if (isImageModel) {
          // Image model: fetch the full result non-streaming, then replay it
          // as a single streamed text block.
          const { content, usage } = await with429Retry(
            () => generateAssistantResponseNoStream(requestBody, token),
            safeRetries,
            'claude.stream.image '
          );

          // Emit the text block (start / delta / stop).
          res.write(createClaudeStreamEvent('content_block_start', {
            type: "content_block_start",
            index: 0,
            content_block: { type: "text", text: "" }
          }));
          res.write(createClaudeStreamEvent('content_block_delta', {
            type: "content_block_delta",
            index: 0,
            delta: { type: "text_delta", text: content || '' }
          }));
          res.write(createClaudeStreamEvent('content_block_stop', {
            type: "content_block_stop",
            index: 0
          }));

          // Finish with message_delta and message_stop.
          res.write(createClaudeStreamEvent('message_delta', {
            type: "message_delta",
            delta: { stop_reason: 'end_turn', stop_sequence: null },
            usage: usage ? { output_tokens: usage.completion_tokens || 0 } : { output_tokens: 0 }
          }));
          res.write(createClaudeStreamEvent('message_stop', {
            type: "message_stop"
          }));

          clearInterval(heartbeatTimer);
          res.end();
          return;
        }

        await with429Retry(
          () => generateAssistantResponse(requestBody, token, (data) => {
            if (data.type === 'usage') {
              usageData = data.usage;
            } else if (data.type === 'reasoning') {
              // Chain-of-thought content, mapped to Claude's "thinking" block type.
              if (!reasoningSent) {
                // Open the thinking block on the first reasoning delta.
                const contentBlock = { type: "thinking", thinking: "" };
                if (data.thoughtSignature && config.passSignatureToClient) {
                  contentBlock.signature = data.thoughtSignature;
                }
                res.write(createClaudeStreamEvent('content_block_start', {
                  type: "content_block_start",
                  index: contentIndex,
                  content_block: contentBlock
                }));
                currentBlockType = 'thinking';
                reasoningSent = true;
              }
              // Emit the thinking delta.
              // NOTE(review): the signature is attached to thinking_delta here;
              // Anthropic's documented stream uses a separate signature_delta —
              // confirm downstream clients accept this shape.
              const delta = { type: "thinking_delta", thinking: data.reasoning_content || '' };
              if (data.thoughtSignature && config.passSignatureToClient) {
                delta.signature = data.thoughtSignature;
              }
              res.write(createClaudeStreamEvent('content_block_delta', {
                type: "content_block_delta",
                index: contentIndex,
                delta: delta
              }));
            } else if (data.type === 'tool_calls') {
              hasToolCall = true;
              // Close the currently open block, if any, before emitting tool blocks.
              if (currentBlockType) {
                res.write(createClaudeStreamEvent('content_block_stop', {
                  type: "content_block_stop",
                  index: contentIndex
                }));
                contentIndex++;
              }
              // One tool_use block per call: start with empty input, then the
              // full arguments as a single input_json_delta, then stop.
              for (const tc of data.tool_calls) {
                try {
                  const inputObj = JSON.parse(tc.function.arguments);
                  const toolContentBlock = { type: "tool_use", id: tc.id, name: tc.function.name, input: {} };
                  if (tc.thoughtSignature && config.passSignatureToClient) {
                    toolContentBlock.signature = tc.thoughtSignature;
                  }
                  res.write(createClaudeStreamEvent('content_block_start', {
                    type: "content_block_start",
                    index: contentIndex,
                    content_block: toolContentBlock
                  }));
                  // Emit the arguments as an input delta.
                  res.write(createClaudeStreamEvent('content_block_delta', {
                    type: "content_block_delta",
                    index: contentIndex,
                    delta: { type: "input_json_delta", partial_json: JSON.stringify(inputObj) }
                  }));
                  res.write(createClaudeStreamEvent('content_block_stop', {
                    type: "content_block_stop",
                    index: contentIndex
                  }));
                  contentIndex++;
                } catch (e) {
                  // Unparseable arguments: skip this tool call entirely.
                }
              }
              currentBlockType = null;
            } else {
              // Plain text content.
              if (currentBlockType === 'thinking') {
                // Close the thinking block before starting text.
                res.write(createClaudeStreamEvent('content_block_stop', {
                  type: "content_block_stop",
                  index: contentIndex
                }));
                contentIndex++;
                currentBlockType = null;
              }
              if (currentBlockType !== 'text') {
                // Open a new text block.
                res.write(createClaudeStreamEvent('content_block_start', {
                  type: "content_block_start",
                  index: contentIndex,
                  content_block: { type: "text", text: "" }
                }));
                currentBlockType = 'text';
              }
              // Emit the text delta.
              res.write(createClaudeStreamEvent('content_block_delta', {
                type: "content_block_delta",
                index: contentIndex,
                delta: { type: "text_delta", text: data.content || '' }
              }));
            }
          }),
          safeRetries,
          'claude.stream '
        );

        // Close the last open content block, if any.
        if (currentBlockType) {
          res.write(createClaudeStreamEvent('content_block_stop', {
            type: "content_block_stop",
            index: contentIndex
          }));
        }

        // Emit message_delta with the final stop_reason and output usage.
        const stopReason = hasToolCall ? 'tool_use' : 'end_turn';
        res.write(createClaudeStreamEvent('message_delta', {
          type: "message_delta",
          delta: { stop_reason: stopReason, stop_sequence: null },
          usage: usageData ? { output_tokens: usageData.completion_tokens || 0 } : { output_tokens: 0 }
        }));

        // Emit message_stop.
        res.write(createClaudeStreamEvent('message_stop', {
          type: "message_stop"
        }));

        clearInterval(heartbeatTimer);
        res.end();
      } catch (error) {
        clearInterval(heartbeatTimer);
        if (!res.writableEnded) {
          const statusCode = error.statusCode || error.status || 500;
          res.write(createClaudeStreamEvent('error', buildClaudeErrorPayload(error, statusCode)));
          res.end();
        }
        logger.error('Claude 流式请求失败:', error.message);
        return;
      }
    } else {
      // Non-streaming: disable socket timeouts so long generations can finish.
      req.setTimeout(0);
      res.setTimeout(0);

      const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
        () => generateAssistantResponseNoStream(requestBody, token),
        safeRetries,
        'claude.no_stream '
      );

      const stopReason = toolCalls.length > 0 ? 'tool_use' : 'end_turn';
      const response = createClaudeResponse(
        msgId,
        model,
        content,
        reasoningContent,
        reasoningSignature,
        toolCalls,
        stopReason,
        usage
      );

      res.json(response);
    }
  } catch (error) {
    // Top-level failure: only send a JSON error if headers were not streamed yet.
    logger.error('Claude 请求失败:', error.message);
    if (res.headersSent) return;
    const statusCode = error.statusCode || error.status || 500;
    res.status(statusCode).json(buildClaudeErrorPayload(error, statusCode));
  }
};
src/server/handlers/gemini.js ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Gemini 格式处理器
3
+ * 处理 /v1beta/models/* 请求,支持流式和非流式响应
4
+ */
5
+
6
+ import { generateAssistantResponse, generateAssistantResponseNoStream, getAvailableModels } from '../../api/client.js';
7
+ import { generateGeminiRequestBody, prepareImageRequest } from '../../utils/utils.js';
8
+ import { buildGeminiErrorPayload } from '../../utils/errors.js';
9
+ import logger from '../../utils/logger.js';
10
+ import config from '../../config/config.js';
11
+ import tokenManager from '../../auth/token_manager.js';
12
+ import {
13
+ setStreamHeaders,
14
+ createHeartbeat,
15
+ writeStreamData,
16
+ endStream,
17
+ with429Retry
18
+ } from '../stream.js';
19
+
20
/**
 * Build a Gemini-format response object.
 * @param {string|null} content - text content
 * @param {string|null} reasoning - chain-of-thought content
 * @param {string|null} reasoningSignature - chain-of-thought signature
 * @param {Array|null} toolCalls - tool calls (OpenAI-style shape)
 * @param {string|null} finishReason - finish reason, defaults to "STOP"
 * @param {Object|null} usage - usage stats (OpenAI-style keys)
 * @returns {Object} Gemini generateContent-style response
 */
export const createGeminiResponse = (content, reasoning, reasoningSignature, toolCalls, finishReason, usage) => {
  const parts = [];

  // Chain-of-thought becomes a thought part; signature is forwarded only
  // when the config allows it.
  if (reasoning) {
    const thoughtPart = { text: reasoning, thought: true };
    if (reasoningSignature && config.passSignatureToClient) {
      thoughtPart.thoughtSignature = reasoningSignature;
    }
    parts.push(thoughtPart);
  }

  if (content) {
    parts.push({ text: content });
  }

  // Tool calls map to functionCall parts; unparseable arguments are skipped.
  if (toolCalls && toolCalls.length > 0) {
    for (const call of toolCalls) {
      try {
        const args = JSON.parse(call.function.arguments);
        const functionCallPart = {
          functionCall: { name: call.function.name, args }
        };
        if (call.thoughtSignature && config.passSignatureToClient) {
          functionCallPart.thoughtSignature = call.thoughtSignature;
        }
        parts.push(functionCallPart);
      } catch (err) {
        // Malformed arguments: drop this call silently.
      }
    }
  }

  const response = {
    candidates: [
      {
        content: { parts, role: "model" },
        finishReason: finishReason || "STOP",
        index: 0
      }
    ]
  };

  if (usage) {
    response.usageMetadata = {
      promptTokenCount: usage.prompt_tokens,
      candidatesTokenCount: usage.completion_tokens,
      totalTokenCount: usage.total_tokens
    };
  }

  return response;
};
85
+
86
/**
 * Convert an OpenAI-style model list into Gemini list format.
 * Token limits and sampling values are fixed defaults, not real upstream data.
 * @param {Object} openaiModels - OpenAI-format model list ({ data: [...] })
 * @returns {Object} Gemini-format model list ({ models: [...] })
 */
export const convertToGeminiModelList = (openaiModels) => ({
  models: openaiModels.data.map(({ id }) => ({
    name: `models/${id}`,
    version: "001",
    displayName: id,
    description: "Imported model",
    inputTokenLimit: 32768,   // default placeholder
    outputTokenLimit: 8192,   // default placeholder
    supportedGenerationMethods: ["generateContent", "countTokens"],
    temperature: 0.9,
    topP: 1.0,
    topK: 40
  }))
});
106
+
107
/**
 * GET handler: model list in Gemini format.
 * Upstream failures map to a Gemini-style 500 error payload.
 * @param {Request} req - Express request object
 * @param {Response} res - Express response object
 */
export const handleGeminiModelsList = async (req, res) => {
  try {
    const openaiModels = await getAvailableModels();
    res.json(convertToGeminiModelList(openaiModels));
  } catch (error) {
    logger.error('获取模型列表失败:', error.message);
    res.status(500).json({ error: { code: 500, message: error.message, status: "INTERNAL" } });
  }
};
122
+
123
/**
 * GET handler: single model detail in Gemini format.
 * Strips an optional "models/" prefix from the path parameter, looks the id
 * up in the upstream list, and returns a fixed-default detail object or a
 * Gemini-style 404/500 error payload.
 * @param {Request} req - Express request object
 * @param {Response} res - Express response object
 */
export const handleGeminiModelDetail = async (req, res) => {
  try {
    const modelId = req.params.model.replace(/^models\//, '');
    const openaiModels = await getAvailableModels();
    const match = openaiModels.data.find((m) => m.id === modelId);

    // Guard clause: unknown model -> Gemini-style NOT_FOUND error.
    if (!match) {
      res.status(404).json({ error: { code: 404, message: `Model ${modelId} not found`, status: "NOT_FOUND" } });
      return;
    }

    res.json({
      name: `models/${match.id}`,
      version: "001",
      displayName: match.id,
      description: "Imported model",
      inputTokenLimit: 32768,
      outputTokenLimit: 8192,
      supportedGenerationMethods: ["generateContent", "countTokens"],
      temperature: 0.9,
      topP: 1.0,
      topK: 40
    });
  } catch (error) {
    logger.error('获取模型详情失败:', error.message);
    res.status(500).json({ error: { code: 500, message: error.message, status: "INTERNAL" } });
  }
};
156
+
157
/**
 * Handle a Gemini-format chat request (/v1beta/models/*).
 *
 * Fix: removed two no-op ternaries (`hasToolCall ? "STOP" : "STOP"` and
 * `toolCalls.length > 0 ? "STOP" : "STOP"`) and the resulting dead
 * `hasToolCall` local — Gemini reports "STOP" for tool calls as well, so the
 * finish reason is simply the constant.
 *
 * @param {Request} req - Express request object
 * @param {Response} res - Express response object
 * @param {string} modelName - model name from the route parameter
 * @param {boolean} isStream - whether to respond as an SSE stream
 */
export const handleGeminiRequest = async (req, res, modelName, isStream) => {
  // config.retryTimes may be missing or non-numeric; clamp to a non-negative integer.
  const maxRetries = Number(config.retryTimes || 0);
  const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;

  try {
    const token = await tokenManager.getToken();
    if (!token) {
      throw new Error('没有可用的token,请运行 npm run login 获取token');
    }

    const isImageModel = modelName.includes('-image');
    const requestBody = generateGeminiRequestBody(req.body, modelName, token);

    if (isImageModel) {
      prepareImageRequest(requestBody);
    }

    if (isStream) {
      setStreamHeaders(res);
      const heartbeatTimer = createHeartbeat(res);

      try {
        if (isImageModel) {
          // Image model: fetch the full result non-streaming and emit it as
          // a single stream chunk.
          const { content, usage } = await with429Retry(
            () => generateAssistantResponseNoStream(requestBody, token),
            safeRetries,
            'gemini.stream.image '
          );
          const chunk = createGeminiResponse(content, null, null, null, 'STOP', usage);
          writeStreamData(res, chunk);
          clearInterval(heartbeatTimer);
          endStream(res);
          return;
        }

        let usageData = null;

        await with429Retry(
          () => generateAssistantResponse(requestBody, token, (data) => {
            if (data.type === 'usage') {
              usageData = data.usage;
            } else if (data.type === 'reasoning') {
              // Chain-of-thought chunk.
              writeStreamData(res, createGeminiResponse(null, data.reasoning_content, data.thoughtSignature, null, null, null));
            } else if (data.type === 'tool_calls') {
              // Tool-call chunk.
              writeStreamData(res, createGeminiResponse(null, null, null, data.tool_calls, null, null));
            } else {
              // Plain text chunk.
              writeStreamData(res, createGeminiResponse(data.content, null, null, null, null, null));
            }
          }),
          safeRetries,
          'gemini.stream '
        );

        // Final chunk carries finishReason and usage. Gemini uses "STOP" even
        // when the model produced tool calls.
        writeStreamData(res, createGeminiResponse(null, null, null, null, 'STOP', usageData));

        clearInterval(heartbeatTimer);
        endStream(res);
      } catch (error) {
        clearInterval(heartbeatTimer);
        if (!res.writableEnded) {
          const statusCode = error.statusCode || error.status || 500;
          writeStreamData(res, buildGeminiErrorPayload(error, statusCode));
          endStream(res);
        }
        logger.error('Gemini 流式请求失败:', error.message);
        return;
      }
    } else {
      // Non-streaming: disable socket timeouts so long generations can finish.
      req.setTimeout(0);
      res.setTimeout(0);

      const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
        () => generateAssistantResponseNoStream(requestBody, token),
        safeRetries,
        'gemini.no_stream '
      );

      // "STOP" regardless of tool calls (Gemini convention).
      const response = createGeminiResponse(content, reasoningContent, reasoningSignature, toolCalls, 'STOP', usage);
      res.json(response);
    }
  } catch (error) {
    // Top-level failure: only send a JSON error if headers were not streamed yet.
    logger.error('Gemini 请求失败:', error.message);
    if (res.headersSent) return;
    const statusCode = error.statusCode || error.status || 500;
    res.status(statusCode).json(buildGeminiErrorPayload(error, statusCode));
  }
};
src/server/handlers/openai.js ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * OpenAI 格式处理器
3
+ * 处理 /v1/chat/completions 请求,支持流式和非流式响应
4
+ */
5
+
6
+ import { generateAssistantResponse, generateAssistantResponseNoStream } from '../../api/client.js';
7
+ import { generateRequestBody, prepareImageRequest } from '../../utils/utils.js';
8
+ import { buildOpenAIErrorPayload } from '../../utils/errors.js';
9
+ import logger from '../../utils/logger.js';
10
+ import config from '../../config/config.js';
11
+ import tokenManager from '../../auth/token_manager.js';
12
+ import {
13
+ createResponseMeta,
14
+ setStreamHeaders,
15
+ createHeartbeat,
16
+ getChunkObject,
17
+ releaseChunkObject,
18
+ writeStreamData,
19
+ endStream,
20
+ with429Retry
21
+ } from '../stream.js';
22
+
23
/**
 * Populate a pooled chunk object as an OpenAI chat.completion.chunk.
 * Supports DeepSeek-style reasoning_content inside the delta.
 * @param {string} id - response id
 * @param {number} created - creation timestamp (seconds)
 * @param {string} model - model name
 * @param {Object} delta - incremental content
 * @param {string|null} finish_reason - finish reason, null while streaming
 * @returns {Object} the pooled chunk (caller releases it back to the pool)
 */
export const createStreamChunk = (id, created, model, delta, finish_reason = null) => {
  // Reuse an object from the pool instead of allocating per chunk.
  const chunk = getChunkObject();
  Object.assign(chunk, {
    id,
    object: 'chat.completion.chunk',
    created,
    model
  });
  const [choice] = chunk.choices;
  choice.delta = delta;
  choice.finish_reason = finish_reason;
  return chunk;
};
43
+
44
/**
 * Handle an OpenAI-format chat request (/v1/chat/completions).
 *
 * Streaming mode writes chat.completion.chunk SSE frames with a heartbeat
 * to keep proxies (e.g. Cloudflare) from dropping the connection; image
 * models ("-image") are fetched non-streaming and replayed as two chunks.
 * Non-streaming mode returns a single chat.completion object, placing
 * DeepSeek-style reasoning_content before content in the message.
 *
 * @param {Request} req - Express request object
 * @param {Response} res - Express response object
 */
export const handleOpenAIRequest = async (req, res) => {
  const { messages, model, stream = false, tools, ...params } = req.body;

  try {
    if (!messages) {
      return res.status(400).json({ error: 'messages is required' });
    }

    const token = await tokenManager.getToken();
    if (!token) {
      throw new Error('没有可用的token,请运行 npm run login 获取token');
    }

    const isImageModel = model.includes('-image');
    const requestBody = generateRequestBody(messages, model, params, tools, token);

    if (isImageModel) {
      prepareImageRequest(requestBody);
    }

    const { id, created } = createResponseMeta();
    // config.retryTimes may be missing or non-numeric; clamp to a non-negative integer.
    const maxRetries = Number(config.retryTimes || 0);
    const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;

    if (stream) {
      setStreamHeaders(res);

      // Heartbeat keeps the SSE connection alive through proxy idle timeouts.
      const heartbeatTimer = createHeartbeat(res);

      try {
        if (isImageModel) {
          // Image model: one content chunk, then a stop chunk carrying usage.
          const { content, usage } = await with429Retry(
            () => generateAssistantResponseNoStream(requestBody, token),
            safeRetries,
            'chat.stream.image '
          );
          writeStreamData(res, createStreamChunk(id, created, model, { content }));
          writeStreamData(res, { ...createStreamChunk(id, created, model, {}, 'stop'), usage });
        } else {
          let hasToolCall = false;
          let usageData = null;

          await with429Retry(
            () => generateAssistantResponse(requestBody, token, (data) => {
              if (data.type === 'usage') {
                usageData = data.usage;
              } else if (data.type === 'reasoning') {
                const delta = { reasoning_content: data.reasoning_content };
                if (data.thoughtSignature && config.passSignatureToClient) {
                  delta.thoughtSignature = data.thoughtSignature;
                }
                writeStreamData(res, createStreamChunk(id, created, model, delta));
              } else if (data.type === 'tool_calls') {
                hasToolCall = true;
                // Config decides whether tool-call signatures are passed through;
                // each call also gets its OpenAI-required stream index.
                const toolCallsWithIndex = data.tool_calls.map((toolCall, index) => {
                  if (config.passSignatureToClient) {
                    return { index, ...toolCall };
                  } else {
                    const { thoughtSignature, ...rest } = toolCall;
                    return { index, ...rest };
                  }
                });
                const delta = { tool_calls: toolCallsWithIndex };
                writeStreamData(res, createStreamChunk(id, created, model, delta));
              } else {
                const delta = { content: data.content };
                writeStreamData(res, createStreamChunk(id, created, model, delta));
              }
            }),
            safeRetries,
            'chat.stream '
          );

          // Final chunk: empty delta, finish_reason, and the collected usage.
          writeStreamData(res, { ...createStreamChunk(id, created, model, {}, hasToolCall ? 'tool_calls' : 'stop'), usage: usageData });
        }

        clearInterval(heartbeatTimer);
        endStream(res);
      } catch (error) {
        // Stop the heartbeat before delegating to the outer error handler.
        clearInterval(heartbeatTimer);
        throw error;
      }
    } else {
      // Non-streaming: disable socket timeouts so long generations can finish.
      req.setTimeout(0); // disable request timeout
      res.setTimeout(0); // disable response timeout

      const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
        () => generateAssistantResponseNoStream(requestBody, token),
        safeRetries,
        'chat.no_stream '
      );

      // DeepSeek convention: reasoning_content precedes content in the message.
      const message = { role: 'assistant' };
      if (reasoningContent) message.reasoning_content = reasoningContent;
      if (reasoningSignature && config.passSignatureToClient) message.thoughtSignature = reasoningSignature;
      message.content = content;

      if (toolCalls.length > 0) {
        // Config decides whether tool-call signatures are passed through.
        if (config.passSignatureToClient) {
          message.tool_calls = toolCalls;
        } else {
          message.tool_calls = toolCalls.map(({ thoughtSignature, ...rest }) => rest);
        }
      }

      // Single response literal to keep allocations low.
      const response = {
        id,
        object: 'chat.completion',
        created,
        model,
        choices: [{
          index: 0,
          message,
          finish_reason: toolCalls.length > 0 ? 'tool_calls' : 'stop'
        }],
        usage
      };

      res.json(response);
    }
  } catch (error) {
    // Top-level failure: only send a JSON error if headers were not streamed yet.
    logger.error('生成响应失败:', error.message);
    if (res.headersSent) return;
    const statusCode = error.statusCode || error.status || 500;
    return res.status(statusCode).json(buildOpenAIErrorPayload(error, statusCode));
  }
};
src/server/index.js CHANGED
@@ -1,18 +1,26 @@
 
 
 
 
 
1
  import express from 'express';
2
  import cors from 'cors';
3
  import path from 'path';
4
- import { generateAssistantResponse, generateAssistantResponseNoStream, getAvailableModels, generateImageForSD, closeRequester } from '../api/client.js';
5
- import { generateRequestBody, generateGeminiRequestBody, generateClaudeRequestBody, prepareImageRequest } from '../utils/utils.js';
6
- import { normalizeClaudeParameters } from '../utils/parameterNormalizer.js';
7
  import logger from '../utils/logger.js';
8
  import config from '../config/config.js';
9
- import tokenManager from '../auth/token_manager.js';
 
 
 
 
 
 
10
  import adminRouter from '../routes/admin.js';
11
  import sdRouter from '../routes/sd.js';
12
- import memoryManager, { registerMemoryPoolCleanup } from '../utils/memoryManager.js';
13
- import { getPublicDir, getRelativePath } from '../utils/paths.js';
14
- import { DEFAULT_HEARTBEAT_INTERVAL, MEMORY_CHECK_INTERVAL } from '../constants/index.js';
15
- import { buildOpenAIErrorPayload, buildGeminiErrorPayload, buildClaudeErrorPayload, errorHandler, ValidationError } from '../utils/errors.js';
16
 
17
  const publicDir = getPublicDir();
18
 
@@ -20,167 +28,11 @@ logger.info(`静态文件目录: ${getRelativePath(publicDir)}`);
20
 
21
  const app = express();
22
 
23
- // ==================== 通用重试工具(处429) ====================
24
- const with429Retry = async (fn, maxRetries, loggerPrefix = '') => {
25
- const retries = Number.isFinite(maxRetries) && maxRetries > 0 ? Math.floor(maxRetries) : 0;
26
- let attempt = 0;
27
- // 首次执行 + 最多 retries 次重试
28
- while (true) {
29
- try {
30
- return await fn(attempt);
31
- } catch (error) {
32
- // 兼容多种错误格式:error.status, error.statusCode, error.response?.status
33
- const status = Number(error.status || error.statusCode || error.response?.status);
34
- if (status === 429 && attempt < retries) {
35
- const nextAttempt = attempt + 1;
36
- logger.warn(`${loggerPrefix}收到 429,正在进行第 ${nextAttempt} 次重试(共 ${retries} 次)`);
37
- attempt = nextAttempt;
38
- continue;
39
- }
40
- throw error;
41
- }
42
- }
43
- };
44
-
45
- // ==================== 心跳机制(防止 CF 超时) ====================
46
- const HEARTBEAT_INTERVAL = config.server.heartbeatInterval || DEFAULT_HEARTBEAT_INTERVAL;
47
- const SSE_HEARTBEAT = Buffer.from(': heartbeat\n\n');
48
-
49
- // 创建心跳定时器
50
- const createHeartbeat = (res) => {
51
- const timer = setInterval(() => {
52
- if (!res.writableEnded) {
53
- res.write(SSE_HEARTBEAT);
54
- } else {
55
- clearInterval(timer);
56
- }
57
- }, HEARTBEAT_INTERVAL);
58
-
59
- // 响应结束时清理
60
- res.on('close', () => clearInterval(timer));
61
- res.on('finish', () => clearInterval(timer));
62
-
63
- return timer;
64
- };
65
-
66
- // 预编译的常量字符串(避免重复创建)
67
- const SSE_PREFIX = Buffer.from('data: ');
68
- const SSE_SUFFIX = Buffer.from('\n\n');
69
- const SSE_DONE = Buffer.from('data: [DONE]\n\n');
70
-
71
- // 工具函数:生成响应元数据
72
- const createResponseMeta = () => ({
73
- id: `chatcmpl-${Date.now()}`,
74
- created: Math.floor(Date.now() / 1000)
75
- });
76
-
77
- // 工具函数:设置流式响应头
78
- const setStreamHeaders = (res) => {
79
- res.setHeader('Content-Type', 'text/event-stream');
80
- res.setHeader('Cache-Control', 'no-cache');
81
- res.setHeader('Connection', 'keep-alive');
82
- res.setHeader('X-Accel-Buffering', 'no'); // 禁用 nginx 缓冲
83
- };
84
-
85
- // 工具函数:构建流式数据块(使用动态对象池减少 GC)
86
- // 支持 DeepSeek 格式的 reasoning_content
87
- const chunkPool = [];
88
- const getChunkObject = () => chunkPool.pop() || { choices: [{ index: 0, delta: {}, finish_reason: null }] };
89
- const releaseChunkObject = (obj) => {
90
- const maxSize = memoryManager.getPoolSizes().chunk;
91
- if (chunkPool.length < maxSize) chunkPool.push(obj);
92
- };
93
-
94
- // 注册内存清理回调(使用统一工具收缩对象池)
95
- registerMemoryPoolCleanup(chunkPool, () => memoryManager.getPoolSizes().chunk);
96
-
97
- // 设置内存阈值(从配置加载)并启动内存管理器
98
  memoryManager.setThreshold(config.server.memoryThreshold);
99
  memoryManager.start(MEMORY_CHECK_INTERVAL);
100
 
101
- const createStreamChunk = (id, created, model, delta, finish_reason = null) => {
102
- const chunk = getChunkObject();
103
- chunk.id = id;
104
- chunk.object = 'chat.completion.chunk';
105
- chunk.created = created;
106
- chunk.model = model;
107
- chunk.choices[0].delta = delta;
108
- chunk.choices[0].finish_reason = finish_reason;
109
- return chunk;
110
- };
111
-
112
- // 工具函数:零拷贝写入流式数据
113
- const writeStreamData = (res, data) => {
114
- const json = JSON.stringify(data);
115
- res.write(SSE_PREFIX);
116
- res.write(json);
117
- res.write(SSE_SUFFIX);
118
- };
119
-
120
- // 工具函数:结束流式响应
121
- const endStream = (res) => {
122
- if (res.writableEnded) return;
123
- res.write(SSE_DONE);
124
- res.end();
125
- };
126
-
127
-
128
-
129
-
130
- // Gemini 响应构建工具
131
- const createGeminiResponse = (content, reasoning, reasoningSignature, toolCalls, finishReason, usage) => {
132
- const parts = [];
133
- if (reasoning) {
134
- const thoughtPart = { text: reasoning, thought: true };
135
- if (reasoningSignature && config.passSignatureToClient) {
136
- thoughtPart.thoughtSignature = reasoningSignature;
137
- }
138
- parts.push(thoughtPart);
139
- }
140
- if (content) {
141
- parts.push({ text: content });
142
- }
143
- if (toolCalls && toolCalls.length > 0) {
144
- toolCalls.forEach(tc => {
145
- try {
146
- const functionCallPart = {
147
- functionCall: {
148
- name: tc.function.name,
149
- args: JSON.parse(tc.function.arguments)
150
- }
151
- };
152
- if (tc.thoughtSignature && config.passSignatureToClient) {
153
- functionCallPart.thoughtSignature = tc.thoughtSignature;
154
- }
155
- parts.push(functionCallPart);
156
- } catch (e) {
157
- // 忽略解析错误
158
- }
159
- });
160
- }
161
-
162
- const response = {
163
- candidates: [{
164
- content: {
165
- parts: parts,
166
- role: "model"
167
- },
168
- finishReason: finishReason || "STOP",
169
- index: 0
170
- }]
171
- };
172
-
173
- if (usage) {
174
- response.usageMetadata = {
175
- promptTokenCount: usage.prompt_tokens,
176
- candidatesTokenCount: usage.completion_tokens,
177
- totalTokenCount: usage.total_tokens
178
- };
179
- }
180
-
181
- return response;
182
- };
183
-
184
  app.use(cors());
185
  app.use(express.json({ limit: config.security.maxRequestSize }));
186
 
@@ -194,9 +46,15 @@ app.use('/admin', adminRouter);
194
  // 使用统一错误处理中间件
195
  app.use(errorHandler);
196
 
 
197
  app.use((req, res, next) => {
198
- const ignorePaths = ['/images', '/favicon.ico', '/.well-known', '/sdapi/v1/options', '/sdapi/v1/samplers', '/sdapi/v1/schedulers', '/sdapi/v1/upscalers', '/sdapi/v1/latent-upscale-modes', '/sdapi/v1/sd-vae', '/sdapi/v1/sd-modules'];
199
- if (!ignorePaths.some(path => req.path.startsWith(path))) {
 
 
 
 
 
200
  const start = Date.now();
201
  res.on('finish', () => {
202
  logger.request(req.method, req.path, res.statusCode, Date.now() - start);
@@ -204,8 +62,11 @@ app.use((req, res, next) => {
204
  }
205
  next();
206
  });
 
 
207
  app.use('/sdapi/v1', sdRouter);
208
 
 
209
  app.use((req, res, next) => {
210
  if (req.path.startsWith('/v1/')) {
211
  const apiKey = config.security?.apiKey;
@@ -221,15 +82,18 @@ app.use((req, res, next) => {
221
  next();
222
  });
223
 
224
- app.get('/v1/models', async (req, res) => {
225
- try {
226
- const models = await getAvailableModels();
227
- res.json(models);
228
- } catch (error) {
229
- logger.error('获取模型列表失败:', error.message);
230
- res.status(500).json({ error: error.message });
231
- }
232
- });
 
 
 
233
 
234
  // 内存监控端点
235
  app.get('/v1/memory', (req, res) => {
@@ -242,7 +106,7 @@ app.get('/v1/memory', (req, res) => {
242
  arrayBuffers: usage.arrayBuffers,
243
  pressure: memoryManager.getCurrentPressure(),
244
  poolSizes: memoryManager.getPoolSizes(),
245
- chunkPoolSize: chunkPool.length
246
  });
247
  });
248
 
@@ -251,585 +115,7 @@ app.get('/health', (req, res) => {
251
  res.json({ status: 'ok', uptime: process.uptime() });
252
  });
253
 
254
-
255
-
256
- app.post('/v1/chat/completions', async (req, res) => {
257
- const { messages, model, stream = false, tools, ...params} = req.body;
258
- try {
259
- if (!messages) {
260
- return res.status(400).json({ error: 'messages is required' });
261
- }
262
- const token = await tokenManager.getToken();
263
- if (!token) {
264
- throw new Error('没有可用的token,请运行 npm run login 获取token');
265
- }
266
- const isImageModel = model.includes('-image');
267
- const requestBody = generateRequestBody(messages, model, params, tools, token);
268
- if (isImageModel) {
269
- prepareImageRequest(requestBody);
270
- }
271
- //console.log(JSON.stringify(requestBody,null,2))
272
-
273
- const { id, created } = createResponseMeta();
274
- const maxRetries = Number(config.retryTimes || 0);
275
- const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;
276
-
277
- if (stream) {
278
- setStreamHeaders(res);
279
-
280
- // 启动心跳,防止 Cloudflare 超时断连
281
- const heartbeatTimer = createHeartbeat(res);
282
-
283
- try {
284
- if (isImageModel) {
285
- const { content, usage } = await with429Retry(
286
- () => generateAssistantResponseNoStream(requestBody, token),
287
- safeRetries,
288
- 'chat.stream.image '
289
- );
290
- writeStreamData(res, createStreamChunk(id, created, model, { content }));
291
- writeStreamData(res, { ...createStreamChunk(id, created, model, {}, 'stop'), usage });
292
- } else {
293
- let hasToolCall = false;
294
- let usageData = null;
295
-
296
- await with429Retry(
297
- () => generateAssistantResponse(requestBody, token, (data) => {
298
- if (data.type === 'usage') {
299
- usageData = data.usage;
300
- } else if (data.type === 'reasoning') {
301
- const delta = { reasoning_content: data.reasoning_content };
302
- if (data.thoughtSignature && config.passSignatureToClient) {
303
- delta.thoughtSignature = data.thoughtSignature;
304
- }
305
- writeStreamData(res, createStreamChunk(id, created, model, delta));
306
- } else if (data.type === 'tool_calls') {
307
- hasToolCall = true;
308
- // 根据配置决定是否透传工具调用中的签名
309
- const toolCallsWithIndex = data.tool_calls.map((toolCall, index) => {
310
- if (config.passSignatureToClient) {
311
- return { index, ...toolCall };
312
- } else {
313
- const { thoughtSignature, ...rest } = toolCall;
314
- return { index, ...rest };
315
- }
316
- });
317
- const delta = { tool_calls: toolCallsWithIndex };
318
- writeStreamData(res, createStreamChunk(id, created, model, delta));
319
- } else {
320
- const delta = { content: data.content };
321
- writeStreamData(res, createStreamChunk(id, created, model, delta));
322
- }
323
- }),
324
- safeRetries,
325
- 'chat.stream '
326
- );
327
-
328
- writeStreamData(res, { ...createStreamChunk(id, created, model, {}, hasToolCall ? 'tool_calls' : 'stop'), usage: usageData });
329
- }
330
-
331
- clearInterval(heartbeatTimer);
332
- endStream(res);
333
- } catch (error) {
334
- clearInterval(heartbeatTimer);
335
- throw error;
336
- }
337
- } else {
338
- // 非流式请求:设置较长超时,避免大模型响应超时
339
- req.setTimeout(0); // 禁用请求超时
340
- res.setTimeout(0); // 禁用响应超时
341
-
342
- const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
343
- () => generateAssistantResponseNoStream(requestBody, token),
344
- safeRetries,
345
- 'chat.no_stream '
346
- );
347
- // DeepSeek 格式:reasoning_content 在 content 之前
348
- const message = { role: 'assistant' };
349
- if (reasoningContent) message.reasoning_content = reasoningContent;
350
- if (reasoningSignature && config.passSignatureToClient) message.thoughtSignature = reasoningSignature;
351
- message.content = content;
352
- if (toolCalls.length > 0) {
353
- // 根据配置决定是否透传工具调用中的签名
354
- if (config.passSignatureToClient) {
355
- message.tool_calls = toolCalls;
356
- } else {
357
- message.tool_calls = toolCalls.map(({ thoughtSignature, ...rest }) => rest);
358
- }
359
- }
360
-
361
- // 使用预构建的响应对象,减少内存分配
362
- const response = {
363
- id,
364
- object: 'chat.completion',
365
- created,
366
- model,
367
- choices: [{
368
- index: 0,
369
- message,
370
- finish_reason: toolCalls.length > 0 ? 'tool_calls' : 'stop'
371
- }],
372
- usage
373
- };
374
-
375
- res.json(response);
376
- }
377
- } catch (error) {
378
- logger.error('生成响应失败:', error.message);
379
- if (res.headersSent) return;
380
- const statusCode = error.statusCode || error.status || 500;
381
- return res.status(statusCode).json(buildOpenAIErrorPayload(error, statusCode));
382
- }
383
- });
384
-
385
- // Gemini 模型列表格式转换
386
- const convertToGeminiModelList = (openaiModels) => {
387
- const models = openaiModels.data.map(model => ({
388
- name: `models/${model.id}`,
389
- version: "001",
390
- displayName: model.id,
391
- description: "Imported model",
392
- inputTokenLimit: 32768, // 默认值
393
- outputTokenLimit: 8192, // 默认值
394
- supportedGenerationMethods: ["generateContent", "countTokens"],
395
- temperature: 0.9,
396
- topP: 1.0,
397
- topK: 40
398
- }));
399
- return { models };
400
- };
401
-
402
- // Gemini API 路由
403
- app.get('/v1beta/models', async (req, res) => {
404
- try {
405
- const openaiModels = await getAvailableModels();
406
- const geminiModels = convertToGeminiModelList(openaiModels);
407
- res.json(geminiModels);
408
- } catch (error) {
409
- logger.error('获取模型列表失败:', error.message);
410
- res.status(500).json({ error: { code: 500, message: error.message, status: "INTERNAL" } });
411
- }
412
- });
413
-
414
- app.get('/v1beta/models/:model', async (req, res) => {
415
- try {
416
- const modelId = req.params.model.replace(/^models\//, '');
417
- const openaiModels = await getAvailableModels();
418
- const model = openaiModels.data.find(m => m.id === modelId);
419
-
420
- if (model) {
421
- const geminiModel = {
422
- name: `models/${model.id}`,
423
- version: "001",
424
- displayName: model.id,
425
- description: "Imported model",
426
- inputTokenLimit: 32768,
427
- outputTokenLimit: 8192,
428
- supportedGenerationMethods: ["generateContent", "countTokens"],
429
- temperature: 0.9,
430
- topP: 1.0,
431
- topK: 40
432
- };
433
- res.json(geminiModel);
434
- } else {
435
- res.status(404).json({ error: { code: 404, message: `Model ${modelId} not found`, status: "NOT_FOUND" } });
436
- }
437
- } catch (error) {
438
- logger.error('获取模型详情失败:', error.message);
439
- res.status(500).json({ error: { code: 500, message: error.message, status: "INTERNAL" } });
440
- }
441
- });
442
-
443
- const handleGeminiRequest = async (req, res, modelName, isStream) => {
444
- const maxRetries = Number(config.retryTimes || 0);
445
- const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;
446
-
447
- try {
448
- const token = await tokenManager.getToken();
449
- if (!token) {
450
- throw new Error('没有可用的token,请运行 npm run login 获取token');
451
- }
452
-
453
- const requestBody = generateGeminiRequestBody(req.body, modelName, token);
454
-
455
- if (isStream) {
456
- setStreamHeaders(res);
457
- const heartbeatTimer = createHeartbeat(res);
458
-
459
- try {
460
- let usageData = null;
461
- let hasToolCall = false;
462
-
463
- await with429Retry(
464
- () => generateAssistantResponse(requestBody, token, (data) => {
465
- if (data.type === 'usage') {
466
- usageData = data.usage;
467
- } else if (data.type === 'reasoning') {
468
- // Gemini 思考内容
469
- const chunk = createGeminiResponse(null, data.reasoning_content, data.thoughtSignature, null, null, null);
470
- writeStreamData(res, chunk);
471
- } else if (data.type === 'tool_calls') {
472
- hasToolCall = true;
473
- // Gemini 工具调用
474
- const chunk = createGeminiResponse(null, null, null, data.tool_calls, null, null);
475
- writeStreamData(res, chunk);
476
- } else {
477
- // 普通文本
478
- const chunk = createGeminiResponse(data.content, null, null, null, null, null);
479
- writeStreamData(res, chunk);
480
- }
481
- }),
482
- safeRetries,
483
- 'gemini.stream '
484
- );
485
-
486
- // 发送结束块和 usage
487
- const finishReason = hasToolCall ? "STOP" : "STOP"; // Gemini 工具调用也是 STOP
488
- const finalChunk = createGeminiResponse(null, null, null, null, finishReason, usageData);
489
- writeStreamData(res, finalChunk);
490
-
491
- clearInterval(heartbeatTimer);
492
- endStream(res);
493
- } catch (error) {
494
- clearInterval(heartbeatTimer);
495
- if (!res.writableEnded) {
496
- const statusCode = error.statusCode || error.status || 500;
497
- writeStreamData(res, buildGeminiErrorPayload(error, statusCode));
498
- endStream(res);
499
- }
500
- logger.error('Gemini 流式请求失败:', error.message);
501
- return;
502
- }
503
- } else {
504
- // 非流式
505
- req.setTimeout(0);
506
- res.setTimeout(0);
507
-
508
- const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
509
- () => generateAssistantResponseNoStream(requestBody, token),
510
- safeRetries,
511
- 'gemini.no_stream '
512
- );
513
-
514
- const finishReason = toolCalls.length > 0 ? "STOP" : "STOP";
515
- const response = createGeminiResponse(content, reasoningContent, reasoningSignature, toolCalls, finishReason, usage);
516
- res.json(response);
517
- }
518
- } catch (error) {
519
- logger.error('Gemini 请求失败:', error.message);
520
- if (res.headersSent) return;
521
- const statusCode = error.statusCode || error.status || 500;
522
- res.status(statusCode).json(buildGeminiErrorPayload(error, statusCode));
523
- }
524
- };
525
-
526
- app.post('/v1beta/models/:model\\:streamGenerateContent', (req, res) => {
527
- const modelName = req.params.model;
528
- handleGeminiRequest(req, res, modelName, true);
529
- });
530
-
531
- app.post('/v1beta/models/:model\\:generateContent', (req, res) => {
532
- const modelName = req.params.model;
533
- const isStream = req.query.alt === 'sse';
534
- handleGeminiRequest(req, res, modelName, isStream);
535
- });
536
-
537
- // ==================== Claude API ====================
538
-
539
-
540
-
541
- // Claude 流式响应工具
542
- const createClaudeStreamEvent = (eventType, data) => {
543
- return `event: ${eventType}\ndata: ${JSON.stringify(data)}\n\n`;
544
- };
545
-
546
- // Claude 非流式响应构建
547
- const createClaudeResponse = (id, model, content, reasoning, reasoningSignature, toolCalls, stopReason, usage) => {
548
- const contentBlocks = [];
549
-
550
- // 思维链内容(如果有)- Claude 格式用 thinking 类型
551
- if (reasoning) {
552
- const thinkingBlock = {
553
- type: "thinking",
554
- thinking: reasoning
555
- };
556
- if (reasoningSignature && config.passSignatureToClient) {
557
- thinkingBlock.signature = reasoningSignature;
558
- }
559
- contentBlocks.push(thinkingBlock);
560
- }
561
-
562
- // 文本内容
563
- if (content) {
564
- contentBlocks.push({
565
- type: "text",
566
- text: content
567
- });
568
- }
569
-
570
- // 工具调用
571
- if (toolCalls && toolCalls.length > 0) {
572
- for (const tc of toolCalls) {
573
- try {
574
- const toolBlock = {
575
- type: "tool_use",
576
- id: tc.id,
577
- name: tc.function.name,
578
- input: JSON.parse(tc.function.arguments)
579
- };
580
- if (tc.thoughtSignature && config.passSignatureToClient) {
581
- toolBlock.signature = tc.thoughtSignature;
582
- }
583
- contentBlocks.push(toolBlock);
584
- } catch (e) {
585
- // 解析失败时传入空对象
586
- contentBlocks.push({
587
- type: "tool_use",
588
- id: tc.id,
589
- name: tc.function.name,
590
- input: {}
591
- });
592
- }
593
- }
594
- }
595
-
596
- return {
597
- id: id,
598
- type: "message",
599
- role: "assistant",
600
- content: contentBlocks,
601
- model: model,
602
- stop_reason: stopReason,
603
- stop_sequence: null,
604
- usage: usage ? {
605
- input_tokens: usage.prompt_tokens || 0,
606
- output_tokens: usage.completion_tokens || 0
607
- } : { input_tokens: 0, output_tokens: 0 }
608
- };
609
- };
610
-
611
- // Claude API 处理函数
612
- const handleClaudeRequest = async (req, res, isStream) => {
613
- const { messages, model, system, tools, ...rawParams } = req.body;
614
-
615
- try {
616
- if (!messages) {
617
- return res.status(400).json(buildClaudeErrorPayload({ message: 'messages is required' }, 400));
618
- }
619
-
620
- const token = await tokenManager.getToken();
621
- if (!token) {
622
- throw new Error('没有可用的token,请运行 npm run login 获取token');
623
- }
624
-
625
- // 使用统一参数规范化模块处理 Claude 格式参数
626
- const parameters = normalizeClaudeParameters(rawParams);
627
-
628
- const requestBody = generateClaudeRequestBody(messages, model, parameters, tools, system, token);
629
-
630
- const msgId = `msg_${Date.now()}`;
631
- const maxRetries = Number(config.retryTimes || 0);
632
- const safeRetries = maxRetries > 0 ? Math.floor(maxRetries) : 0;
633
-
634
- if (isStream) {
635
- setStreamHeaders(res);
636
- const heartbeatTimer = createHeartbeat(res);
637
-
638
- try {
639
- let contentIndex = 0;
640
- let usageData = null;
641
- let hasToolCall = false;
642
- let currentBlockType = null;
643
- let reasoningSent = false;
644
-
645
- // 发送 message_start
646
- res.write(createClaudeStreamEvent('message_start', {
647
- type: "message_start",
648
- message: {
649
- id: msgId,
650
- type: "message",
651
- role: "assistant",
652
- content: [],
653
- model: model,
654
- stop_reason: null,
655
- stop_sequence: null,
656
- usage: { input_tokens: 0, output_tokens: 0 }
657
- }
658
- }));
659
-
660
- await with429Retry(
661
- () => generateAssistantResponse(requestBody, token, (data) => {
662
- if (data.type === 'usage') {
663
- usageData = data.usage;
664
- } else if (data.type === 'reasoning') {
665
- // 思维链内容 - 使用 thinking 类型
666
- if (!reasoningSent) {
667
- // 开始思维块
668
- const contentBlock = { type: "thinking", thinking: "" };
669
- if (data.thoughtSignature && config.passSignatureToClient) {
670
- contentBlock.signature = data.thoughtSignature;
671
- }
672
- res.write(createClaudeStreamEvent('content_block_start', {
673
- type: "content_block_start",
674
- index: contentIndex,
675
- content_block: contentBlock
676
- }));
677
- currentBlockType = 'thinking';
678
- reasoningSent = true;
679
- }
680
- // 发送思维增量
681
- const delta = { type: "thinking_delta", thinking: data.reasoning_content || '' };
682
- if (data.thoughtSignature && config.passSignatureToClient) {
683
- delta.signature = data.thoughtSignature;
684
- }
685
- res.write(createClaudeStreamEvent('content_block_delta', {
686
- type: "content_block_delta",
687
- index: contentIndex,
688
- delta: delta
689
- }));
690
- } else if (data.type === 'tool_calls') {
691
- hasToolCall = true;
692
- // 结束之前的块(如果有)
693
- if (currentBlockType) {
694
- res.write(createClaudeStreamEvent('content_block_stop', {
695
- type: "content_block_stop",
696
- index: contentIndex
697
- }));
698
- contentIndex++;
699
- }
700
- // 工具调用
701
- for (const tc of data.tool_calls) {
702
- try {
703
- const inputObj = JSON.parse(tc.function.arguments);
704
- const toolContentBlock = { type: "tool_use", id: tc.id, name: tc.function.name, input: {} };
705
- if (tc.thoughtSignature && config.passSignatureToClient) {
706
- toolContentBlock.signature = tc.thoughtSignature;
707
- }
708
- res.write(createClaudeStreamEvent('content_block_start', {
709
- type: "content_block_start",
710
- index: contentIndex,
711
- content_block: toolContentBlock
712
- }));
713
- // 发送 input 增量
714
- res.write(createClaudeStreamEvent('content_block_delta', {
715
- type: "content_block_delta",
716
- index: contentIndex,
717
- delta: { type: "input_json_delta", partial_json: JSON.stringify(inputObj) }
718
- }));
719
- res.write(createClaudeStreamEvent('content_block_stop', {
720
- type: "content_block_stop",
721
- index: contentIndex
722
- }));
723
- contentIndex++;
724
- } catch (e) {
725
- // 解析失败,跳过
726
- }
727
- }
728
- currentBlockType = null;
729
- } else {
730
- // 普通文本内容
731
- if (currentBlockType === 'thinking') {
732
- // 结束思维块
733
- res.write(createClaudeStreamEvent('content_block_stop', {
734
- type: "content_block_stop",
735
- index: contentIndex
736
- }));
737
- contentIndex++;
738
- currentBlockType = null;
739
- }
740
- if (currentBlockType !== 'text') {
741
- // 开始文本块
742
- res.write(createClaudeStreamEvent('content_block_start', {
743
- type: "content_block_start",
744
- index: contentIndex,
745
- content_block: { type: "text", text: "" }
746
- }));
747
- currentBlockType = 'text';
748
- }
749
- // 发送文本增量
750
- res.write(createClaudeStreamEvent('content_block_delta', {
751
- type: "content_block_delta",
752
- index: contentIndex,
753
- delta: { type: "text_delta", text: data.content || '' }
754
- }));
755
- }
756
- }),
757
- safeRetries,
758
- 'claude.stream '
759
- );
760
-
761
- // 结束最后一个内容块
762
- if (currentBlockType) {
763
- res.write(createClaudeStreamEvent('content_block_stop', {
764
- type: "content_block_stop",
765
- index: contentIndex
766
- }));
767
- }
768
-
769
- // 发送 message_delta
770
- const stopReason = hasToolCall ? 'tool_use' : 'end_turn';
771
- res.write(createClaudeStreamEvent('message_delta', {
772
- type: "message_delta",
773
- delta: { stop_reason: stopReason, stop_sequence: null },
774
- usage: usageData ? { output_tokens: usageData.completion_tokens || 0 } : { output_tokens: 0 }
775
- }));
776
-
777
- // 发送 message_stop
778
- res.write(createClaudeStreamEvent('message_stop', {
779
- type: "message_stop"
780
- }));
781
-
782
- clearInterval(heartbeatTimer);
783
- res.end();
784
- } catch (error) {
785
- clearInterval(heartbeatTimer);
786
- if (!res.writableEnded) {
787
- const statusCode = error.statusCode || error.status || 500;
788
- res.write(createClaudeStreamEvent('error', buildClaudeErrorPayload(error, statusCode)));
789
- res.end();
790
- }
791
- logger.error('Claude 流式请求失败:', error.message);
792
- return;
793
- }
794
- } else {
795
- // 非流式请求
796
- req.setTimeout(0);
797
- res.setTimeout(0);
798
-
799
- const { content, reasoningContent, reasoningSignature, toolCalls, usage } = await with429Retry(
800
- () => generateAssistantResponseNoStream(requestBody, token),
801
- safeRetries,
802
- 'claude.no_stream '
803
- );
804
-
805
- const stopReason = toolCalls.length > 0 ? 'tool_use' : 'end_turn';
806
- const response = createClaudeResponse(
807
- msgId,
808
- model,
809
- content,
810
- reasoningContent,
811
- reasoningSignature,
812
- toolCalls,
813
- stopReason,
814
- usage
815
- );
816
-
817
- res.json(response);
818
- }
819
- } catch (error) {
820
- logger.error('Claude 请求失败:', error.message);
821
- if (res.headersSent) return;
822
- const statusCode = error.statusCode || error.status || 500;
823
- res.status(statusCode).json(buildClaudeErrorPayload(error, statusCode));
824
- }
825
- };
826
-
827
- // Claude Messages API 端点
828
- app.post('/v1/messages', (req, res) => {
829
- const isStream = req.body.stream === true;
830
- handleClaudeRequest(req, res, isStream);
831
- });
832
-
833
  const server = app.listen(config.server.port, config.server.host, () => {
834
  logger.info(`服务器已启动: ${config.server.host}:${config.server.port}`);
835
  });
@@ -847,6 +133,7 @@ server.on('error', (error) => {
847
  }
848
  });
849
 
 
850
  const shutdown = () => {
851
  logger.info('正在关闭服务器...');
852
 
@@ -859,7 +146,7 @@ const shutdown = () => {
859
  logger.info('已关闭子进程请求器');
860
 
861
  // 清理对象池
862
- chunkPool.length = 0;
863
  logger.info('已清理对象池');
864
 
865
  server.close(() => {
@@ -877,7 +164,7 @@ const shutdown = () => {
877
  process.on('SIGINT', shutdown);
878
  process.on('SIGTERM', shutdown);
879
 
880
- // 未捕获异常处理
881
  process.on('uncaughtException', (error) => {
882
  logger.error('未捕获异常:', error.message);
883
  // 不立即退出,让当前请求完成
 
1
+ /**
2
+ * 服务器主入口
3
+ * Express 应用配置、中间件、路由挂载、服务器启动和关闭
4
+ */
5
+
6
  import express from 'express';
7
  import cors from 'cors';
8
  import path from 'path';
9
+ import { closeRequester } from '../api/client.js';
 
 
10
  import logger from '../utils/logger.js';
11
  import config from '../config/config.js';
12
+ import memoryManager from '../utils/memoryManager.js';
13
+ import { getPublicDir, getRelativePath } from '../utils/paths.js';
14
+ import { MEMORY_CHECK_INTERVAL } from '../constants/index.js';
15
+ import { errorHandler } from '../utils/errors.js';
16
+ import { getChunkPoolSize, clearChunkPool } from './stream.js';
17
+
18
+ // 路由模块
19
  import adminRouter from '../routes/admin.js';
20
  import sdRouter from '../routes/sd.js';
21
+ import openaiRouter from '../routes/openai.js';
22
+ import geminiRouter from '../routes/gemini.js';
23
+ import claudeRouter from '../routes/claude.js';
 
24
 
25
  const publicDir = getPublicDir();
26
 
 
28
 
29
  const app = express();
30
 
31
+ // ==================== 内存管理 ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  memoryManager.setThreshold(config.server.memoryThreshold);
33
  memoryManager.start(MEMORY_CHECK_INTERVAL);
34
 
35
+ // ==================== 基础中间件 ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  app.use(cors());
37
  app.use(express.json({ limit: config.security.maxRequestSize }));
38
 
 
46
  // 使用统一错误处理中间件
47
  app.use(errorHandler);
48
 
49
+ // ==================== 请求日志中间件 ====================
50
  app.use((req, res, next) => {
51
+ const ignorePaths = [
52
+ '/images', '/favicon.ico', '/.well-known',
53
+ '/sdapi/v1/options', '/sdapi/v1/samplers', '/sdapi/v1/schedulers',
54
+ '/sdapi/v1/upscalers', '/sdapi/v1/latent-upscale-modes',
55
+ '/sdapi/v1/sd-vae', '/sdapi/v1/sd-modules'
56
+ ];
57
+ if (!ignorePaths.some(p => req.path.startsWith(p))) {
58
  const start = Date.now();
59
  res.on('finish', () => {
60
  logger.request(req.method, req.path, res.statusCode, Date.now() - start);
 
62
  }
63
  next();
64
  });
65
+
66
+ // SD API 路由
67
  app.use('/sdapi/v1', sdRouter);
68
 
69
+ // ==================== API Key 验证中间件 ====================
70
  app.use((req, res, next) => {
71
  if (req.path.startsWith('/v1/')) {
72
  const apiKey = config.security?.apiKey;
 
82
  next();
83
  });
84
 
85
+ // ==================== API 路由 ====================
86
+
87
+ // OpenAI 兼容 API
88
+ app.use('/v1', openaiRouter);
89
+
90
+ // Gemini 兼容 API
91
+ app.use('/v1beta', geminiRouter);
92
+
93
+ // Claude 兼容 API(/v1/messages 由 claudeRouter 处理)
94
+ app.use('/v1', claudeRouter);
95
+
96
+ // ==================== 系统端点 ====================
97
 
98
  // 内存监控端点
99
  app.get('/v1/memory', (req, res) => {
 
106
  arrayBuffers: usage.arrayBuffers,
107
  pressure: memoryManager.getCurrentPressure(),
108
  poolSizes: memoryManager.getPoolSizes(),
109
+ chunkPoolSize: getChunkPoolSize()
110
  });
111
  });
112
 
 
115
  res.json({ status: 'ok', uptime: process.uptime() });
116
  });
117
 
118
+ // ==================== 服务器启动 ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  const server = app.listen(config.server.port, config.server.host, () => {
120
  logger.info(`服务器已启动: ${config.server.host}:${config.server.port}`);
121
  });
 
133
  }
134
  });
135
 
136
+ // ==================== 优雅关闭 ====================
137
  const shutdown = () => {
138
  logger.info('正在关闭服务器...');
139
 
 
146
  logger.info('已关闭子进程请求器');
147
 
148
  // 清理对象池
149
+ clearChunkPool();
150
  logger.info('已清理对象池');
151
 
152
  server.close(() => {
 
164
  process.on('SIGINT', shutdown);
165
  process.on('SIGTERM', shutdown);
166
 
167
+ // ==================== 异常处理 ====================
168
  process.on('uncaughtException', (error) => {
169
  logger.error('未捕获异常:', error.message);
170
  // 不立即退出,让当前请求完成
src/server/stream.js ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * SSE 流式响应和心跳机制工具模块
3
+ * 提供统一的流式响应处理、心跳保活、429重试等功能
4
+ */
5
+
6
+ import config from '../config/config.js';
7
+ import logger from '../utils/logger.js';
8
+ import memoryManager, { registerMemoryPoolCleanup } from '../utils/memoryManager.js';
9
+ import { DEFAULT_HEARTBEAT_INTERVAL } from '../constants/index.js';
10
+
11
+ // ==================== 心跳机制(防止 CF 超时) ====================
12
+ const HEARTBEAT_INTERVAL = config.server.heartbeatInterval || DEFAULT_HEARTBEAT_INTERVAL;
13
+ const SSE_HEARTBEAT = Buffer.from(': heartbeat\n\n');
14
+
15
+ /**
16
+ * 创建心跳定时器
17
+ * @param {Response} res - Express响应对象
18
+ * @returns {NodeJS.Timeout} 定时器
19
+ */
20
+ export const createHeartbeat = (res) => {
21
+ const timer = setInterval(() => {
22
+ if (!res.writableEnded) {
23
+ res.write(SSE_HEARTBEAT);
24
+ } else {
25
+ clearInterval(timer);
26
+ }
27
+ }, HEARTBEAT_INTERVAL);
28
+
29
+ // 响应结束时清理
30
+ res.on('close', () => clearInterval(timer));
31
+ res.on('finish', () => clearInterval(timer));
32
+
33
+ return timer;
34
+ };
35
+
36
+ // ==================== 预编译的常量字符串(避免重复创建) ====================
37
+ const SSE_PREFIX = Buffer.from('data: ');
38
+ const SSE_SUFFIX = Buffer.from('\n\n');
39
+ const SSE_DONE = Buffer.from('data: [DONE]\n\n');
40
+
41
+ /**
42
+ * 生成响应元数据
43
+ * @returns {{id: string, created: number}}
44
+ */
45
+ export const createResponseMeta = () => ({
46
+ id: `chatcmpl-${Date.now()}`,
47
+ created: Math.floor(Date.now() / 1000)
48
+ });
49
+
50
+ /**
51
+ * 设置流式响应头
52
+ * @param {Response} res - Express响应对象
53
+ */
54
+ export const setStreamHeaders = (res) => {
55
+ res.setHeader('Content-Type', 'text/event-stream');
56
+ res.setHeader('Cache-Control', 'no-cache');
57
+ res.setHeader('Connection', 'keep-alive');
58
+ res.setHeader('X-Accel-Buffering', 'no'); // 禁用 nginx 缓冲
59
+ };
60
+
61
+ // ==================== 对象池(减少 GC) ====================
62
+ const chunkPool = [];
63
+
64
+ /**
65
+ * 从对象池获取 chunk 对象
66
+ * @returns {Object}
67
+ */
68
+ export const getChunkObject = () => chunkPool.pop() || { choices: [{ index: 0, delta: {}, finish_reason: null }] };
69
+
70
+ /**
71
+ * 释放 chunk 对象回对象池
72
+ * @param {Object} obj
73
+ */
74
+ export const releaseChunkObject = (obj) => {
75
+ const maxSize = memoryManager.getPoolSizes().chunk;
76
+ if (chunkPool.length < maxSize) chunkPool.push(obj);
77
+ };
78
+
79
+ // 注册内存清理回调
80
+ registerMemoryPoolCleanup(chunkPool, () => memoryManager.getPoolSizes().chunk);
81
+
82
+ /**
83
+ * 获取当前对象池大小(用于监控)
84
+ * @returns {number}
85
+ */
86
+ export const getChunkPoolSize = () => chunkPool.length;
87
+
88
+ /**
89
+ * 清空对象池
90
+ */
91
+ export const clearChunkPool = () => {
92
+ chunkPool.length = 0;
93
+ };
94
+
95
+ /**
96
+ * 零拷贝写入流式数据
97
+ * @param {Response} res - Express响应对象
98
+ * @param {Object} data - 要发送的数据
99
+ */
100
+ export const writeStreamData = (res, data) => {
101
+ const json = JSON.stringify(data);
102
+ res.write(SSE_PREFIX);
103
+ res.write(json);
104
+ res.write(SSE_SUFFIX);
105
+ };
106
+
107
+ /**
108
+ * 结束流式响应
109
+ * @param {Response} res - Express响应对象
110
+ */
111
+ export const endStream = (res) => {
112
+ if (res.writableEnded) return;
113
+ res.write(SSE_DONE);
114
+ res.end();
115
+ };
116
+
117
+ // ==================== 通用重试工具(处理 429) ====================
118
+
119
+ /**
120
+ * 带 429 重试的执行器
121
+ * @param {Function} fn - 要执行的异步函数,接收 attempt 参数
122
+ * @param {number} maxRetries - 最大重试次数
123
+ * @param {string} loggerPrefix - 日志前缀
124
+ * @returns {Promise<any>}
125
+ */
126
+ export const with429Retry = async (fn, maxRetries, loggerPrefix = '') => {
127
+ const retries = Number.isFinite(maxRetries) && maxRetries > 0 ? Math.floor(maxRetries) : 0;
128
+ let attempt = 0;
129
+ // 首次执行 + 最多 retries 次重试
130
+ while (true) {
131
+ try {
132
+ return await fn(attempt);
133
+ } catch (error) {
134
+ // 兼容多种错误格式:error.status, error.statusCode, error.response?.status
135
+ const status = Number(error.status || error.statusCode || error.response?.status);
136
+ if (status === 429 && attempt < retries) {
137
+ const nextAttempt = attempt + 1;
138
+ logger.warn(`${loggerPrefix}收到 429,正在进行第 ${nextAttempt} 次重试(共 ${retries} 次)`);
139
+ attempt = nextAttempt;
140
+ continue;
141
+ }
142
+ throw error;
143
+ }
144
+ }
145
+ };