import { createServer, IncomingMessage, Server, ServerResponse } from 'http';
import { AddressInfo } from 'net';

interface ChatMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
}

interface ChatRequest {
  messages: ChatMessage[];
  model?: string;
  stream?: boolean;
}

interface Rule {
  response?: string; // Optional: omit for embedding-only rules
  stream?: boolean; // When false, SSE chunks are sent without the simulated delay
  embedding?: number[]; // Optional: predefined embedding vector for this response
}

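/**
 * In-process mock of the OpenAI HTTP API for tests. It serves
 * `/v1/chat/completions` (plain JSON and SSE streaming) and `/v1/embeddings`,
 * answering the N-th call with the N-th configured rule, plus `/health`,
 * `/reset`, and `/last-request` helpers for test plumbing.
 *
 * A minimal usage sketch (the surrounding test harness is assumed):
 *
 *   const server = new MockOpenAIServer(undefined, [
 *     { response: 'Hello!' }, // answers the first chat completion call
 *     { response: 'Chunk A<stream_split>Chunk B' }, // streamed as two deltas
 *     { response: 'Hi', embedding: [0.1, 0.2, 0.3] }, // feeds both endpoints
 *   ]);
 *   await server.start();
 *   // Point the client under test at `server.baseUrl`.
 *   await server.stop();
 */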
export class MockOpenAIServer {
  private server: Server | null = null;
  public port = 0;
  public baseUrl = '';
  private chatRules: Rule[] = []; // Chat completion rules (those with a response)
  private embeddingRules: Array<{ embedding: number[] }> = []; // Embedding rules
  private callCount = 0; // Total chat completion calls
  private embeddingCallCount = 0; // Embedding calls, tracked separately
  private lastRequest: ChatRequest | null = null; // Most recent chat request
  private allRequests: ChatRequest[] = []; // Every chat request, for debugging

  constructor(private fixedPort?: number, rules?: Rule[]) {
    // Rule partitioning is shared with addRules: a rule with both a response
    // and an embedding is added to both collections.
    if (rules) {
      this.addRules(rules);
    }
  }

  /**
   * Replace the rules at runtime. This lets tests reuse a running server
   * and swap the response rules without creating a new server instance.
   */
  public setRules(rules: Rule[]): void {
    if (!Array.isArray(rules)) return;
    this.chatRules = [];
    this.embeddingRules = [];
    this.addRules(rules);
  }

  /**
   * Append rules at runtime, so tests can add responses without replacing
   * the ones already configured.
   */
  public addRules(rules: Rule[]): void {
    if (!Array.isArray(rules)) return;
    for (const rule of rules) {
      // A rule may carry both a response and an embedding; the two are
      // handled independently, so such a rule lands in both collections.
      if (rule.response) {
        this.chatRules.push(rule);
      }
      if (rule.embedding) {
        this.embeddingRules.push({ embedding: rule.embedding });
      }
    }
  }

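  // Rules are consumed strictly in call order rather than matched on content:
  // the N-th chat completion request is answered with chatRules[N - 1], and
  // each embedding input consumes the next entry of embeddingRules. E.g.:
  //   server.setRules([{ response: 'first' }, { response: 'second' }]);
  //   // 1st POST /v1/chat/completions -> 'first', 2nd -> 'second'
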
  /**
   * Get the most recent chat request received by the server.
   */
  public getLastRequest(): ChatRequest | null {
    return this.lastRequest;
  }

  /**
   * Get all chat requests received by the server (for debugging).
   */
  public getAllRequests(): ChatRequest[] {
    return this.allRequests;
  }

  /**
   * Clear all stored requests and reset both call counters.
   */
  public clearAllRequests(): void {
    this.lastRequest = null;
    this.allRequests = [];
    this.callCount = 0;
    this.embeddingCallCount = 0;
  }

  async start(): Promise<void> {
    return new Promise((resolve, reject) => {
      this.server = createServer((request: IncomingMessage, response: ServerResponse) => {
        response.setHeader('Access-Control-Allow-Origin', '*');
        response.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
        response.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');

        if (request.method === 'OPTIONS') {
          if (!response.writableEnded && !response.headersSent) {
            response.writeHead(200);
            response.end();
          }
          return;
        }

        try {
          const url = new URL(request.url || '', `http://127.0.0.1:${this.port}`);

          if (request.method === 'GET' && url.pathname === '/health') {
            if (!response.writableEnded && !response.headersSent) {
              response.writeHead(200, { 'Content-Type': 'application/json' });
              response.end(JSON.stringify({ status: 'ok' }));
            }
            return;
          }

          if (request.method === 'POST' && url.pathname === '/v1/chat/completions') {
            void this.handleChatCompletions(request, response);
            return;
          }

          if (request.method === 'POST' && url.pathname === '/v1/embeddings') {
            void this.handleEmbeddings(request, response);
            return;
          }

          if (request.method === 'POST' && url.pathname === '/reset') {
            // Reset call counts and the stored last request for testing
            this.callCount = 0;
            this.embeddingCallCount = 0;
            this.lastRequest = null;
            if (!response.writableEnded && !response.headersSent) {
              response.writeHead(200, { 'Content-Type': 'application/json' });
              response.end(JSON.stringify({ success: true, message: 'Call count reset' }));
            }
            return;
          }

          if (request.method === 'GET' && url.pathname === '/last-request') {
            // Return the last received request for testing
            if (!response.writableEnded && !response.headersSent) {
              response.writeHead(200, { 'Content-Type': 'application/json' });
              response.end(JSON.stringify({ lastRequest: this.lastRequest }));
            }
            return;
          }

          if (!response.writableEnded && !response.headersSent) {
            response.writeHead(404, { 'Content-Type': 'application/json' });
            response.end(JSON.stringify({ error: 'Not found' }));
          }
        } catch {
          if (!response.writableEnded && !response.headersSent) {
            response.writeHead(400, { 'Content-Type': 'application/json' });
            response.end(JSON.stringify({ error: 'Bad request' }));
          }
        }
      });

      this.server.on('error', (error) => {
        reject(new Error(String(error)));
      });

      this.server.on('listening', () => {
        const addr = this.server!.address() as AddressInfo;
        this.port = addr.port;
        this.baseUrl = `http://127.0.0.1:${this.port}`;
        resolve();
      });

      try {
        this.server.listen(this.fixedPort || 0, '127.0.0.1');
      } catch (error) {
        reject(new Error(String(error)));
      }
    });
  }

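  // Besides the OpenAI endpoints, start() wires up test-only HTTP helpers:
  //   GET  /health        -> { "status": "ok" }
  //   POST /reset         -> clears call counts and the stored last request
  //   GET  /last-request  -> { "lastRequest": <most recent ChatRequest> }
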
  async stop(): Promise<void> {
    if (!this.server) return;
    return new Promise((resolve) => {
      // Force close all connections (e.g. in-flight SSE streams) before closing the server
      this.server!.closeAllConnections?.();

      this.server!.close(() => {
        this.server = null;
        resolve();
      });

      // Fallback: force resolve after a timeout to prevent hanging
      setTimeout(() => {
        if (this.server) {
          this.server = null;
          resolve();
        }
      }, 1000);
    });
  }

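  // Typical lifecycle in a test suite (hypothetical hook names, Vitest/Playwright style):
  //   beforeAll(async () => { await server.start(); });
  //   afterAll(async () => { await server.stop(); });
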
  private async handleEmbeddings(request: IncomingMessage, response: ServerResponse) {
    let body = '';
    request.on('data', (chunk: Buffer) => {
      body += chunk.toString();
    });
    request.on('end', () => {
      try {
        const embeddingRequest = JSON.parse(body) as { input: string | string[]; model?: string };

        const inputs = Array.isArray(embeddingRequest.input) ? embeddingRequest.input : [embeddingRequest.input];

        // Use embeddingCallCount to pick the next predefined embedding from embeddingRules
        const embeddings = inputs.map((input) => {
          this.embeddingCallCount++;
          const ruleIndex = this.embeddingCallCount - 1;
          const rule = this.embeddingRules[ruleIndex];

          // Use the predefined embedding from embeddingRules (generated by the semantic tag in agent.ts)
          if (rule?.embedding && Array.isArray(rule.embedding)) {
            return rule.embedding;
          }

          // For UI-generated embeddings (not from agent chat), return a simple default vector.
          // This lets UI operations (like clicking "Generate" in preferences) work.
          const simpleVector: number[] = new Array<number>(384).fill(0);
          // Vary the first component by input length to make the vector somewhat unique
          simpleVector[0] = (input.length % 100) / 100;
          return simpleVector;
        });

        const resp = {
          object: 'list',
          data: embeddings.map((embedding, index) => ({
            object: 'embedding',
            embedding,
            index,
          })),
          model: embeddingRequest.model || 'text-embedding-ada-002',
          usage: {
            prompt_tokens: inputs.reduce((sum, input) => sum + input.length, 0),
            total_tokens: inputs.reduce((sum, input) => sum + input.length, 0),
          },
        };

        if (!response.writableEnded && !response.headersSent) {
          response.setHeader('Content-Type', 'application/json');
          response.writeHead(200);
          response.end(JSON.stringify(resp));
        }
      } catch {
        if (!response.writableEnded && !response.headersSent) {
          response.writeHead(400, { 'Content-Type': 'application/json' });
          response.end(JSON.stringify({ error: 'Invalid JSON' }));
        }
      }
    });
  }

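  // Wire shapes served by handleEmbeddings, following the OpenAI embeddings format:
  //   request:  { "input": ["some text"], "model": "text-embedding-ada-002" }
  //   response: { "object": "list", "model": "...",
  //               "data": [{ "object": "embedding", "embedding": [...], "index": 0 }],
  //               "usage": { "prompt_tokens": n, "total_tokens": n } }
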
  private async handleChatCompletions(request: IncomingMessage, response: ServerResponse) {
    let body = '';
    request.on('data', (chunk: Buffer) => {
      body += chunk.toString();
    });
    request.on('end', () => {
      try {
        // Parse the request and answer it according to the configured rules
        const chatRequest = JSON.parse(body) as ChatRequest;

        // Store the request so tests can validate it later
        this.lastRequest = chatRequest;
        this.allRequests.push(chatRequest);

        if (chatRequest.stream) {
          this.handleStreamingChatCompletions(chatRequest, response);
          return;
        }

        const resp = this.generateChatCompletionResponse(chatRequest);
        if (!response.writableEnded && !response.headersSent) {
          response.setHeader('Content-Type', 'application/json');
          response.writeHead(200);
          response.end(JSON.stringify(resp));
        }
      } catch {
        if (!response.writableEnded && !response.headersSent) {
          response.writeHead(400, { 'Content-Type': 'application/json' });
          response.end(JSON.stringify({ error: 'Invalid JSON' }));
        }
      }
    });
  }

  private generateChatCompletionResponse(chatRequest: ChatRequest) {
    const modelName = chatRequest.model || 'test-model';
    // Increment the call count for each API request
    this.callCount++;

    // The call count determines which rule answers this request (call N gets rule N)
    const ruleIndex = this.callCount - 1;
    const responseRule = this.chatRules[ruleIndex];
    if (!responseRule) {
      // No rule left for this call: return an empty completion
      return {
        id: 'chatcmpl-test-' + Date.now().toString(),
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: modelName,
        choices: [],
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
      };
    }

    return {
      id: 'chatcmpl-test-' + Date.now().toString(),
      object: 'chat.completion',
      created: Math.floor(Date.now() / 1000),
      model: modelName,
      choices: [
        {
          index: 0,
          message: {
            role: 'assistant',
            content: responseRule.response,
          },
          finish_reason: 'stop',
        },
      ],
      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    };
  }

  private handleStreamingChatCompletions(chatRequest: ChatRequest, response: ServerResponse) {
    if (response.writableEnded) return;

    const modelName = chatRequest.model || 'test-model';
    // Increment the call count for streaming requests too
    this.callCount++;
    // The call count determines which rule answers this request (call N gets rule N)
    const ruleIndex = this.callCount - 1;
    const responseRule = this.chatRules[ruleIndex];

    // If a rule matched and the client requested streaming, stream the matched rule's response via SSE.
    if (responseRule && chatRequest.stream) {
      response.setHeader('Content-Type', 'text/plain; charset=utf-8');
      response.setHeader('Cache-Control', 'no-cache');
      response.setHeader('Connection', 'keep-alive');
      response.writeHead(200);

      // The first chunk carries the assistant role
      const roleChunk = {
        id: 'chatcmpl-test-' + Date.now().toString(),
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: modelName,
        choices: [
          {
            index: 0,
            delta: { role: 'assistant' },
            finish_reason: null,
          },
        ],
      };

      // Content follows in one or more chunks, split on '<stream_split>'
      const rawResponse = typeof responseRule.response === 'string'
        ? responseRule.response
        : String(responseRule.response);
      const chunks = rawResponse.split('<stream_split>');
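      // For example, a rule response of 'Hel<stream_split>lo' is delivered as
      // two content deltas, 'Hel' then 'lo', with chunkDelay (see below) between them.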
      const roleLine = `data: ${JSON.stringify(roleChunk)}\n\n`;
      response.write(roleLine);

      // Helper to write one SSE content-delta line
      const writeChunkLine = (content: string) => {
        const contentChunk = {
          id: 'chatcmpl-test-' + Date.now().toString(),
          object: 'chat.completion.chunk',
          created: Math.floor(Date.now() / 1000),
          model: modelName,
          choices: [
            {
              index: 0,
              delta: { content },
              finish_reason: null,
            },
          ],
        };
        const contentLine = `data: ${JSON.stringify(contentChunk)}\n\n`;
        response.write(contentLine);
      };
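      // On the wire this yields standard OpenAI SSE framing, roughly:
      //   data: {"object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant"},...}]}
      //
      //   data: {"object":"chat.completion.chunk","choices":[{"delta":{"content":"Chunk A"},...}]}
      //
      //   data: [DONE]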
      // Stream each chunk with a delay to simulate streaming.
      // When the rule has stream=false, the client still sends stream=true (the Vercel AI SDK always streams),
      // so we must reply in SSE format but skip the long delay, simulating an instant non-streaming response.
      const chunkDelay = responseRule.stream === false ? 0 : 5000;
      void (async () => {
        for (let index = 0; index < chunks.length; index++) {
          // If the client closed the connection, stop streaming
          if (response.writableEnded) return;
          writeChunkLine(chunks[index]);
          // The delay between chunks must be long enough for cancel-stream tests to click cancel before streaming finishes.
          // 5 seconds keeps 4-chunk streams alive ~15s, well within PLAYWRIGHT_TIMEOUT (25s).
          if (chunkDelay > 0) {
            await new Promise(resolve => setTimeout(resolve, chunkDelay));
          }
        }

        // Send a final empty chunk with finish_reason, then the DONE sentinel
        if (!response.writableEnded) {
          const finalChunk = {
            id: 'chatcmpl-test-' + Date.now().toString(),
            object: 'chat.completion.chunk',
            created: Math.floor(Date.now() / 1000),
            model: modelName,
            choices: [
              {
                index: 0,
                delta: {},
                finish_reason: 'stop',
              },
            ],
          };
          const finalLine = `data: ${JSON.stringify(finalChunk)}\n\n`;
          response.write(finalLine);
          response.write('data: [DONE]\n\n');
          response.end();
        }
      })();
      return;
    }

    // If a rule matched but the client did not request streaming, return a regular JSON chat completion
    if (responseRule && !chatRequest.stream) {
      const resp = {
        id: 'chatcmpl-test-' + Date.now().toString(),
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: modelName,
        choices: [
          {
            index: 0,
            message: {
              role: 'assistant',
              content: responseRule.response,
            },
            finish_reason: 'stop',
          },
        ],
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
      };
      if (!response.writableEnded) {
        response.setHeader('Content-Type', 'application/json');
        response.writeHead(200);
        response.end(JSON.stringify(resp));
      }
      return;
    }

    // Default for unmatched stream requests: send only DONE so the client can
    // close the stream without producing any assistant content
    response.setHeader('Content-Type', 'text/plain; charset=utf-8');
    response.setHeader('Cache-Control', 'no-cache');
    response.setHeader('Connection', 'keep-alive');
    response.writeHead(200);
    response.write('data: [DONE]\n\n');
    response.end();
  }
}