feat: show model loading progress

This commit is contained in:
lin onetwo 2024-04-11 23:51:06 +08:00
parent ee2ccc3e9b
commit 1d588134d0
5 changed files with 51 additions and 15 deletions

View file

@@ -473,7 +473,8 @@
"ModelNotExist": "Model Not Exist",
"OpenThisPath": "Open This Path",
"ModelNotExistDescription": "Try to load the model using this path you gave, but there is actually no model needed at this location",
"GenerationTimeout": "Generation Timeout, Terminated"
"GenerationTimeout": "Generation Timeout, Terminated",
"ModelLoadingProgress": "Model Loading"
},
"Description": "Description",
"Tags": "Tags",

View file

@@ -480,7 +480,8 @@
"ModelNotExistDescription": "尝试使用你给的这个路径加载模型,但在这个位置其实没有所需要的模型",
"OpenThisPath": "打开该位置",
"GenerationTimeout": "模型生成超时,已中止。",
"ModalDisposed": "模型已卸载,生成中止,需要重新加载模型。"
"ModalDisposed": "模型已卸载,生成中止,需要重新加载模型。",
"ModelLoadingProgress": "模型加载进度"
},
"Help": {
"Alternatives": "其它源",

View file

@@ -131,11 +131,23 @@ export class LanguageModel implements ILanguageModelService {
next: (result) => {
const loggerCommonMeta = { id: result.id, function: 'LanguageModel.runLanguageModel$' };
if ('type' in result && result.type === 'result') {
const { token, id } = result;
// prevent the case that the result is from previous or next conversation, where its Observable is not properly closed.
if (id === conversationID) {
subscriber.next({ token, id });
if ('type' in result) {
switch (result.type) {
case 'progress': {
const { percentage, id } = result;
if (id === conversationID) {
subscriber.next({ token: `${i18n.t('LanguageModel.ModelLoadingProgress')} ${(percentage * 100).toFixed(1)}%`, id });
}
break;
}
case 'result': {
const { token, id } = result;
// prevent the case that the result is from previous or next conversation, where its Observable is not properly closed.
if (id === conversationID) {
subscriber.next({ token, id });
}
break;
}
}
} else if ('level' in result) {
logger.log(result.level, `${result.message}`, loggerCommonMeta);
@@ -163,4 +175,13 @@ export class LanguageModel implements ILanguageModelService {
}
}
}
/**
 * Release the model currently held by the given runner, freeing its memory
 * so a different model can be loaded afterwards.
 * @param runner The language-model runner whose model should be unloaded.
 */
public async unloadLanguageModel(runner: LanguageModelRunner): Promise<void> {
  // Only the llama.cpp runner supports unloading here; any other runner is a no-op.
  if (runner === LanguageModelRunner.llamaCpp) {
    await this.llmWorker?.unloadLLama();
  }
}
}

View file

@@ -68,13 +68,24 @@ export interface IRunLLAmaOptions extends ILLMResultBase {
* Run language model on a shared worker, and queue requests to the worker.
*/
export interface ILanguageModelService {
  /**
   * Abort a chat response generation.
   * @param runner The runner (e.g. llama.cpp) executing the generation.
   * @param id Conversation id whose in-flight generation should be stopped.
   */
  abortLanguageModel(runner: LanguageModelRunner, id: string): Promise<void>;
  /**
   * Generate text based on options (including prompt).
   * Emits partial results as they arrive; during model load this stream may
   * also carry load-progress messages.
   * The Observable completes when generation finishes or is aborted.
   */
  runLanguageModel$(runner: LanguageModelRunner.llamaCpp, options: IRunLLAmaOptions): Observable<ILLMResultPart>;
  /**
   * Unload model from memory. So it is possible to load another model, or to free up memory.
   */
  unloadLanguageModel(runner: LanguageModelRunner): Promise<void>;
}
/**
 * IPC proxy descriptor exposing ILanguageModelService over LanguageModelChannel.
 * Each method is mapped to the proxy type matching its return shape:
 * `Function` for Promise-returning calls, `Function$` for Observable-returning
 * calls (note the `$` suffix convention on `runLanguageModel$`).
 */
export const LanguageModelServiceIPCDescriptor = {
  channel: LanguageModelChannel.name,
  properties: {
    abortLanguageModel: ProxyPropertyType.Function,
    runLanguageModel$: ProxyPropertyType.Function$,
    unloadLanguageModel: ProxyPropertyType.Function,
  },
};

View file

@@ -1,3 +1,4 @@
import debounce from 'lodash/debounce';
import { getLlama, Llama, LlamaChatSession, LlamaContext, LlamaContextSequence, LlamaModel, LlamaModelOptions } from 'node-llama-cpp';
import { Observable, type Subscriber } from 'rxjs';
import { ILanguageModelWorkerResponse, IRunLLAmaOptions } from '../interface';
@@ -25,14 +26,15 @@ export async function loadLLamaAndModal(
},
});
subscriber.next({ message: 'prepared to load modal', ...loggerCommonMeta, meta: { ...loggerCommonMeta.meta, loadConfigOverwrite } });
const onLoadProgress = debounce((percentage: number) => {
subscriber.next({
type: 'progress',
percentage,
id: conversationID,
});
});
const loadConfig: LlamaModelOptions = {
onLoadProgress: (loadProgress) => {
subscriber.next({
type: 'progress',
percentage: loadProgress,
id: conversationID,
});
},
onLoadProgress,
...loadConfigOverwrite,
};
modalInstance = await llamaInstance.loadModel(loadConfig);
@@ -108,7 +110,7 @@ export function runLLama(
if (modalInstance === undefined) {
abortController.abort();
runnerAbortControllers.delete(conversationID);
subscriber.next({ type: 'result', token: texts.timeout, id: conversationID });
subscriber.next({ type: 'result', token: texts.disposed, id: conversationID });
subscriber.complete();
return;
}