feat: show model loading progress

This commit is contained in:
lin onetwo 2024-04-11 23:51:06 +08:00
parent ee2ccc3e9b
commit 1d588134d0
5 changed files with 51 additions and 15 deletions

View file

@@ -473,7 +473,8 @@
"ModelNotExist": "Model Not Exist",
"OpenThisPath": "Open This Path",
"ModelNotExistDescription": "Try to load the model using this path you gave, but there is actually no model needed at this location",
"GenerationTimeout": "Generation Timeout, Terminated"
"GenerationTimeout": "Generation Timeout, Terminated",
"ModelLoadingProgress": "Model Loading"
},
"Description": "Description",
"Tags": "Tags",

View file

@@ -480,7 +480,8 @@
"ModelNotExistDescription": "尝试使用你给的这个路径加载模型,但在这个位置其实没有所需要的模型",
"OpenThisPath": "打开该位置",
"GenerationTimeout": "模型生成超时,已中止。",
"ModalDisposed": "模型已卸载,生成中止,需要重新加载模型。"
"ModalDisposed": "模型已卸载,生成中止,需要重新加载模型。",
"ModelLoadingProgress": "模型加载进度"
},
"Help": {
"Alternatives": "其它源",

View file

@@ -131,11 +131,23 @@ export class LanguageModel implements ILanguageModelService {
next: (result) => {
const loggerCommonMeta = { id: result.id, function: 'LanguageModel.runLanguageModel$' };
if ('type' in result && result.type === 'result') {
const { token, id } = result;
// prevent the case that the result is from previous or next conversation, where its Observable is not properly closed.
if (id === conversationID) {
subscriber.next({ token, id });
if ('type' in result) {
switch (result.type) {
case 'progress': {
const { percentage, id } = result;
if (id === conversationID) {
subscriber.next({ token: `${i18n.t('LanguageModel.ModelLoadingProgress')} ${(percentage * 100).toFixed(1)}%`, id });
}
break;
}
case 'result': {
const { token, id } = result;
// prevent the case that the result is from previous or next conversation, where its Observable is not properly closed.
if (id === conversationID) {
subscriber.next({ token, id });
}
break;
}
}
} else if ('level' in result) {
logger.log(result.level, `${result.message}`, loggerCommonMeta);
@@ -163,4 +175,13 @@ export class LanguageModel implements ILanguageModelService {
}
}
}
/**
 * Release the model currently held by the given runner, freeing its memory
 * so a different model can be loaded afterwards.
 * @param runner The language-model runner whose model should be unloaded.
 */
public async unloadLanguageModel(runner: LanguageModelRunner): Promise<void> {
  // Only the llama.cpp runner supports unloading here; any other runner is a no-op.
  if (runner === LanguageModelRunner.llamaCpp) {
    await this.llmWorker?.unloadLLama();
  }
}
}

View file

@@ -68,13 +68,24 @@ export interface IRunLLAmaOptions extends ILLMResultBase {
* Run language model on a shared worker, and queue requests to the worker.
*/
export interface ILanguageModelService {
  /**
   * Abort a chat response generation.
   * @param runner The runner (e.g. llama.cpp) executing the generation.
   * @param id Conversation id whose in-flight generation should be stopped.
   */
  abortLanguageModel(runner: LanguageModelRunner, id: string): Promise<void>;
  /**
   * Generate text based on options (including prompt).
   * Emits partial results as they arrive; during model load this stream may
   * also carry load-progress messages.
   * The Observable completes when generation finishes or is aborted.
   */
  runLanguageModel$(runner: LanguageModelRunner.llamaCpp, options: IRunLLAmaOptions): Observable<ILLMResultPart>;
  /**
   * Unload model from memory. So it is possible to load another model, or to free up memory.
   */
  unloadLanguageModel(runner: LanguageModelRunner): Promise<void>;
}
/**
 * IPC proxy descriptor exposing ILanguageModelService over LanguageModelChannel.
 * Each method is mapped to the proxy type matching its return shape:
 * `Function` for Promise-returning calls, `Function$` for Observable-returning
 * calls (note the `$` suffix convention on `runLanguageModel$`).
 */
export const LanguageModelServiceIPCDescriptor = {
  channel: LanguageModelChannel.name,
  properties: {
    abortLanguageModel: ProxyPropertyType.Function,
    runLanguageModel$: ProxyPropertyType.Function$,
    unloadLanguageModel: ProxyPropertyType.Function,
  },
};

View file

@@ -1,3 +1,4 @@
import debounce from 'lodash/debounce';
import { getLlama, Llama, LlamaChatSession, LlamaContext, LlamaContextSequence, LlamaModel, LlamaModelOptions } from 'node-llama-cpp';
import { Observable, type Subscriber } from 'rxjs';
import { ILanguageModelWorkerResponse, IRunLLAmaOptions } from '../interface';
@@ -25,14 +26,15 @@ export async function loadLLamaAndModal(
},
});
subscriber.next({ message: 'prepared to load modal', ...loggerCommonMeta, meta: { ...loggerCommonMeta.meta, loadConfigOverwrite } });
const onLoadProgress = debounce((percentage: number) => {
subscriber.next({
type: 'progress',
percentage,
id: conversationID,
});
});
const loadConfig: LlamaModelOptions = {
onLoadProgress: (loadProgress) => {
subscriber.next({
type: 'progress',
percentage: loadProgress,
id: conversationID,
});
},
onLoadProgress,
...loadConfigOverwrite,
};
modalInstance = await llamaInstance.loadModel(loadConfig);
@@ -108,7 +110,7 @@ export function runLLama(
if (modalInstance === undefined) {
abortController.abort();
runnerAbortControllers.delete(conversationID);
subscriber.next({ type: 'result', token: texts.timeout, id: conversationID });
subscriber.next({ type: 'result', token: texts.disposed, id: conversationID });
subscriber.complete();
return;
}