// index.mjs
import { LLM } from 'llama-node';
import { LLamaCpp } from 'llama-node/dist/llm/llama-cpp.js';
// import path from 'path';

const model = '/Users/linonetwo/Downloads/ggml-vic7b-q5_1.bin'; /* path.resolve(process.cwd(), '../ggml-vic7b-q5_1.bin') */
const llama = new LLM(LLamaCpp);

const config = {
  modelPath: model,
  enableLogging: true,
  nCtx: 1024, // context window size in tokens
  seed: 0,
  f16Kv: false,
  logitsAll: false,
  vocabOnly: false,
  useMlock: false,
  embedding: false,
  useMmap: true, // memory-map the model file instead of reading it all into RAM
  nGpuLayers: 0, // 0 = CPU-only inference
};

const template = `How to write a science fiction story?`;
// Vicuna-style chat prompt: the model continues the text after "ASSISTANT:".
const prompt = `A chat between a user and a useful assistant. USER: ${template} ASSISTANT:`;

const run = async () => {
  await llama.load(config);
  await llama.createCompletion(
    {
      nThreads: 4,
      nTokPredict: 2048, // maximum number of tokens to generate
      topK: 40,
      topP: 0.1,
      temp: 0.2,
      // repeatPenalty: 1,
      prompt,
    },
    (response) => {
      // Stream each generated token to stdout as it arrives.
      process.stdout.write(response.token);
    },
  );
};

void run();
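
// A minimal variant sketch, assuming the same llama-node callback API as above:
// accumulate the streamed tokens into a single string instead of writing them
// to stdout. The `runToString` name is hypothetical, the function is not
// invoked here, and it expects `llama.load(config)` to have resolved already.
const runToString = async (userPrompt) => {
  let text = '';
  await llama.createCompletion(
    { nThreads: 4, nTokPredict: 2048, topK: 40, topP: 0.1, temp: 0.2, prompt: userPrompt },
    (response) => {
      text += response.token; // each callback delivers one generated token
    },
  );
  return text;
};

// Run with `node index.mjs`, assuming llama-node and its llama.cpp backend
// (@llama-node/llama-cpp) are installed and the GGML model file exists at
// the path in `model`.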