Mirror of https://github.com/xtekky/gpt4free.git (synced 2025-12-15 14:51:19 -08:00)

Add project files
Commit 90715e702b (parent 8321dca121)
17 changed files with 4301 additions and 20 deletions
105  projects/text_to_speech/worker.js  Normal file
@@ -0,0 +1,105 @@
import { env, Tensor, AutoTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan } from '@xenova/transformers';
import { encodeWAV } from './utils';

// Disable local model checks
env.allowLocalModels = false;

// Use the Singleton pattern to enable lazy construction of the pipeline.
class MyTextToSpeechPipeline {

    static BASE_URL = 'https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/';

    static model_id = 'Xenova/speecht5_tts';
    static vocoder_id = 'Xenova/speecht5_hifigan';

    static tokenizer_instance = null;
    static model_instance = null;
    static vocoder_instance = null;

    static async getInstance(progress_callback = null) {
        if (this.tokenizer_instance === null) {
            // Assign to tokenizer_instance (not this.tokenizer) so the null check above actually caches it.
            this.tokenizer_instance = AutoTokenizer.from_pretrained(this.model_id, { progress_callback });
        }

        if (this.model_instance === null) {
            this.model_instance = SpeechT5ForTextToSpeech.from_pretrained(this.model_id, {
                quantized: false,
                progress_callback,
            });
        }

        if (this.vocoder_instance === null) {
            this.vocoder_instance = SpeechT5HifiGan.from_pretrained(this.vocoder_id, {
                quantized: false,
                progress_callback,
            });
        }

        // Wait for the tokenizer, model, and vocoder to finish loading, then notify the main thread.
        const result = await Promise.all([
            this.tokenizer_instance,
            this.model_instance,
            this.vocoder_instance,
        ]);
        self.postMessage({
            status: 'ready',
        });
        return result;
    }

    static async getSpeakerEmbeddings(speaker_id) {
        // e.g., `cmu_us_awb_arctic-wav-arctic_a0001`
        const speaker_embeddings_url = `${this.BASE_URL}${speaker_id}.bin`;
        const speaker_embeddings = new Tensor(
            'float32',
            new Float32Array(await (await fetch(speaker_embeddings_url)).arrayBuffer()),
            [1, 512],
        );
        return speaker_embeddings;
    }
}

// Mapping of cached speaker embeddings
const speaker_embeddings_cache = new Map();

// Listen for messages from the main thread
self.addEventListener('message', async (event) => {
    // Load the pipeline, forwarding model-loading progress to the main thread.
    const [tokenizer, model, vocoder] = await MyTextToSpeechPipeline.getInstance(x => {
        self.postMessage(x);
    });

    // Tokenize the input
    const { input_ids } = tokenizer(event.data.text);

    // Load the speaker embeddings, caching them per speaker_id
    let speaker_embeddings = speaker_embeddings_cache.get(event.data.speaker_id);
    if (speaker_embeddings === undefined) {
        speaker_embeddings = await MyTextToSpeechPipeline.getSpeakerEmbeddings(event.data.speaker_id);
        speaker_embeddings_cache.set(event.data.speaker_id, speaker_embeddings);
    }

    // Generate the waveform
    let response;
    try {
        response = await model.generate_speech(input_ids, speaker_embeddings, { vocoder });
    } catch (e) {
        self.postMessage({
            status: 'error',
            exception: e,
        });
        throw e;
    }
    const { waveform } = response;

    // Encode the waveform as a WAV file
    const wav = encodeWAV(waveform.data);

    // Send the output back to the main thread
    self.postMessage({
        status: 'complete',
        output: new Blob([wav], { type: 'audio/wav' }),
    });
});
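The encodeWAV helper imported from './utils' belongs to the same commit but is not shown in this excerpt. A minimal sketch of such a helper for this worker's output (a mono Float32Array, assuming SpeechT5's 16 kHz sample rate and a 32-bit float WAV container) might look like the following; the sample rate, format choice, and function body are assumptions, not the committed implementation.

// Hypothetical sketch of an encodeWAV helper: wraps Float32 samples in a
// mono, 32-bit IEEE-float WAV container (format code 3) at 16 kHz.
function encodeWAV(samples, sampleRate = 16000) {
    const bytesPerSample = 4;
    const buffer = new ArrayBuffer(44 + samples.length * bytesPerSample);
    const view = new DataView(buffer);

    const writeString = (offset, str) => {
        for (let i = 0; i < str.length; ++i) {
            view.setUint8(offset + i, str.charCodeAt(i));
        }
    };

    writeString(0, 'RIFF');                                        // RIFF identifier
    view.setUint32(4, 36 + samples.length * bytesPerSample, true); // RIFF chunk length
    writeString(8, 'WAVE');                                        // RIFF type
    writeString(12, 'fmt ');                                       // format chunk identifier
    view.setUint32(16, 16, true);                                  // format chunk length
    view.setUint16(20, 3, true);                                   // sample format: IEEE float
    view.setUint16(22, 1, true);                                   // channel count: mono
    view.setUint32(24, sampleRate, true);                          // sample rate
    view.setUint32(28, sampleRate * bytesPerSample, true);         // byte rate
    view.setUint16(32, bytesPerSample, true);                      // block align
    view.setUint16(34, 32, true);                                  // bits per sample
    writeString(36, 'data');                                       // data chunk identifier
    view.setUint32(40, samples.length * bytesPerSample, true);     // data chunk length

    // Write the raw samples after the 44-byte header.
    for (let i = 0; i < samples.length; ++i) {
        view.setFloat32(44 + i * bytesPerSample, samples[i], true);
    }
    return buffer;
}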
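For context, a main thread could drive this worker roughly as sketched below. The message shapes mirror what the worker reads and posts above ({ text, speaker_id } in; progress events, 'ready', then 'complete' with a WAV Blob or 'error' out), but the page-side wiring itself is not part of this file and is only an assumed example.

// Hypothetical main-thread usage of the worker above.
const worker = new Worker(new URL('./worker.js', import.meta.url), { type: 'module' });

worker.addEventListener('message', (event) => {
    const data = event.data;
    switch (data.status) {
        case 'ready':
            console.log('Models loaded');
            break;
        case 'complete': {
            // Play the generated speech from the returned WAV Blob.
            const audio = new Audio(URL.createObjectURL(data.output));
            audio.play();
            break;
        }
        case 'error':
            console.error('Speech generation failed', data.exception);
            break;
        default:
            // Model-loading progress events forwarded by the progress callback.
            console.log('Loading progress', data);
    }
});

// Request speech for some text with one of the CMU ARCTIC speaker embeddings.
worker.postMessage({
    text: 'Hello from the text to speech worker.',
    speaker_id: 'cmu_us_awb_arctic-wav-arctic_a0001',
});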