refactor: update audio parameter handling in EdgeTTS and stubs

- Remove the `language`, `locale`, and `extra_parameters` parameters from the `EdgeTTS` provider's `create_async_generator` signature in `g4f/Provider/audio/EdgeTTS.py`; callers now pass these values through the `audio` dictionary (see the usage sketch after this list).
- Update voice selection logic to check for `"locale"` and `"language"` keys in the `audio` dictionary, defaulting to `cls.default_locale` when neither is provided, and modify the error message accordingly.
- Build the `extra_parameters` dict passed to `edge_tts.Communicate` from the `audio` dictionary (keys `"rate"`, `"volume"`, and `"pitch"`) instead of from `**kwargs`.
- In `g4f/api/stubs.py`, remove the unused try/except fallback for importing `Annotated`, and import `Messages` from the relative `..typing` module instead of `g4f.typing`.
- Add an `audio: Optional[dict] = None` field to the `ImageGenerationConfig` model.
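
A minimal usage sketch of the new call shape, assuming the provider is invoked directly through its `create_async_generator` entry point (import path inferred from the file path above; the shape of the yielded objects is not part of this diff):

```python
import asyncio

from g4f.Provider.audio.EdgeTTS import EdgeTTS

async def main():
    # Voice options now travel in a single `audio` dict instead of separate
    # `language`/`locale`/`extra_parameters` arguments.
    async for chunk in EdgeTTS.create_async_generator(
        model="",  # no explicit voice: one is picked for the language below
        messages=[{"role": "user", "content": "Hello from EdgeTTS"}],
        audio={"language": "en", "format": "mp3", "rate": "+10%"},
    ):
        print(chunk)  # whatever the provider yields (e.g. a reference to the saved audio)

asyncio.run(main())
```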
Author: hlohaus
Date: 2025-04-19 03:51:37 +02:00
Parent: e83282fc4b
Commit: 1296b3f64f
2 changed files with 12 additions and 18 deletions

g4f/Provider/audio/EdgeTTS.py

@@ -38,10 +38,7 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         messages: Messages,
         proxy: str = None,
         prompt: str = None,
-        language: str = None,
-        locale: str = None,
         audio: dict = {"voice": None, "format": "mp3"},
-        extra_parameters: list[str] = ["rate", "volume", "pitch"],
         **kwargs
     ) -> AsyncResult:
         prompt = format_image_prompt(messages, prompt)
@@ -50,17 +47,17 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         voice = audio.get("voice", model)
         if not voice:
             voices = await VoicesManager.create()
-            if locale is None:
-                if language is None:
+            if "locale" in audio:
+                voices = voices.find(Locale=audio["locale"])
+            elif "language" in audio:
+                if "-" in audio["language"]:
+                    voices = voices.find(Locale=audio["language"])
+                else:
+                    voices = voices.find(Language=audio["language"])
+            else:
                     voices = voices.find(Locale=cls.default_locale)
-                elif "-" in language:
-                    voices = voices.find(Locale=language)
-                else:
-                    voices = voices.find(Language=language)
-            else:
-                voices = voices.find(Locale=locale)
             if not voices:
-                raise ValueError(f"No voices found for language '{language}' and locale '{locale}'.")
+                raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
             voice = random.choice(voices)["Name"]

         format = audio.get("format", "mp3")
@@ -68,7 +65,7 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         target_path = os.path.join(get_media_dir(), filename)
         ensure_media_dir()
-        extra_parameters = {param: kwargs[param] for param in extra_parameters if param in kwargs}
+        extra_parameters = {param: audio[param] for param in ["rate", "volume", "pitch"] if param in audio}
         communicate = edge_tts.Communicate(prompt, voice=voice, proxy=proxy, **extra_parameters)
         await communicate.save(target_path)
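
For reference, the new locale/language lookup can be exercised on its own with the `edge-tts` package; the sketch below restates the branch logic from the hunk above outside the provider (the `default_locale` value `"en-US"` is an assumption here, not taken from this diff):

```python
import asyncio
import random

from edge_tts import VoicesManager

async def pick_voice(audio: dict, default_locale: str = "en-US") -> str:
    voices = await VoicesManager.create()
    if "locale" in audio:
        found = voices.find(Locale=audio["locale"])          # exact locale, e.g. "de-DE"
    elif "language" in audio:
        if "-" in audio["language"]:
            found = voices.find(Locale=audio["language"])    # "en-GB" is really a locale
        else:
            found = voices.find(Language=audio["language"])  # bare "en" matches every English locale
    else:
        found = voices.find(Locale=default_locale)
    if not found:
        raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
    return random.choice(found)["Name"]

print(asyncio.run(pick_voice({"language": "en"})))
```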

g4f/api/stubs.py

@@ -2,12 +2,8 @@ from __future__ import annotations
 from pydantic import BaseModel, Field, model_validator
 from typing import Union, Optional
-try:
-    from typing import Annotated
-except ImportError:
-    class Annotated:
-        pass
-from g4f.typing import Messages
+from ..typing import Messages

 class ChatCompletionsConfig(BaseModel):
     messages: Messages = Field(examples=[[{"role": "system", "content": ""}, {"role": "user", "content": ""}]])
@@ -69,6 +65,7 @@ class ImageGenerationConfig(BaseModel):
     n: Optional[int] = None
     negative_prompt: Optional[str] = None
     resolution: Optional[str] = None
+    audio: Optional[dict] = None

     @model_validator(mode='before')
     def parse_size(cls, values):
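
A standalone sketch of the model change, mirroring only the fields visible in this hunk (this is not the real `ImageGenerationConfig`, which carries more fields); it shows that the new `audio` field is optional and accepts a plain dict of TTS options:

```python
from typing import Optional

from pydantic import BaseModel

# Hypothetical mirror of the touched fields, for illustration only.
class GenerationConfigSketch(BaseModel):
    n: Optional[int] = None
    negative_prompt: Optional[str] = None
    resolution: Optional[str] = None
    audio: Optional[dict] = None

print(GenerationConfigSketch().audio)   # None when the client sends no audio options
print(GenerationConfigSketch(audio={"voice": "en-US-AriaNeural", "rate": "+10%"}).audio)
```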