refactor: update audio parameter handling in EdgeTTS and stubs

- Remove the `language`, `locale`, and `extra_parameters` parameters from the `EdgeTTS` provider's `create_async_generator` signature in `g4f/Provider/audio/EdgeTTS.py`; callers now pass these values through the `audio` dictionary (see the usage sketch after this list).
- Update voice selection logic to check for `"locale"` and `"language"` keys in the `audio` dictionary, defaulting to `cls.default_locale` when neither is provided, and modify the error message accordingly.
- Build the `extra_parameters` dict passed to `edge_tts.Communicate` from the `audio` dictionary (keys `"rate"`, `"volume"`, and `"pitch"`) instead of from `**kwargs`.
- In `g4f/api/stubs.py`, remove the unused try/except fallback for importing `Annotated`, and import `Messages` from the relative `..typing` module instead of `g4f.typing`.
- Add an `audio: Optional[dict] = None` field to the `ImageGenerationConfig` model.
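
A minimal usage sketch of the new call shape, assuming the provider is invoked directly through its `create_async_generator` entry point (import path inferred from the file path above; the shape of the yielded objects is not part of this diff):

```python
import asyncio

from g4f.Provider.audio.EdgeTTS import EdgeTTS

async def main():
    # Voice options now travel in a single `audio` dict instead of separate
    # `language`/`locale`/`extra_parameters` arguments.
    async for chunk in EdgeTTS.create_async_generator(
        model="",  # no explicit voice: one is picked for the language below
        messages=[{"role": "user", "content": "Hello from EdgeTTS"}],
        audio={"language": "en", "format": "mp3", "rate": "+10%"},
    ):
        print(chunk)  # whatever the provider yields (e.g. a reference to the saved audio)

asyncio.run(main())
```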
Author: hlohaus
Date: 2025-04-19 03:51:37 +02:00
Parent: e83282fc4b
Commit: 1296b3f64f
2 changed files with 12 additions and 18 deletions

g4f/Provider/audio/EdgeTTS.py

@@ -38,10 +38,7 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         messages: Messages,
         proxy: str = None,
         prompt: str = None,
-        language: str = None,
-        locale: str = None,
         audio: dict = {"voice": None, "format": "mp3"},
-        extra_parameters: list[str] = ["rate", "volume", "pitch"],
         **kwargs
     ) -> AsyncResult:
         prompt = format_image_prompt(messages, prompt)
@@ -50,17 +47,17 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         voice = audio.get("voice", model)
         if not voice:
             voices = await VoicesManager.create()
-            if locale is None:
-                if language is None:
+            if "locale" in audio:
+                voices = voices.find(Locale=audio["locale"])
+            elif "language" in audio:
+                if "-" in audio["language"]:
+                    voices = voices.find(Locale=audio["language"])
+                else:
+                    voices = voices.find(Language=audio["language"])
+            else:
                     voices = voices.find(Locale=cls.default_locale)
-                elif "-" in language:
-                    voices = voices.find(Locale=language)
-                else:
-                    voices = voices.find(Language=language)
-            else:
-                voices = voices.find(Locale=locale)
             if not voices:
-                raise ValueError(f"No voices found for language '{language}' and locale '{locale}'.")
+                raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
             voice = random.choice(voices)["Name"]

         format = audio.get("format", "mp3")
@@ -68,7 +65,7 @@ class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
         target_path = os.path.join(get_media_dir(), filename)
         ensure_media_dir()
-        extra_parameters = {param: kwargs[param] for param in extra_parameters if param in kwargs}
+        extra_parameters = {param: audio[param] for param in ["rate", "volume", "pitch"] if param in audio}
         communicate = edge_tts.Communicate(prompt, voice=voice, proxy=proxy, **extra_parameters)
         await communicate.save(target_path)
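
For reference, the new locale/language lookup can be exercised on its own with the `edge-tts` package; the sketch below restates the branch logic from the hunk above outside the provider (the `default_locale` value `"en-US"` is an assumption here, not taken from this diff):

```python
import asyncio
import random

from edge_tts import VoicesManager

async def pick_voice(audio: dict, default_locale: str = "en-US") -> str:
    voices = await VoicesManager.create()
    if "locale" in audio:
        found = voices.find(Locale=audio["locale"])          # exact locale, e.g. "de-DE"
    elif "language" in audio:
        if "-" in audio["language"]:
            found = voices.find(Locale=audio["language"])    # "en-GB" is really a locale
        else:
            found = voices.find(Language=audio["language"])  # bare "en" matches every English locale
    else:
        found = voices.find(Locale=default_locale)
    if not found:
        raise ValueError(f"No voices found for language '{audio.get('language')}' and locale '{audio.get('locale')}'.")
    return random.choice(found)["Name"]

print(asyncio.run(pick_voice({"language": "en"})))
```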

g4f/api/stubs.py

@@ -2,12 +2,8 @@ from __future__ import annotations
 from pydantic import BaseModel, Field, model_validator
 from typing import Union, Optional
-try:
-    from typing import Annotated
-except ImportError:
-    class Annotated:
-        pass
-from g4f.typing import Messages
+from ..typing import Messages

 class ChatCompletionsConfig(BaseModel):
     messages: Messages = Field(examples=[[{"role": "system", "content": ""}, {"role": "user", "content": ""}]])
@@ -69,6 +65,7 @@ class ImageGenerationConfig(BaseModel):
     n: Optional[int] = None
     negative_prompt: Optional[str] = None
     resolution: Optional[str] = None
+    audio: Optional[dict] = None

     @model_validator(mode='before')
     def parse_size(cls, values):
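
A standalone sketch of the model change, mirroring only the fields visible in this hunk (this is not the real `ImageGenerationConfig`, which carries more fields); it shows that the new `audio` field is optional and accepts a plain dict of TTS options:

```python
from typing import Optional

from pydantic import BaseModel

# Hypothetical mirror of the touched fields, for illustration only.
class GenerationConfigSketch(BaseModel):
    n: Optional[int] = None
    negative_prompt: Optional[str] = None
    resolution: Optional[str] = None
    audio: Optional[dict] = None

print(GenerationConfigSketch().audio)   # None when the client sends no audio options
print(GenerationConfigSketch(audio={"voice": "en-US-AriaNeural", "rate": "+10%"}).audio)
```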