mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-05 18:20:35 -08:00
- **Docs**
- `docs/file.md`: update upload instructions to use inline `bucket` content parts instead of `tool_calls/bucket_tool`.
- `docs/media.md`: add asynchronous audio transcription example, detailed explanation, and notes.
- **New audio provider**
- Add `g4f/Provider/audio/EdgeTTS.py` implementing Edge Text‑to‑Speech (`EdgeTTS`).
- Create `g4f/Provider/audio/__init__.py` for provider export.
- Register provider in `g4f/Provider/__init__.py`.
- **Refactor image → media**
- Introduce `generated_media/` directory and `get_media_dir()` helper in `g4f/image/copy_images.py`; add `ensure_media_dir()`; keep back‑compat with legacy `generated_images/`.
- Replace `images_dir` references with `get_media_dir()` across:
  - `g4f/api/__init__.py`
  - `g4f/client/stubs.py`
  - `g4f/gui/server/api.py`
  - `g4f/gui/server/backend_api.py`
  - `g4f/image/copy_images.py`
- Rename CLI/API config field/flag from `image_provider` to `media_provider` (`g4f/cli.py`, `g4f/api/__init__.py`, `g4f/client/__init__.py`).
- Extend `g4f/image/__init__.py`:
  - add `MEDIA_TYPE_MAP` and `get_extension()`;
  - revise `is_allowed_extension()` and `to_input_audio()` to support a wider range of media types.
- **Provider adjustments**
- `g4f/Provider/ARTA.py`: swap `raise_error()` parameter order.
- `g4f/Provider/Cloudflare.py`: drop unused `MissingRequirementsError` import; move `get_args_from_nodriver()` inside try; handle `FileNotFoundError`.
- **Core enhancements**
- `g4f/providers/any_provider.py`: use `default_model` instead of literal `"default"`; broaden model/provider matching; update model list cleanup.
- `g4f/models.py`: safeguard provider count logic when model name is falsy.
- `g4f/providers/base_provider.py`: catch `json.JSONDecodeError` when reading auth cache, delete corrupted file.
- `g4f/providers/response.py`: allow `AudioResponse` to accept extra kwargs.
- **Misc**
- Remove obsolete `g4f/image.py`.
- `g4f/Provider/Cloudflare.py`, `g4f/client/types.py`: minor whitespace and import tidy‑ups.
75 lines
No EOL
2.6 KiB
Python
75 lines
No EOL
2.6 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import random
|
|
import asyncio
|
|
|
|
try:
|
|
import edge_tts
|
|
from edge_tts import VoicesManager
|
|
has_edge_tts = True
|
|
except ImportError:
|
|
has_edge_tts = False
|
|
|
|
from ...typing import AsyncResult, Messages
|
|
from ...providers.response import AudioResponse
|
|
from ...image.copy_images import get_filename, get_media_dir, ensure_media_dir
|
|
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
|
|
from ..helper import format_image_prompt
|
|
|
|
class EdgeTTS(AsyncGeneratorProvider, ProviderModelMixin):
    """Text-to-speech provider built on the optional ``edge_tts`` package.

    The text to speak is derived from ``prompt`` / ``messages``. The synthesized
    audio is saved into the media directory and reported to the caller as an
    ``AudioResponse`` pointing at ``/media/<filename>``.
    """

    label = "Edge TTS"
    # The provider is only usable when ``edge_tts`` could be imported.
    working = has_edge_tts
    # Starts out as a placeholder id; get_models() replaces it with a real voice name.
    default_model = "edge-tts"
    default_locale = "en-US"
    # Placeholder model id that must never be handed to edge_tts as a voice name.
    # Kept separately because ``default_model`` is overwritten by get_models().
    _placeholder_model = "edge-tts"

    @classmethod
    def get_models(cls) -> list[str]:
        """Return the names of all available voices, fetching them once.

        On the first call the voice catalogue is loaded through
        ``VoicesManager.create()``, ``default_model`` is set to the first voice
        of ``default_locale`` (when there is one) and the list is cached on the
        class in ``cls.models``.

        NOTE: the catalogue is fetched with ``asyncio.run()``, which cannot be
        called while another event loop is running in the same thread.
        """
        if not cls.models:
            manager = asyncio.run(VoicesManager.create())
            locale_voices = manager.find(Locale=cls.default_locale)
            # Keep the current default instead of raising IndexError when the
            # default locale has no voices.
            if locale_voices:
                cls.default_model = locale_voices[0]["Name"]
            cls.models = [voice["Name"] for voice in manager.voices]
        return cls.models

    @classmethod
    async def _pick_random_voice(cls, language: str | None, locale: str | None) -> str:
        """Pick a random voice name matching ``locale`` or ``language``.

        Precedence: an explicit ``locale`` wins; otherwise a ``language``
        containing ``"-"`` is treated as a locale, any other ``language`` as a
        plain language code, and with neither given ``default_locale`` is used.

        Raises:
            ValueError: if no voice matches the requested language/locale.
        """
        manager = await VoicesManager.create()
        if locale is not None:
            candidates = manager.find(Locale=locale)
        elif language is None:
            candidates = manager.find(Locale=cls.default_locale)
        elif "-" in language:
            candidates = manager.find(Locale=language)
        else:
            candidates = manager.find(Language=language)
        if not candidates:
            raise ValueError(f"No voices found for language '{language}' and locale '{locale}'.")
        return random.choice(candidates)["Name"]

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        prompt: str = None,
        language: str = None,
        locale: str = None,
        # The two defaults below are only ever read, never mutated, so sharing
        # them between calls is safe.
        audio: dict = {"voice": None, "format": "mp3"},
        extra_parameters: list[str] = ["rate", "volume", "pitch"],
        **kwargs
    ) -> AsyncResult:
        """Synthesize speech for the prompt and yield a single ``AudioResponse``.

        Args:
            model: Voice name to use when ``audio`` does not name a voice. The
                placeholder id ``"edge-tts"`` and empty values are ignored.
            messages: Conversation used to derive the prompt when ``prompt``
                is not given.
            proxy: Proxy passed through to ``edge_tts.Communicate``.
            prompt: Explicit text to speak.
            language: Language (``"en"``) or locale (``"en-US"``) used to pick
                a random voice when no voice was selected.
            locale: Locale used to pick a random voice; takes precedence over
                ``language``.
            audio: Options dict; ``"voice"`` selects the voice and ``"format"``
                the file extension (default ``"mp3"``).
            extra_parameters: Names of keyword arguments that are forwarded to
                ``edge_tts.Communicate`` when present in ``kwargs``.

        Raises:
            ValueError: if the prompt is empty or no matching voice exists.
        """
        prompt = format_image_prompt(messages, prompt)
        if not prompt:
            raise ValueError("Prompt is empty.")

        if audio is None:
            audio = {}

        # ``audio.get("voice", model)`` would return None whenever the key is
        # present with a None value (which is the default), so the model would
        # never be used. Fall back explicitly instead, skipping the placeholder
        # id, which is not a valid voice name.
        voice = audio.get("voice")
        if not voice and model and model != cls._placeholder_model:
            voice = model
        if not voice:
            voice = await cls._pick_random_voice(language, locale)

        audio_format = audio.get("format", "mp3")
        filename = get_filename([cls.default_model], prompt, f".{audio_format}", prompt)
        # The target path is resolved before the directory is ensured, matching
        # the original statement order.
        target_path = os.path.join(get_media_dir(), filename)
        ensure_media_dir()

        # Forward only the whitelisted tuning options the caller actually supplied.
        tts_options = {name: kwargs[name] for name in extra_parameters if name in kwargs}
        communicate = edge_tts.Communicate(prompt, voice=voice, proxy=proxy, **tts_options)

        await communicate.save(target_path)
        yield AudioResponse(f"/media/{filename}", voice=voice, prompt=prompt)