gpt4free/g4f/api/stubs.py
hlohaus c3632984f7 feat: add audio speech generation endpoint and media handling refactor
- Added new `/v1/audio/speech` and `/api/{path_provider}/audio/speech` endpoints in `g4f/api/__init__.py` for generating speech from text
- Introduced `AudioSpeechConfig` model in `g4f/api/stubs.py` with fields for input, model, provider, voice, instructions, and response format
- Updated `PollinationsAI.py` to support `modalities` in `kwargs` when checking for audio
- Set default voice for audio models in `PollinationsAI.py` if not provided in `kwargs`
- Added debug print in `PollinationsAI.py` to log request data to text API endpoint
- Extended supported FastAPI response types in `g4f/api/__init__.py` to include `FileResponse` from `starlette.responses`
- Added `BackgroundTask` to clean up generated audio files after serving in `g4f/api/__init__.py`
- Modified `AnyProvider.py` to include `EdgeTTS`, `gTTS`, and `MarkItDown` as audio providers when `audio` is in `kwargs` or `modalities`
- Created `resolve_media` helper in `g4f/client/__init__.py` to standardize media handling for audio/image input
- Replaced manual media preprocessing in `Completions`, `AsyncCompletions`, and `Images` classes with `resolve_media`
- Added `/docs/README.md` with a link to the documentation site
2025-04-26 12:21:49 +02:00

129 lines
No EOL
3.9 KiB
Python

from __future__ import annotations
from pydantic import BaseModel, Field, model_validator
from typing import Union, Optional
from ..typing import Messages
class ChatCompletionsConfig(BaseModel):
    """Request options for a chat completion.

    ``messages`` is the only field without a default; every other option may
    be omitted by the client. Fields whose default is ``None`` are annotated
    ``Optional`` so that an explicit JSON ``null`` validates the same way as
    leaving the key out.
    """

    messages: Messages = Field(examples=[[{"role": "system", "content": ""}, {"role": "user", "content": ""}]])
    model: str = Field(default="")
    provider: Optional[str] = None
    stream: bool = False
    image: Optional[str] = None
    image_name: Optional[str] = None
    images: Optional[list[tuple[str, str]]] = None
    media: Optional[list[tuple[str, str]]] = None
    # Declared exactly once. A second ``modalities`` annotation further down
    # the class body would silently replace this default, so the effective
    # default (None) is spelled out here, at the field's original position.
    modalities: Optional[list[str]] = None
    temperature: Optional[float] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    top_p: Optional[float] = None
    max_tokens: Optional[int] = None
    # Either a single stop sequence or a list of them.
    stop: Union[list[str], str, None] = None
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    web_search: Optional[bool] = None
    proxy: Optional[str] = None
    conversation_id: Optional[str] = None
    conversation: Optional[dict] = None
    return_conversation: Optional[bool] = None
    history_disabled: Optional[bool] = None
    timeout: Optional[int] = None
    tool_calls: list = Field(default=[], examples=[[
        {
            "function": {
                "arguments": {"query":"search query", "max_results":5, "max_words": 2500, "backend": "auto", "add_text": True, "timeout": 5},
                "name": "search_tool"
            },
            "type": "function"
        }
    ]])
    tools: Optional[list] = None
    parallel_tool_calls: Optional[bool] = None
    tool_choice: Optional[str] = None
    reasoning_effort: Optional[str] = None
    logit_bias: Optional[dict] = None
    audio: Optional[dict] = None
    response_format: Optional[dict] = None
    extra_data: Optional[dict] = None
class ImageGenerationConfig(BaseModel):
    """Request options for generating an image from a text prompt.

    Only ``prompt`` is required. ``width`` and ``height`` can be supplied
    directly, or derived from a ``size`` entry of the form
    ``"<width>x<height>"`` (handled by ``parse_size``).
    """

    prompt: str
    model: Optional[str] = None
    provider: Optional[str] = None
    response_format: Optional[str] = None
    api_key: Optional[str] = None
    proxy: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    num_inference_steps: Optional[int] = None
    seed: Optional[int] = None
    guidance_scale: Optional[int] = None
    aspect_ratio: Optional[str] = None
    n: Optional[int] = None
    negative_prompt: Optional[str] = None
    resolution: Optional[str] = None
    audio: Optional[dict] = None

    @model_validator(mode='before')
    @classmethod
    def parse_size(cls, values):
        """Fill ``width`` and ``height`` from a ``size`` entry when needed.

        Runs on the raw input before field validation.

        Args:
            values: The raw, not yet validated input for the model.

        Returns:
            The input, with ``width`` and ``height`` set when a parseable
            ``size`` was present and they were not both already given.
        """
        # A 'before' validator receives whatever the caller passed. Anything
        # that is not a dict has no ``.get``; return it untouched so pydantic
        # reports an ordinary validation error instead of this hook raising
        # AttributeError.
        if not isinstance(values, dict):
            return values
        # Explicit dimensions take precedence over ``size``.
        if values.get('width') is not None and values.get('height') is not None:
            return values
        size = values.get('size')
        if size:
            try:
                width, height = map(int, size.split('x'))
            except (ValueError, AttributeError):
                # Best effort by design: a malformed or non-string ``size``
                # is ignored and the dimensions stay as they were.
                pass
            else:
                values['width'] = width
                values['height'] = height
        return values
class ProviderResponseModel(BaseModel):
    """Response schema describing one provider."""

    id: str
    # Constant type tag; defaults to "provider".
    object: str = Field(default="provider")
    created: int
    # Nullable, but declared without a default value.
    url: Optional[str]
    label: Optional[str]
class ProviderResponseDetailModel(ProviderResponseModel):
    """Provider response extended with model-name lists and parameter names.

    Inherits every field of ``ProviderResponseModel`` and adds four required
    lists of strings.
    """

    models: list[str]
    image_models: list[str]
    vision_models: list[str]
    params: list[str]
class ModelResponseModel(BaseModel):
    """Response schema describing one model."""

    id: str
    # Constant type tag; defaults to "model".
    object: str = Field(default="model")
    created: int
    # Nullable, but declared without a default value.
    owned_by: Optional[str]
class UploadResponseModel(BaseModel):
    """Response schema for an upload: a bucket identifier and a URL.

    Both fields are required strings.
    """

    bucket_id: str
    url: str
class ErrorResponseModel(BaseModel):
    """Error response envelope.

    Wraps the error message object and optionally names the model and
    provider associated with the failed request.
    """

    # ``ErrorResponseMessageModel`` is defined after this class; the forward
    # reference works because the module's annotations are lazily evaluated.
    error: ErrorResponseMessageModel
    model: Optional[str] = Field(default=None)
    provider: Optional[str] = Field(default=None)
class ErrorResponseMessageModel(BaseModel):
    """Inner error object holding the error text as a required string."""

    message: str
class FileResponseModel(BaseModel):
    """Response schema carrying a single required file name."""

    filename: str
class TranscriptionResponseModel(BaseModel):
    """Response schema for a transcription result.

    All three fields are required strings.
    """

    text: str
    # Presumably the model and provider that produced ``text`` - confirm
    # against the endpoint that builds this response.
    model: str
    provider: str
class AudioSpeechConfig(BaseModel):
    """Request options for generating speech audio from text.

    Only ``input`` is required. The speaking instructions can be sent under
    either the correctly spelled ``instructions`` key or the legacy
    ``instrcutions`` key; both populate the ``instrcutions`` attribute.
    """

    input: str
    model: Optional[str] = None
    provider: Optional[str] = None
    voice: Optional[str] = None
    # NOTE: the attribute name is misspelled. It is kept unchanged because
    # code outside this class may read ``config.instrcutions``; the validator
    # below makes the correctly spelled request key work as well.
    instrcutions: str = "Speech this text in a natural way."
    response_format: Optional[str] = None

    @model_validator(mode='before')
    @classmethod
    def accept_instructions_spelling(cls, values):
        """Map a correctly spelled ``instructions`` key onto ``instrcutions``.

        Without this, a request using ``instructions`` would be treated as an
        unknown key and the default instruction text would be used instead.

        Args:
            values: The raw, not yet validated input for the model.

        Returns:
            The input, with ``instructions`` renamed to ``instrcutions`` when
            only the correctly spelled key was supplied.
        """
        # Leave non-dict input alone so pydantic can report it normally.
        if not isinstance(values, dict):
            return values
        # An explicitly supplied legacy key wins, so existing clients see no
        # change in behavior.
        if 'instructions' in values and 'instrcutions' not in values:
            values = dict(values)  # do not mutate the caller's mapping
            values['instrcutions'] = values.pop('instructions')
        return values