mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-06 02:30:41 -08:00
- Added new `/v1/audio/speech` and `/api/{path_provider}/audio/speech` endpoints in `g4f/api/__init__.py` for generating speech from text
- Introduced `AudioSpeechConfig` model in `g4f/api/stubs.py` with fields for input, model, provider, voice, instructions, and response format
- Updated `PollinationsAI.py` to support `modalities` in `kwargs` when checking for audio
- Set default voice for audio models in `PollinationsAI.py` if not provided in `kwargs`
- Added a debug print in `PollinationsAI.py` that logs the request data sent to the text API endpoint
- Extended supported FastAPI response types in `g4f/api/__init__.py` to include `FileResponse` from `starlette.responses`
- Added `BackgroundTask` to clean up generated audio files after serving in `g4f/api/__init__.py`
- Modified `AnyProvider.py` to include `EdgeTTS`, `gTTS`, and `MarkItDown` as audio providers when `audio` is in `kwargs` or `modalities`
- Created `resolve_media` helper in `g4f/client/__init__.py` to standardize media handling for audio/image input
- Replaced manual media preprocessing in `Completions`, `AsyncCompletions`, and `Images` classes with `resolve_media`
- Added `/docs/README.md` with a link to the documentation site
129 lines
No EOL
3.9 KiB
Python
129 lines
No EOL
3.9 KiB
Python
from __future__ import annotations
|
|
|
|
from pydantic import BaseModel, Field, model_validator
|
|
from typing import Union, Optional
|
|
|
|
from ..typing import Messages
|
|
|
|
class ChatCompletionsConfig(BaseModel):
    """Pydantic model holding the options of a chat-completions request.

    Only ``messages`` is required; every other field has a default.
    """

    messages: Messages = Field(examples=[[{"role": "system", "content": ""}, {"role": "user", "content": ""}]])
    model: str = Field(default="")
    provider: Optional[str] = None
    stream: bool = False
    image: Optional[str] = None
    image_name: Optional[str] = None
    images: Optional[list[tuple[str, str]]] = None
    media: Optional[list[tuple[str, str]]] = None
    # Declared exactly once. A second, later declaration with default None
    # used to override an earlier ["text", "audio"] default, so None has
    # always been the effective default; the field keeps its first position
    # so the field order is unchanged.
    modalities: Optional[list[str]] = None
    temperature: Optional[float] = None
    presence_penalty: Optional[float] = None
    frequency_penalty: Optional[float] = None
    top_p: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Union[list[str], str, None] = None
    api_key: Optional[str] = None
    # Defaults to None, so the annotation is Optional: an explicit JSON
    # ``null`` from a client is accepted instead of failing validation.
    api_base: Optional[str] = None
    web_search: Optional[bool] = None
    proxy: Optional[str] = None
    conversation_id: Optional[str] = None
    conversation: Optional[dict] = None
    return_conversation: Optional[bool] = None
    history_disabled: Optional[bool] = None
    timeout: Optional[int] = None
    tool_calls: list = Field(default=[], examples=[[
        {
            "function": {
                "arguments": {"query":"search query", "max_results":5, "max_words": 2500, "backend": "auto", "add_text": True, "timeout": 5},
                "name": "search_tool"
            },
            "type": "function"
        }
    ]])
    # Same reasoning as ``api_base``: None is the default, so allow it explicitly.
    tools: Optional[list] = None
    parallel_tool_calls: Optional[bool] = None
    tool_choice: Optional[str] = None
    reasoning_effort: Optional[str] = None
    logit_bias: Optional[dict] = None
    audio: Optional[dict] = None
    response_format: Optional[dict] = None
    extra_data: Optional[dict] = None
|
|
|
|
class ImageGenerationConfig(BaseModel):
    """Pydantic model holding the options of an image-generation request.

    Only ``prompt`` is required. A ``size`` key of the form
    ``"<width>x<height>"`` in the raw input is translated into ``width`` and
    ``height`` by :meth:`parse_size`; ``size`` itself is not a declared field.
    """

    prompt: str
    model: Optional[str] = None
    provider: Optional[str] = None
    response_format: Optional[str] = None
    api_key: Optional[str] = None
    proxy: Optional[str] = None
    width: Optional[int] = None
    height: Optional[int] = None
    num_inference_steps: Optional[int] = None
    seed: Optional[int] = None
    guidance_scale: Optional[int] = None
    aspect_ratio: Optional[str] = None
    n: Optional[int] = None
    negative_prompt: Optional[str] = None
    resolution: Optional[str] = None
    audio: Optional[dict] = None

    @model_validator(mode='before')
    @classmethod
    def parse_size(cls, values):
        """Derive ``width`` and ``height`` from a ``size`` entry in the raw input.

        Args:
            values: The raw, not yet validated input for the model.

        Returns:
            The same ``values`` object. When it is a dict, contains a
            parseable ``size`` and does not already carry both ``width`` and
            ``height``, those two keys are set from ``size``.
        """
        # A 'before' validator sees the raw input, which need not be a dict;
        # leave anything else for pydantic's own validation to handle.
        if not isinstance(values, dict):
            return values

        # Explicit dimensions take precedence over ``size``.
        if values.get('width') is not None and values.get('height') is not None:
            return values

        size = values.get('size')
        if size:
            try:
                width, height = map(int, size.split('x'))
            except (ValueError, AttributeError):
                # If the format is incorrect, we simply ignore it.
                pass
            else:
                values['width'] = width
                values['height'] = height
        return values
|
|
|
|
class ProviderResponseModel(BaseModel):
    """Response schema describing a single provider entry."""

    id: str
    # Constant discriminator; defaults to "provider".
    object: str = "provider"
    created: int
    # ``url`` and ``label`` have no default: they must be supplied, but may be None.
    url: Union[str, None]
    label: Union[str, None]
|
|
|
|
class ProviderResponseDetailModel(ProviderResponseModel):
    """Provider entry extended with model lists and parameter names.

    Inherits every field of ``ProviderResponseModel`` and adds four required
    lists of strings.
    """

    models: list[str]
    image_models: list[str]
    vision_models: list[str]
    params: list[str]
|
|
|
|
class ModelResponseModel(BaseModel):
    """Response schema describing a single model entry."""

    id: str
    # Constant discriminator; defaults to "model".
    object: str = "model"
    created: int
    # No default: must be supplied, but may be None.
    owned_by: Union[str, None]
|
|
|
|
class UploadResponseModel(BaseModel):
    """Response schema for an upload: a bucket id and a URL, both required."""

    bucket_id: str
    url: str
|
|
|
|
class ErrorResponseModel(BaseModel):
    """Response schema for an error, optionally naming the model and provider."""

    # ``ErrorResponseMessageModel`` is defined further down in this module;
    # the forward reference works because annotations are postponed
    # (``from __future__ import annotations`` at the top of the file).
    error: ErrorResponseMessageModel
    model: Union[str, None] = None
    provider: Union[str, None] = None
|
|
|
|
class ErrorResponseMessageModel(BaseModel):
    """Payload of an error response: a single required message string."""

    message: str
|
|
|
|
class FileResponseModel(BaseModel):
    """Response schema carrying a single required file name."""

    filename: str
|
|
|
|
class TranscriptionResponseModel(BaseModel):
    """Response schema for a transcription: text, model and provider, all required."""

    text: str
    model: str
    provider: str
|
|
|
|
class AudioSpeechConfig(BaseModel):
    """Pydantic model holding the options of a text-to-speech request.

    Only ``input`` is required.

    The instructions field is spelled ``instrcutions``. That spelling is kept
    so that existing code reading the attribute, and clients sending that key,
    keep working. The correctly spelled ``instructions`` key is additionally
    accepted on input (see :meth:`accept_instructions_key`).
    """

    input: str
    model: Optional[str] = None
    provider: Optional[str] = None
    voice: Optional[str] = None
    instrcutions: str = "Speech this text in a natural way."
    response_format: Optional[str] = None

    @model_validator(mode='before')
    @classmethod
    def accept_instructions_key(cls, values):
        """Map a correctly spelled ``instructions`` key onto ``instrcutions``.

        Args:
            values: The raw, not yet validated input for the model.

        Returns:
            ``values`` unchanged, or a shallow copy with ``instrcutions`` set
            from ``instructions`` when only the latter was provided.
        """
        # A 'before' validator sees the raw input, which need not be a dict.
        if not isinstance(values, dict):
            return values
        # The legacy key wins when both are present. A null ``instructions``
        # is skipped so that the field's default still applies.
        if "instrcutions" not in values and values.get("instructions") is not None:
            # Copy instead of mutating the caller's dict.
            values = {**values, "instrcutions": values["instructions"]}
        return values