Add audio example usage

This commit is contained in:
hlohaus 2025-03-21 05:13:59 +01:00
parent c97ba0c88e
commit 705ad02954
5 changed files with 46 additions and 5 deletions

28
etc/examples/audio.py Normal file
View file

@ -0,0 +1,28 @@
import asyncio
from g4f.client import AsyncClient
import g4f.Provider
import g4f.models
async def main():
    """Run two audio examples with the g4f async client.

    First requests speech from the "openai-audio" model through the
    PollinationsAI provider and saves the returned message to ``alloy.mp3``.
    Then opens the local file ``audio.wav``, sends it to the Microsoft_Phi_4
    provider with a transcription prompt, and prints the reply's content.
    """
    client = AsyncClient(provider=g4f.Provider.PollinationsAI)

    # Generate audio with PollinationsAI
    speech_prompt = [{"role": "user", "content": "Say good day to the world"}]
    speech = await client.chat.completions.create(
        model="openai-audio",
        messages=speech_prompt,
        audio={"voice": "alloy", "format": "mp3"},
    )
    spoken_message = speech.choices[0].message
    spoken_message.save("alloy.mp3")

    # Transcribe an audio file
    with open("audio.wav", "rb") as audio_file:
        transcription = await client.chat.completions.create(
            messages="Transcribe this audio",
            provider=g4f.Provider.Microsoft_Phi_4,
            media=[[audio_file, "audio.wav"]],
            modalities=["text"],
        )
        print(transcription.choices[0].message.content)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
asyncio.run(main())

View file

@ -152,7 +152,7 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
top_p: float = 1, top_p: float = 1,
frequency_penalty: float = None, frequency_penalty: float = None,
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities"], extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities", "audio"],
**kwargs **kwargs
) -> AsyncResult: ) -> AsyncResult:
# Load model list # Load model list

View file

@ -18,6 +18,7 @@ class ChatCompletionsConfig(BaseModel):
image_name: Optional[str] = None image_name: Optional[str] = None
images: Optional[list[tuple[str, str]]] = None images: Optional[list[tuple[str, str]]] = None
media: Optional[list[tuple[str, str]]] = None media: Optional[list[tuple[str, str]]] = None
modalities: Optional[list[str]] = ["text", "audio"]
temperature: Optional[float] = None temperature: Optional[float] = None
presence_penalty: Optional[float] = None presence_penalty: Optional[float] = None
frequency_penalty: Optional[float] = None frequency_penalty: Optional[float] = None

View file

@ -1,8 +1,10 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional, List, Dict, Any from typing import Optional, List
from time import time from time import time
from ..image import extract_data_uri
from ..client.helper import filter_markdown
from .helper import filter_none from .helper import filter_none
try: try:
@ -103,6 +105,16 @@ class ChatCompletionMessage(BaseModel):
def model_construct(cls, content: str, tool_calls: list = None): def model_construct(cls, content: str, tool_calls: list = None):
return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls)) return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls))
def save(self, filepath: str, allowd_types = None) -> None:
    """Write this message's content to ``filepath``.

    If the content starts with ``data:``, it is passed to
    ``extract_data_uri`` and the result is written in binary mode.
    Otherwise the content is passed through ``filter_markdown`` together
    with ``allowd_types``; when that returns ``None`` no file is written.

    Args:
        filepath: Destination path of the file to create or overwrite.
        allowd_types: Forwarded unchanged to ``filter_markdown``
            (the spelling is kept for backward compatibility).
    """
    if self.content.startswith("data:"):
        with open(filepath, "wb") as f:
            f.write(extract_data_uri(self.content))
        return
    content = filter_markdown(self.content, allowd_types)
    if content is None:
        return
    # Explicit UTF-8 so the output does not depend on the platform's
    # locale encoding (which can fail on non-ASCII text).
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
class ChatCompletionChoice(BaseModel): class ChatCompletionChoice(BaseModel):
index: int index: int
message: ChatCompletionMessage message: ChatCompletionMessage
@ -118,7 +130,7 @@ class ChatCompletion(BaseModel):
created: int created: int
model: str model: str
provider: Optional[str] provider: Optional[str]
choices: List[ChatCompletionChoice] choices: list[ChatCompletionChoice]
usage: UsageModel usage: UsageModel
@classmethod @classmethod

View file

@ -248,14 +248,14 @@ def to_input_audio(audio: ImageType, filename: str = None) -> str:
if filename is not None and (filename.endswith(".wav") or filename.endswith(".mp3")): if filename is not None and (filename.endswith(".wav") or filename.endswith(".mp3")):
return { return {
"data": base64.b64encode(to_bytes(audio)).decode(), "data": base64.b64encode(to_bytes(audio)).decode(),
"format": "wav" if filename.endswith(".wav") else "mpeg" "format": "wav" if filename.endswith(".wav") else "mp3"
} }
raise ValueError("Invalid input audio") raise ValueError("Invalid input audio")
audio = re.match(r'^data:audio/(\w+);base64,(.+?)', audio) audio = re.match(r'^data:audio/(\w+);base64,(.+?)', audio)
if audio: if audio:
return { return {
"data": audio.group(2), "data": audio.group(2),
"format": audio.group(1), "format": audio.group(1).replace("mpeg", "mp3")
} }
raise ValueError("Invalid input audio") raise ValueError("Invalid input audio")