mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-06 02:30:41 -08:00
Add audio example usage
This commit is contained in:
parent
c97ba0c88e
commit
705ad02954
5 changed files with 46 additions and 5 deletions
28
etc/examples/audio.py
Normal file
28
etc/examples/audio.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import asyncio
|
||||
from g4f.client import AsyncClient
|
||||
import g4f.Provider
|
||||
import g4f.models
|
||||
|
||||
async def main():
    """Run two audio examples: generate speech, then transcribe a recording.

    The first request asks PollinationsAI's ``openai-audio`` model to speak a
    short sentence and stores the result as ``alloy.mp3``. The second request
    sends the local file ``audio.wav`` to the ``Microsoft_Phi_4`` provider and
    prints the text it returns.
    """
    audio_client = AsyncClient(provider=g4f.Provider.PollinationsAI)

    # Text to speech: request an mp3 rendered with the "alloy" voice.
    speech_options = {"voice": "alloy", "format": "mp3"}
    prompt = [{"role": "user", "content": "Say good day to the world"}]
    speech = await audio_client.chat.completions.create(
        model="openai-audio",
        messages=prompt,
        audio=speech_options,
    )
    speech.choices[0].message.save("alloy.mp3")

    # Speech to text: transcribe an audio file with a different provider,
    # asking for a text-only reply.
    with open("audio.wav", "rb") as wav_file:
        transcript = await audio_client.chat.completions.create(
            messages="Transcribe this audio",
            provider=g4f.Provider.Microsoft_Phi_4,
            media=[[wav_file, "audio.wav"]],
            modalities=["text"],
        )
        print(transcript.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
|
|
@ -152,7 +152,7 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
|
|||
top_p: float = 1,
|
||||
frequency_penalty: float = None,
|
||||
response_format: Optional[dict] = None,
|
||||
extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities"],
|
||||
extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities", "audio"],
|
||||
**kwargs
|
||||
) -> AsyncResult:
|
||||
# Load model list
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ class ChatCompletionsConfig(BaseModel):
|
|||
image_name: Optional[str] = None
|
||||
images: Optional[list[tuple[str, str]]] = None
|
||||
media: Optional[list[tuple[str, str]]] = None
|
||||
modalities: Optional[list[str]] = ["text", "audio"]
|
||||
temperature: Optional[float] = None
|
||||
presence_penalty: Optional[float] = None
|
||||
frequency_penalty: Optional[float] = None
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, List, Dict, Any
|
||||
from typing import Optional, List
|
||||
from time import time
|
||||
|
||||
from ..image import extract_data_uri
|
||||
from ..client.helper import filter_markdown
|
||||
from .helper import filter_none
|
||||
|
||||
try:
|
||||
|
|
@ -103,6 +105,16 @@ class ChatCompletionMessage(BaseModel):
|
|||
def model_construct(cls, content: str, tool_calls: list = None):
    """Build an assistant message from ``content`` and optional ``tool_calls``.

    The role is always set to ``"assistant"``. ``tool_calls`` is routed through
    ``filter_none`` before being forwarded (presumably so that an unset value
    is omitted rather than stored as ``None`` -- confirm against the helper).
    """
    optional_fields = filter_none(tool_calls=tool_calls)
    return super().model_construct(
        role="assistant",
        content=content,
        **optional_fields,
    )
|
||||
|
||||
def save(self, filepath: str, allowd_types=None):
    """Write the message content to ``filepath``.

    Content starting with ``data:`` is treated as a data URI: it is decoded
    with ``extract_data_uri`` and written in binary mode. Any other content is
    passed through ``filter_markdown`` together with ``allowd_types``; when
    that returns something other than ``None`` the result is written as UTF-8
    text, otherwise no file is written.

    Args:
        filepath: Destination path; an existing file is overwritten.
        allowd_types: Forwarded unchanged to ``filter_markdown`` (spelling kept
            for backward compatibility with existing keyword callers).
    """
    if self.content.startswith("data:"):
        # Decode before opening so a malformed data URI cannot leave behind a
        # truncated, empty file at ``filepath``.
        payload = extract_data_uri(self.content)
        with open(filepath, "wb") as f:
            f.write(payload)
        return
    content = filter_markdown(self.content, allowd_types)
    if content is not None:
        # Explicit encoding: the platform default (e.g. cp1252 on Windows)
        # can fail on non-ASCII characters in model output.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
|
||||
|
||||
class ChatCompletionChoice(BaseModel):
|
||||
index: int
|
||||
message: ChatCompletionMessage
|
||||
|
|
@ -118,7 +130,7 @@ class ChatCompletion(BaseModel):
|
|||
created: int
|
||||
model: str
|
||||
provider: Optional[str]
|
||||
choices: List[ChatCompletionChoice]
|
||||
choices: list[ChatCompletionChoice]
|
||||
usage: UsageModel
|
||||
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -248,14 +248,14 @@ def to_input_audio(audio: ImageType, filename: str = None) -> str:
|
|||
if filename is not None and (filename.endswith(".wav") or filename.endswith(".mp3")):
|
||||
return {
|
||||
"data": base64.b64encode(to_bytes(audio)).decode(),
|
||||
"format": "wav" if filename.endswith(".wav") else "mpeg"
|
||||
"format": "wav" if filename.endswith(".wav") else "mp3"
|
||||
}
|
||||
raise ValueError("Invalid input audio")
|
||||
audio = re.match(r'^data:audio/(\w+);base64,(.+?)', audio)
|
||||
if audio:
|
||||
return {
|
||||
"data": audio.group(2),
|
||||
"format": audio.group(1),
|
||||
"format": audio.group(1).replace("mpeg", "mp3")
|
||||
}
|
||||
raise ValueError("Invalid input audio")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue