mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-06 02:30:41 -08:00
Add audio example usage
This commit is contained in:
parent
c97ba0c88e
commit
705ad02954
5 changed files with 46 additions and 5 deletions
28
etc/examples/audio.py
Normal file
28
etc/examples/audio.py
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
import asyncio

from g4f.client import AsyncClient
import g4f.Provider
import g4f.models


async def main():
    """Example: generate speech audio and transcribe an audio file with g4f.

    Requires network access; writes ``alloy.mp3`` to the current directory
    and expects an ``audio.wav`` file to exist for the transcription step.
    """
    client = AsyncClient(provider=g4f.Provider.PollinationsAI)

    # Generate audio with PollinationsAI
    response = await client.chat.completions.create(
        model="openai-audio",
        messages=[{"role": "user", "content": "Say good day to the world"}],
        audio={"voice": "alloy", "format": "mp3"},
    )
    # The assistant message carries the audio as a data URI; save() decodes it.
    response.choices[0].message.save("alloy.mp3")

    # Transcribe an audio file
    with open("audio.wav", "rb") as audio_file:
        response = await client.chat.completions.create(
            messages="Transcribe this audio",
            provider=g4f.Provider.Microsoft_Phi_4,
            media=[[audio_file, "audio.wav"]],
            modalities=["text"],
        )
        print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())
@ -152,7 +152,7 @@ class PollinationsAI(AsyncGeneratorProvider, ProviderModelMixin):
|
||||||
top_p: float = 1,
|
top_p: float = 1,
|
||||||
frequency_penalty: float = None,
|
frequency_penalty: float = None,
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities"],
|
extra_parameters: list[str] = ["tools", "parallel_tool_calls", "tool_choice", "reasoning_effort", "logit_bias", "voice", "modalities", "audio"],
|
||||||
**kwargs
|
**kwargs
|
||||||
) -> AsyncResult:
|
) -> AsyncResult:
|
||||||
# Load model list
|
# Load model list
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ class ChatCompletionsConfig(BaseModel):
|
||||||
image_name: Optional[str] = None
|
image_name: Optional[str] = None
|
||||||
images: Optional[list[tuple[str, str]]] = None
|
images: Optional[list[tuple[str, str]]] = None
|
||||||
media: Optional[list[tuple[str, str]]] = None
|
media: Optional[list[tuple[str, str]]] = None
|
||||||
|
modalities: Optional[list[str]] = ["text", "audio"]
|
||||||
temperature: Optional[float] = None
|
temperature: Optional[float] = None
|
||||||
presence_penalty: Optional[float] = None
|
presence_penalty: Optional[float] = None
|
||||||
frequency_penalty: Optional[float] = None
|
frequency_penalty: Optional[float] = None
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
|
from ..image import extract_data_uri
|
||||||
|
from ..client.helper import filter_markdown
|
||||||
from .helper import filter_none
|
from .helper import filter_none
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -103,6 +105,16 @@ class ChatCompletionMessage(BaseModel):
|
||||||
def model_construct(cls, content: str, tool_calls: list = None):
|
def model_construct(cls, content: str, tool_calls: list = None):
|
||||||
return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls))
|
return super().model_construct(role="assistant", content=content, **filter_none(tool_calls=tool_calls))
|
||||||
|
|
||||||
|
def save(self, filepath: str, allowd_types = None):
    """Save this message's content to *filepath*.

    If the content is a data URI (e.g. generated audio or images), the
    embedded payload is decoded and written as binary. Otherwise the
    content is passed through ``filter_markdown`` and written as text;
    if filtering yields ``None``, nothing is written.

    Args:
        filepath: Destination file path.
        allowd_types: Allowed content types forwarded to ``filter_markdown``.
            NOTE(review): parameter name is a typo of "allowed_types" —
            kept as-is for backward compatibility with existing callers.
    """
    if self.content.startswith("data:"):
        # Binary payload embedded as "data:<mime>;base64,<payload>"
        with open(filepath, "wb") as f:
            f.write(extract_data_uri(self.content))
        return
    content = filter_markdown(self.content, allowd_types)
    if content is not None:
        with open(filepath, "w") as f:
            f.write(content)
|
||||||
|
|
||||||
class ChatCompletionChoice(BaseModel):
|
class ChatCompletionChoice(BaseModel):
|
||||||
index: int
|
index: int
|
||||||
message: ChatCompletionMessage
|
message: ChatCompletionMessage
|
||||||
|
|
@ -118,7 +130,7 @@ class ChatCompletion(BaseModel):
|
||||||
created: int
|
created: int
|
||||||
model: str
|
model: str
|
||||||
provider: Optional[str]
|
provider: Optional[str]
|
||||||
choices: List[ChatCompletionChoice]
|
choices: list[ChatCompletionChoice]
|
||||||
usage: UsageModel
|
usage: UsageModel
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
||||||
|
|
def to_input_audio(audio: "ImageType", filename: str = None) -> dict:
    """Convert raw audio data or an audio data URI into an ``input_audio`` dict.

    Args:
        audio: Raw audio data (anything ``to_bytes`` accepts) or a
            ``data:audio/<fmt>;base64,<payload>`` URI string.
        filename: Optional filename whose extension (``.wav``/``.mp3``)
            declares the format of raw audio data.

    Returns:
        dict with ``data`` (base64-encoded payload) and ``format`` keys.

    Raises:
        ValueError: If neither a usable filename nor a valid data URI is given.
    """
    if filename is not None and (filename.endswith(".wav") or filename.endswith(".mp3")):
        return {
            "data": base64.b64encode(to_bytes(audio)).decode(),
            "format": "wav" if filename.endswith(".wav") else "mp3"
        }
    # BUG FIX: the original pattern ended with a lazy group "(.+?)", which
    # matches the minimal one character, truncating the base64 payload to a
    # single byte. Use a greedy "(.+)" to capture the whole payload.
    match = re.match(r'^data:audio/(\w+);base64,(.+)', audio)
    if match:
        return {
            "data": match.group(2),
            # API expects "mp3", not the MIME subtype "mpeg"
            "format": match.group(1).replace("mpeg", "mp3")
        }
    raise ValueError("Invalid input audio")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue