mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-06 02:30:41 -08:00
- Added try-except block to catch RuntimeError around asyncio.run(nodriver_read_models()) in Cloudflare.py to set cls.models to fallback_models if encountered - Corrected indentation of "followups" key in PollinationsAI.py from 43 to 44, changing it from nested to proper dictionary key - No other code logic changed in these files
105 lines
3.2 KiB
Python
105 lines
3.2 KiB
Python
from typing import Any, BinaryIO, Optional

from markitdown.converters._exiftool import exiftool_metadata
from markitdown._base_converter import DocumentConverter, DocumentConverterResult
from markitdown._stream_info import StreamInfo
from markitdown._exceptions import MissingDependencyException

from ._transcribe_audio import transcribe_audio
|
|
|
|
# MIME type prefixes handled by AudioConverter; a stream is accepted when its
# lower-cased mimetype starts with any one of these.
ACCEPTED_MIME_TYPE_PREFIXES = [
    "audio/x-wav",
    "audio/mpeg",
    "video/mp4",
    "video/webm",
    "audio/webm",
]
|
|
|
|
# File extensions (lower-case, with the leading dot) handled by AudioConverter.
ACCEPTED_FILE_EXTENSIONS = [
    ".wav",
    ".mp3",
    ".m4a",
    ".mp4",
    ".webm",
]
|
|
|
|
class AudioConverter(DocumentConverter):
    """
    Converts audio files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed).

    When a transcript is obtained it becomes the whole markdown result; the
    metadata lines are only returned when no transcript was produced.
    """

    # Metadata keys copied into the output, in this order, when present.
    # "Duration" is deliberately left out: wrong values when read from memory.
    _METADATA_FIELDS = (
        "Title",
        "Artist",
        "Author",
        "Band",
        "Album",
        "Genre",
        "Track",
        "DateTimeOriginal",
        "CreateDate",
        "NumChannels",
        "SampleRate",
        "AvgBytesPerSec",
        "BitsPerSample",
    )

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,  # Options to pass to the converter
    ) -> bool:
        """Return True when the stream's extension or mimetype is supported.

        Both values are compared case-insensitively. The extension must be one
        of ACCEPTED_FILE_EXTENSIONS, or the mimetype must start with one of
        ACCEPTED_MIME_TYPE_PREFIXES.
        """
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()

        if extension in ACCEPTED_FILE_EXTENSIONS:
            return True

        return any(
            mimetype.startswith(prefix) for prefix in ACCEPTED_MIME_TYPE_PREFIXES
        )

    @staticmethod
    def _detect_audio_format(extension: str, mimetype: str) -> Optional[str]:
        """Map an already lower-cased extension/mimetype to a format name, or None."""
        if extension == ".wav" or mimetype == "audio/x-wav":
            return "wav"
        if extension == ".mp3" or mimetype == "audio/mpeg":
            return "mp3"
        if extension in (".mp4", ".m4a") or mimetype == "video/mp4":
            return "mp4"
        if extension == ".webm" or mimetype in ("audio/webm", "video/webm"):
            return "webm"
        return None

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        recognition_language: Optional[str] = None,
        **kwargs: Any,  # Options to pass to the converter
    ) -> DocumentConverterResult:
        """Build the markdown result for an audio/video stream.

        Args:
            file_stream: Binary stream holding the media data.
            stream_info: Supplies the extension and mimetype used to pick the
                audio format.
            recognition_language: Forwarded to `transcribe_audio` as
                `language`; may be None.
            **kwargs: Converter options; `exiftool_path` is forwarded to
                `exiftool_metadata`.

        Returns:
            A DocumentConverterResult whose markdown is the stripped
            transcript, or the stripped metadata lines when no transcript was
            produced.
        """
        md_content = ""

        # Add metadata
        metadata = exiftool_metadata(
            file_stream, exiftool_path=kwargs.get("exiftool_path")
        )
        if metadata:
            md_content = "".join(
                f"{field}: {metadata[field]}\n"
                for field in self._METADATA_FIELDS
                if field in metadata
            )

        # Figure out the audio format for transcription. Normalise case the
        # same way accepts() does, so that e.g. ".WAV" is not accepted and
        # then silently left untranscribed.
        extension = (stream_info.extension or "").lower()
        mimetype = (stream_info.mimetype or "").lower()
        audio_format = self._detect_audio_format(extension, mimetype)

        # Transcribe
        if audio_format:
            try:
                transcript = transcribe_audio(
                    file_stream,
                    audio_format=audio_format,
                    language=recognition_language,
                )
            except MissingDependencyException:
                # Best effort: keep whatever metadata text was collected.
                pass
            else:
                # NOTE(review): the transcript replaces the metadata text
                # instead of being appended to it; kept as-is because callers
                # may rely on it — confirm this is intended.
                # A None result is ignored so .strip() below cannot fail.
                if transcript is not None:
                    md_content = transcript

        # Return the result
        return DocumentConverterResult(markdown=md_content.strip())
|