gpt4free/g4f/Provider/audio/MarkItDown.py
hlohaus e238ca3a58 refactor: update model mappings, error handling, and file utils
- Changed `generate_commit_message` return to `.strip("`").strip()` in `commit.py`
- Added new model mappings in `PollinationsAI.py`, including `gpt-4.1`, `gpt-4.1-mini`, and `deepseek-r1-distill-*`
- Removed `print` debug statement from `PollinationsAI.py` request payload
- Replaced temp file handling in `MarkItDown.py` with `get_tempfile` utility
- Added `get_tempfile` function to `files.py` for consistent tempfile creation
- Added `gpt-4.1` to `text_models` list in `models.py`
- Added `ModelNotSupportedError` to exception handling in `OpenaiChat.py`
- Updated message content creation to use `to_string()` in `OpenaiChat.py`
- Wrapped `get_model()` in try-except to ignore `ModelNotSupportedError` in `OpenaiChat.py`
- Adjusted `convert` endpoint in `api/__init__.py` to accept optional `provider` param
- Refactored `/api/markitdown` to reuse `convert()` handler in `api/__init__.py`
2025-04-26 17:50:48 +02:00

39 lines
No EOL
1.2 KiB
Python

from __future__ import annotations
import os
try:
from markitdown import MarkItDown as MaItDo, StreamInfo
has_markitdown = True
except ImportError:
has_markitdown = False
from ...typing import AsyncResult, Messages, MediaListType
from ...tools.files import get_tempfile
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
    """Provider that turns media files into text using the ``markitdown`` package.

    Each media item is passed to ``markitdown``'s converter and the resulting
    text content is yielded to the caller.
    """

    # Mirrors the module-level flag set by the optional ``markitdown`` import.
    working = has_markitdown

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        media: MediaListType = None,
        **kwargs
    ) -> AsyncResult:
        """Convert every media item and yield its extracted text.

        Args:
            model: Not used by this provider.
            messages: Not used by this provider.
            media: Iterable of ``(file, filename)`` pairs to convert.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            The text extracted from each media item, skipping items that
            produce no text.

        Raises:
            ValueError: If ``media`` is not provided.
        """
        # ``media`` defaults to None; fail with a clear message instead of the
        # opaque "'NoneType' object is not iterable" the loop would raise.
        if media is None:
            raise ValueError("MarkItDown requires media to be provided.")

        converter = MaItDo()
        for file, filename in media:
            try:
                text = converter.convert(
                    file,
                    stream_info=StreamInfo(filename=filename) if filename else None,
                ).text_content
            except TypeError:
                # The direct conversion rejected this input. Fall back to a
                # temporary copy -- presumably get_tempfile writes the content
                # to disk and returns its path (verify in tools/files.py).
                copyfile = get_tempfile(file, filename)
                try:
                    text = converter.convert(copyfile).text_content
                finally:
                    # Always delete the temporary copy, even if conversion fails.
                    os.remove(copyfile)
            if not text:
                # Nothing was extracted (None or empty string): skip the item
                # rather than failing on ``None.split``.
                continue
            # Keep only what follows the last "### Audio Transcript:" marker;
            # when the marker is absent the whole text is kept.
            text = text.split("### Audio Transcript:\n")[-1]
            if text:
                yield text