Refactor MarkItDown and OpenaiChat classes for improved media handling and optional parameters; enhance is_data_an_media function to support binary/octet-stream return type for unsupported URLs.

This commit is contained in:
hlohaus 2025-10-31 18:16:19 +01:00
parent 35e3fa95f3
commit 2317bd5a83
4 changed files with 19 additions and 25 deletions

View file

@@ -2,7 +2,6 @@ from __future__ import annotations
import os
import asyncio
from typing import Any
try:
from ...integration.markitdown import MarkItDown as MaItDo, StreamInfo
@@ -23,7 +22,6 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
model: str,
messages: Messages,
media: MediaListType = None,
llm_client: Any = None,
**kwargs
) -> AsyncResult:
if media is None:
@@ -34,12 +32,10 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
for file, filename in media:
text = None
try:
result = md.convert(
file,
stream_info=StreamInfo(filename=filename) if filename else None,
llm_client=llm_client,
llm_model=model
)
if isinstance(file, str) and file.startswith(("http://", "https://")):
result = md.convert_url(file)
else:
result = md.convert(file, stream_info=StreamInfo(filename=filename) if filename else None)
if asyncio.iscoroutine(result.text_content):
text = await result.text_content
else:
@@ -47,11 +43,7 @@ class MarkItDown(AsyncGeneratorProvider, ProviderModelMixin):
except TypeError:
copyfile = get_tempfile(file, filename)
try:
result = md.convert(
copyfile,
llm_client=llm_client,
llm_model=model
)
result = md.convert(copyfile)
if asyncio.iscoroutine(result.text_content):
text = await result.text_content
else:

View file

@@ -24,7 +24,7 @@ from ...requests import StreamSession
from ...requests import get_nodriver_session
from ...image import ImageRequest, to_image, to_bytes, is_accepted_format, detect_file_type
from ...errors import MissingAuthError, NoValidHarFileError, ModelNotFoundError
from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, format_link
from ...providers.response import JsonConversation, FinishReason, SynthesizeData, AuthResult, ImageResponse, ImagePreview, ResponseType, JsonRequest, format_link
from ...providers.response import TitleGeneration, RequestLogin, Reasoning
from ...tools.media import merge_media
from ..helper import format_cookies, format_media_prompt, to_string
@@ -330,14 +330,15 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
proxy: str = None,
timeout: int = 360,
auto_continue: bool = False,
action: str = "next",
action: Optional[str] = None,
conversation: Conversation = None,
media: MediaListType = None,
return_conversation: bool = True,
web_search: bool = False,
prompt: str = None,
conversation_mode=None,
temporary=False,
conversation_mode: Optional[dict] = None,
temporary: Optional[bool] = None,
conversation_id: Optional[str] = None,
**kwargs
) -> AsyncResult:
"""
@@ -351,7 +352,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
api_key (str): Access token for authentication.
auto_continue (bool): Flag to automatically continue the conversation.
action (str): Type of action ('next', 'continue', 'variant').
conversation_id (str): ID of the conversation.
media (MediaListType): Images to include in the conversation.
return_conversation (bool): Flag to include response fields in the output.
**kwargs: Additional keyword arguments.
@@ -362,6 +362,10 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
Raises:
RuntimeError: If an error occurs during processing.
"""
if temporary is None:
temporary = action is not None and conversation_id is not None
if action is None:
action = "next"
async with StreamSession(
proxy=proxy,
impersonate="chrome",
@@ -431,7 +435,6 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
}
if temporary:
data["history_and_training_disabled"] = True
async with session.post(
prepare_url,
json=data,
@@ -494,7 +497,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
if temporary:
data["history_and_training_disabled"] = True
if conversation.conversation_id is not None:
if conversation.conversation_id is not None and not temporary:
data["conversation_id"] = conversation.conversation_id
debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
prompt = conversation.prompt = format_media_prompt(messages, prompt)
@@ -510,6 +513,7 @@ class OpenaiChat(AsyncAuthedProvider, ProviderModelMixin):
else:
new_messages.append(message)
data["messages"] = cls.create_messages(new_messages, image_requests, ["search"] if web_search else None)
yield JsonRequest.from_dict(data)
headers = {
**cls._headers,
"accept": "text/event-stream",

View file

@@ -107,11 +107,12 @@ def is_data_an_media(data, filename: str = None) -> str:
return content_type
if isinstance(data, bytes):
return is_accepted_format(data)
if isinstance(data, str) and data.startswith("http"):
if isinstance(data, str) and data.startswith(("http://", "https://")):
path = urlparse(data).path
extension = get_extension(path)
if extension is not None:
return EXTENSIONS_MAP[extension]
return EXTENSIONS_MAP[extension]
return "binary/octet-stream"
return is_data_uri_an_image(data)
def is_valid_media(data: ImageType = None, filename: str = None) -> str:

View file

@@ -75,7 +75,6 @@ class YouTubeConverter(DocumentConverter):
) -> DocumentConverterResult:
# Parse the stream
encoding = "utf-8" if stream_info.charset is None else stream_info.charset
print(file_stream)
soup = bs4.BeautifulSoup(file_stream, "html.parser", from_encoding=encoding)
# Read the meta tags
@@ -95,8 +94,6 @@ class YouTubeConverter(DocumentConverter):
if key and content: # Only add non-empty content
metadata[key] = content
break
print(f"Extracted metadata keys: {list(metadata.keys())}")
# Try reading the description
try: