mirror of
https://github.com/xtekky/gpt4free.git
synced 2025-12-06 02:30:41 -08:00
- Modified g4f/providers/response.py so that the 'format_images_markdown' function returns its result directly, without additional flags.
- Updated g4f/gui/server/api.py to add 'tempfiles' parameter with default empty list to '_create_response_stream' method.
- Updated the API response handling to iterate over 'tempfiles' and attempt to remove each file after the response completes, with exception handling (try-except block with logger.exception).
- Adjusted g4f/tools/files.py to fix tempfile creation: corrected the 'suffix' parameter in 'get_tempfile' to use 'suffix' directly instead of splitting.
- In g4f/tools/media.py, changed 'render_part' function to handle 'text' key properly, checking 'part.get("text")' and returning a dictionary with 'type': 'text' and 'text': value, if present.
92 lines
No EOL
2.7 KiB
Python
92 lines
No EOL
2.7 KiB
Python
from typing import BinaryIO, Any
|
|
import asyncio
|
|
from markitdown._base_converter import DocumentConverter, DocumentConverterResult
|
|
from markitdown._stream_info import StreamInfo
|
|
from markitdown.converters._llm_caption import llm_caption
|
|
from markitdown.converters._exiftool import exiftool_metadata
|
|
|
|
from ._base_converter import AsyncDocumentConverterResult
|
|
|
|
# MIME type prefixes that ImageConverter.accepts() matches against the
# lower-cased stream MIME type.
ACCEPTED_MIME_TYPE_PREFIXES = ["image/jpeg", "image/png"]

# File extensions (leading dot included) that ImageConverter.accepts()
# matches against the lower-cased stream extension.
ACCEPTED_FILE_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
]
|
|
|
|
|
|
class ImageConverter(DocumentConverter):
    """
    Turn an image into markdown text.

    The output is built from selected metadata fields returned by
    `exiftool_metadata` and, when both `llm_client` and `llm_model` are
    supplied as keyword options, a description returned by `llm_caption`.
    """

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> bool:
        """
        Return True when the stream looks like a supported image.

        A stream is accepted if its lower-cased extension is listed in
        ACCEPTED_FILE_EXTENSIONS, or if its lower-cased MIME type starts
        with one of ACCEPTED_MIME_TYPE_PREFIXES. Missing values are treated
        as empty strings.
        """
        mimetype = (stream_info.mimetype or "").lower()
        extension = (stream_info.extension or "").lower()

        return extension in ACCEPTED_FILE_EXTENSIONS or mimetype.startswith(
            tuple(ACCEPTED_MIME_TYPE_PREFIXES)
        )

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,  # Options to pass to the converter
    ) -> DocumentConverterResult:
        """
        Build the markdown representation of an image.

        Keyword options read from `kwargs`:
            exiftool_path: forwarded to `exiftool_metadata`.
            llm_client, llm_model: captioning runs only when both are not None.
            llm_prompt: forwarded to `llm_caption` as `prompt`.

        Returns a DocumentConverterResult whose markdown holds the metadata
        lines followed by an optional "# Description:" section. If
        `llm_caption` hands back a coroutine, an AsyncDocumentConverterResult
        wrapping that coroutine is returned instead.
        """
        # Metadata keys to report, in output order.
        wanted_fields = (
            "ImageSize",
            "Title",
            "Caption",
            "Description",
            "Keywords",
            "Artist",
            "Author",
            "DateTimeOriginal",
            "CreateDate",
            "GPSPosition",
        )
        parts = []

        # Metadata section: one "Name: value" line per field that is present.
        exif = exiftool_metadata(
            file_stream, exiftool_path=kwargs.get("exiftool_path")
        )
        if exif:
            parts.extend(
                f"{name}: {exif[name]}\n" for name in wanted_fields if name in exif
            )

        # Description section: only attempted when a client and model are given.
        client = kwargs.get("llm_client")
        model = kwargs.get("llm_model")
        if client is not None and model is not None:
            caption = llm_caption(
                file_stream,
                stream_info,
                client=client,
                model=model,
                prompt=kwargs.get("llm_prompt"),
            )

            # An async client yields a coroutine; hand it to the caller unawaited.
            # NOTE(review): the metadata lines gathered above are not part of
            # this result - confirm that is intended.
            if asyncio.iscoroutine(caption):
                return AsyncDocumentConverterResult(caption)

            if caption is not None:
                parts.append("\n# Description:\n" + caption.strip() + "\n")

        return DocumentConverterResult(markdown="".join(parts))