Mirror of https://github.com/xtekky/gpt4free.git (synced 2025-12-06 02:30:41 -08:00)
- Added "No auth / HAR file" authentication type in providers-and-models.md - Added "Video generation" column to provider tables for future capability - Updated model counts and provider capabilities throughout documentation - Fixed ARTA provider with improved error handling and response validation - Enhanced AllenAI provider with vision model support and proper image handling - Significantly improved Blackbox provider: - Added HAR file authentication support - Added subscription status checking - Added premium/demo model differentiation - Improved session handling and error recovery - Enhanced DDG provider with better error handling for challenges - Improved PollinationsAI and PollinationsImage providers' model handling - Added VideoModel class in g4f/models.py - Added audio/video generation indicators in GUI components - Added new Ai2 models: olmo-1-7b, olmo-2-32b, olmo-4-synthetic - Added new commit message generation tool in etc/tool/commit.py
546 lines
26 KiB
Python
from __future__ import annotations

import time
from aiohttp import ClientSession, ClientTimeout
import json
import asyncio
import random
import base64
import hashlib
from yarl import URL

from ..typing import AsyncResult, Messages, Cookies
from ..requests.raise_for_status import raise_for_status
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
from .helper import format_prompt, get_last_user_message
from ..providers.response import FinishReason, JsonConversation
from ..errors import ModelNotSupportedError, ResponseStatusError, RateLimitError, TimeoutError, ConversationLimitError

try:
    from bs4 import BeautifulSoup
    has_bs4 = True
except ImportError:
    has_bs4 = False


class DuckDuckGoSearchException(Exception):
    """Base exception class for duckduckgo_search."""


class DuckDuckGoChallengeError(ResponseStatusError):
    """Raised when DuckDuckGo presents a challenge that needs to be solved."""


class Conversation(JsonConversation):
    vqd: str = None
    vqd_hash_1: str = None
    message_history: Messages = []
    cookies: dict = {}
    fe_version: str = None

    def __init__(self, model: str):
        self.model = model


class DDG(AsyncGeneratorProvider, ProviderModelMixin):
    label = "DuckDuckGo AI Chat"
    url = "https://duckduckgo.com/aichat"
    api_endpoint = "https://duckduckgo.com/duckchat/v1/chat"
    status_url = "https://duckduckgo.com/duckchat/v1/status"

    working = True
    supports_stream = True
    supports_system_message = True
    supports_message_history = True

    default_model = "gpt-4o-mini"

    # Model mapping from user-friendly names to API model names
    _chat_models = {
        "gpt-4": default_model,
        "gpt-4o-mini": default_model,
        "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "claude-3-haiku": "claude-3-haiku-20240307",
        "o3-mini": "o3-mini",
        "mixtral-small-24b": "mistralai/Mistral-Small-24B-Instruct-2501",
    }

    # Available models (user-friendly names)
    models = list(_chat_models.keys())
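
    # For example, the friendly name "llama-3.3-70b" is sent to the API as
    # "meta-llama/Llama-3.3-70B-Instruct-Turbo", while "gpt-4" and "gpt-4o-mini"
    # both resolve to the default model above.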

    last_request_time = 0
    max_retries = 3
    base_delay = 2

    # Class variable to store the x-fe-version across instances
    _chat_xfe = ""

    @staticmethod
    def sha256_base64(text: str) -> str:
        """Return the base64 encoding of the SHA256 digest of the text."""
        sha256_hash = hashlib.sha256(text.encode("utf-8")).digest()
        return base64.b64encode(sha256_hash).decode()
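
    # For reference: sha256_base64("") == "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=",
    # i.e. the base64-encoded SHA-256 digest of the empty string.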

    @staticmethod
    def parse_dom_fingerprint(js_text: str) -> str:
        if not has_bs4:
            # Fallback if BeautifulSoup is not available
            return "1000"

        try:
            html_snippet = js_text.split("e.innerHTML = '")[1].split("';")[0]
            offset_value = js_text.split("return String(")[1].split(" ")[0]
            soup = BeautifulSoup(html_snippet, "html.parser")
            corrected_inner_html = soup.body.decode_contents()
            inner_html_length = len(corrected_inner_html)
            fingerprint = int(offset_value) + inner_html_length
            return str(fingerprint)
        except Exception:
            # Return a fallback value if parsing fails
            return "1000"

    @staticmethod
    def parse_server_hashes(js_text: str) -> list:
        try:
            return js_text.split('server_hashes: ["', maxsplit=1)[1].split('"]', maxsplit=1)[0].split('","')
        except Exception:
            # Return a fallback value if parsing fails
            return ["1", "2"]

    @classmethod
    def build_x_vqd_hash_1(cls, vqd_hash_1: str, headers: dict) -> str:
        """Build the x-vqd-hash-1 header value."""
        try:
            # If we received a valid base64 string, try to decode it
            if vqd_hash_1 and len(vqd_hash_1) > 20:
                try:
                    # Try to decode and parse as JSON first
                    decoded_json = json.loads(base64.b64decode(vqd_hash_1).decode())
                    # If it's already a complete structure with meta, return it as is
                    if isinstance(decoded_json, dict) and "meta" in decoded_json:
                        return vqd_hash_1

                    # Otherwise, extract what we can from it
                    if isinstance(decoded_json, dict) and "server_hashes" in decoded_json:
                        server_hashes = decoded_json.get("server_hashes", ["1", "2"])
                    else:
                        # Fall back to parsing from string
                        decoded = base64.b64decode(vqd_hash_1).decode()
                        server_hashes = cls.parse_server_hashes(decoded)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # If it's not valid JSON, try to parse it as a string
                    decoded = base64.b64decode(vqd_hash_1).decode()
                    server_hashes = cls.parse_server_hashes(decoded)
            else:
                # Default server hashes if we can't extract them
                server_hashes = ["1", "2"]

            # Generate fingerprints
            dom_fingerprint = "1000"  # Default value
            ua_fingerprint = headers.get("User-Agent", "") + headers.get("sec-ch-ua", "")
            ua_hash = cls.sha256_base64(ua_fingerprint)
            dom_hash = cls.sha256_base64(dom_fingerprint)

            # Create a challenge ID (random hex string)
            challenge_id = ''.join(random.choice('0123456789abcdef') for _ in range(40)) + 'h8jbt'

            # Build the complete structure including meta
            final_result = {
                "server_hashes": server_hashes,
                "client_hashes": [ua_hash, dom_hash],
                "signals": {},
                "meta": {
                    "v": "1",
                    "challenge_id": challenge_id,
                    "origin": "https://duckduckgo.com",
                    "stack": "Error\nat ke (https://duckduckgo.com/dist/wpm.chat.js:1:29526)\nat async dispatchServiceInitialVQD (https://duckduckgo.com/dist/wpm.chat.js:1:45076)"
                }
            }

            return base64.b64encode(json.dumps(final_result).encode()).decode()
        except Exception:
            # If anything fails, return an empty string
            return ""

    @classmethod
    def validate_model(cls, model: str) -> str:
        """Validates and returns the correct model name for the API"""
        if not model:
            return cls.default_model

        # Check aliases first
        if model in cls.model_aliases:
            model = cls.model_aliases[model]

        # Check if it's a valid model name
        if model not in cls.models:
            raise ModelNotSupportedError(f"Model {model} not supported. Available models: {cls.models}")

        return model
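
    # For example, validate_model("") returns "gpt-4o-mini" (the default), while
    # validate_model("some-unknown-model") raises ModelNotSupportedError listing
    # the supported friendly names.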

    @classmethod
    async def sleep(cls, multiplier=1.0):
        """Implements rate limiting between requests"""
        now = time.time()
        if cls.last_request_time > 0:
            delay = max(0.0, 1.5 - (now - cls.last_request_time)) * multiplier
            if delay > 0:
                await asyncio.sleep(delay)
        cls.last_request_time = time.time()
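
    # In effect, back-to-back requests are spaced roughly 1.5 seconds apart at
    # multiplier 1.0; callers pass a larger multiplier on retries to slow down further.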

    @classmethod
    async def get_default_cookies(cls, session: ClientSession) -> dict:
        """Obtains default cookies needed for API requests"""
        try:
            await cls.sleep()
            # Make initial request to get cookies
            async with session.get(cls.url) as response:
                # Set the required cookies
                cookies = {}
                cookies_dict = {'dcs': '1', 'dcm': '3'}

                # Add any cookies from the response
                for cookie in response.cookies.values():
                    cookies[cookie.key] = cookie.value

                # Ensure our required cookies are set
                for name, value in cookies_dict.items():
                    cookies[name] = value
                    url_obj = URL(cls.url)
                    session.cookie_jar.update_cookies({name: value}, url_obj)

                # Make a second request to the status endpoint to get any additional cookies
                headers = {
                    "accept": "text/event-stream",
                    "accept-language": "en",
                    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
                    "origin": "https://duckduckgo.com",
                    "referer": "https://duckduckgo.com/",
                }

                await cls.sleep()
                async with session.get(cls.status_url, headers=headers) as status_response:
                    # Add any cookies from the status response
                    for cookie in status_response.cookies.values():
                        cookies[cookie.key] = cookie.value
                        url_obj = URL(cls.url)
                        session.cookie_jar.update_cookies({cookie.key: cookie.value}, url_obj)

                return cookies
        except Exception:
            # Return at least the required cookies on error
            cookies = {'dcs': '1', 'dcm': '3'}
            url_obj = URL(cls.url)
            for name, value in cookies.items():
                session.cookie_jar.update_cookies({name: value}, url_obj)
            return cookies

    @classmethod
    async def fetch_fe_version(cls, session: ClientSession) -> str:
        """Fetches the fe-version from the initial page load."""
        if cls._chat_xfe:
            return cls._chat_xfe

        from datetime import datetime

        try:
            url = "https://duckduckgo.com/?q=DuckDuckGo+AI+Chat&ia=chat&duckai=1"
            await cls.sleep()
            async with session.get(url) as response:
                await raise_for_status(response)
                content = await response.text()

                # Extract x-fe-version components
                try:
                    # Try to extract the version components
                    xfe1 = content.split('__DDG_BE_VERSION__="', 1)[1].split('"', 1)[0]  # currently unused
                    xfe2 = content.split('__DDG_FE_CHAT_HASH__="', 1)[1].split('"', 1)[0]

                    # Format it like "serp_YYYYMMDD_HHMMSS_ET-hash"
                    current_date = datetime.now().strftime("%Y%m%d_%H%M%S")
                    cls._chat_xfe = f"serp_{current_date}_ET-{xfe2}"

                    return cls._chat_xfe
                except Exception:
                    # Fallback to a default format if extraction fails
                    current_date = datetime.now().strftime("%Y%m%d_%H%M%S")
                    cls._chat_xfe = f"serp_{current_date}_ET-78c2e87e3d286691cc21"
                    return cls._chat_xfe
        except Exception:
            # Fallback to a default format if the request fails
            current_date = datetime.now().strftime("%Y%m%d_%H%M%S")
            cls._chat_xfe = f"serp_{current_date}_ET-78c2e87e3d286691cc21"
            return cls._chat_xfe
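
    # The resulting header value looks like "serp_20250101_120000_ET-78c2e87e3d286691cc21":
    # the date/time of the request plus the extracted (or fallback) chat hash.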

    @classmethod
    async def fetch_vqd_and_hash(cls, session: ClientSession, retry_count: int = 0) -> tuple[str, str]:
        """Fetches the required VQD token and hash for the chat session with retries."""
        headers = {
            "accept": "text/event-stream",
            "accept-language": "en",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
            "origin": "https://duckduckgo.com",
            "referer": "https://duckduckgo.com/",
            "x-vqd-accept": "1",
        }

        # Make sure we have cookies first
        if len(session.cookie_jar) == 0:
            await cls.get_default_cookies(session)

        try:
            await cls.sleep(multiplier=1.0 + retry_count * 0.5)
            async with session.get(cls.status_url, headers=headers) as response:
                await raise_for_status(response)

                vqd = response.headers.get("x-vqd-4", "")
                vqd_hash_1 = response.headers.get("x-vqd-hash-1", "")

                if vqd:
                    # Return the fetched vqd and vqd_hash_1
                    return vqd, vqd_hash_1

                response_text = await response.text()
                raise RuntimeError(f"Failed to fetch VQD token and hash: {response.status} {response_text}")
        except Exception as e:
            if retry_count < cls.max_retries:
                wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                await asyncio.sleep(wait_time)
                return await cls.fetch_vqd_and_hash(session, retry_count + 1)
            else:
                raise RuntimeError(f"Failed to fetch VQD token and hash after {cls.max_retries} attempts: {str(e)}")

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        timeout: int = 60,
        cookies: Cookies = None,
        conversation: Conversation = None,
        return_conversation: bool = False,
        **kwargs
    ) -> AsyncResult:
        model = cls.validate_model(model)
        retry_count = 0

        while retry_count <= cls.max_retries:
            try:
                session_timeout = ClientTimeout(total=timeout)
                async with ClientSession(timeout=session_timeout, cookies=cookies) as session:
                    # Step 1: Ensure we have the fe_version
                    if not cls._chat_xfe:
                        cls._chat_xfe = await cls.fetch_fe_version(session)

                    # Step 2: Initialize or update conversation
                    if conversation is None:
                        # Get initial cookies if not provided
                        if not cookies:
                            await cls.get_default_cookies(session)

                        # Create a new conversation
                        conversation = Conversation(model)
                        conversation.fe_version = cls._chat_xfe

                        # Step 3: Get VQD tokens
                        vqd, vqd_hash_1 = await cls.fetch_vqd_and_hash(session)
                        conversation.vqd = vqd
                        conversation.vqd_hash_1 = vqd_hash_1
                        conversation.message_history = [{"role": "user", "content": format_prompt(messages)}]
                    else:
                        # Update existing conversation with new message
                        last_message = get_last_user_message(messages.copy())
                        conversation.message_history.append({"role": "user", "content": last_message})

                    # Step 4: Prepare headers with proper x-vqd-hash-1
                    headers = {
                        "accept": "text/event-stream",
                        "accept-language": "en",
                        "cache-control": "no-cache",
                        "content-type": "application/json",
                        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
                        "origin": "https://duckduckgo.com",
                        "referer": "https://duckduckgo.com/",
                        "pragma": "no-cache",
                        "priority": "u=1, i",
                        "sec-ch-ua": '"Not:A-Brand";v="24", "Chromium";v="134"',
                        "sec-ch-ua-mobile": "?0",
                        "sec-ch-ua-platform": '"Linux"',
                        "sec-fetch-dest": "empty",
                        "sec-fetch-mode": "cors",
                        "sec-fetch-site": "same-origin",
                        "x-fe-version": conversation.fe_version or cls._chat_xfe,
                        "x-vqd-4": conversation.vqd,
                    }

                    # For the first request, send an empty x-vqd-hash-1 header
                    # This matches the behavior in the duckduckgo_search module
                    headers["x-vqd-hash-1"] = ""

                    # Step 5: Prepare the request data
                    # Convert the user-friendly model name to the API model name
                    api_model = cls._chat_models.get(model, model)

                    data = {
                        "model": api_model,
                        "messages": conversation.message_history,
                    }

                    # Step 6: Send the request
                    await cls.sleep(multiplier=1.0 + retry_count * 0.5)
                    async with session.post(cls.api_endpoint, json=data, headers=headers, proxy=proxy) as response:
                        # Handle 429 and 418 errors specifically
                        if response.status == 429:
                            response_text = await response.text()

                            if retry_count < cls.max_retries:
                                retry_count += 1
                                wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                                await asyncio.sleep(wait_time)

                                # Get fresh tokens and cookies
                                cookies = await cls.get_default_cookies(session)
                                continue
                            else:
                                raise RateLimitError(f"Rate limited after {cls.max_retries} retries")
                        elif response.status == 418:
                            # Check if it's a challenge error
                            try:
                                response_text = await response.text()
                                try:
                                    response_json = json.loads(response_text)

                                    # Extract challenge data if available
                                    challenge_data = None
                                    if response_json.get("type") == "ERR_CHALLENGE" and "cd" in response_json:
                                        challenge_data = response_json["cd"]

                                    if retry_count < cls.max_retries:
                                        retry_count += 1
                                        wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                                        await asyncio.sleep(wait_time)

                                        # Reset tokens and try again with a fresh session
                                        conversation = None
                                        cls._chat_xfe = ""

                                        # Get fresh cookies
                                        cookies = await cls.get_default_cookies(session)

                                        # If we have challenge data, try to use it
                                        if challenge_data and isinstance(challenge_data, dict):
                                            # Extract any useful information from challenge data
                                            # This could be used to build a better response in the future
                                            pass

                                        continue
                                    else:
                                        raise DuckDuckGoChallengeError(f"Challenge error after {cls.max_retries} retries")
                                except json.JSONDecodeError:
                                    # If we can't parse the JSON, assume it's a challenge error anyway
                                    if retry_count < cls.max_retries:
                                        retry_count += 1
                                        wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                                        await asyncio.sleep(wait_time)

                                        # Reset tokens and try again with a fresh session
                                        conversation = None
                                        cls._chat_xfe = ""
                                        cookies = await cls.get_default_cookies(session)
                                        continue
                                    else:
                                        raise DuckDuckGoChallengeError(f"Challenge error after {cls.max_retries} retries")
                            except Exception as e:
                                # If any other error occurs during handling, still try to recover
                                if retry_count < cls.max_retries:
                                    retry_count += 1
                                    wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                                    await asyncio.sleep(wait_time)

                                    # Reset tokens and try again with a fresh session
                                    conversation = None
                                    cls._chat_xfe = ""
                                    cookies = await cls.get_default_cookies(session)
                                    continue
                                else:
                                    raise DuckDuckGoChallengeError(f"Challenge error after {cls.max_retries} retries: {str(e)}")

                        # For other status codes, use the standard error handler
                        await raise_for_status(response)
                        reason = None
                        full_message = ""

                        # Step 7: Process the streaming response
                        async for line in response.content:
                            line = line.decode("utf-8").strip()

                            if line.startswith("data:"):
                                try:
                                    message = json.loads(line[5:].strip())
                                except json.JSONDecodeError:
                                    continue

                                if "action" in message and message["action"] == "error":
                                    error_type = message.get("type", "")
                                    if message.get("status") == 429:
                                        if error_type == "ERR_CONVERSATION_LIMIT":
                                            raise ConversationLimitError(error_type)
                                        raise RateLimitError(error_type)
                                    elif message.get("status") == 418 and error_type == "ERR_CHALLENGE":
                                        # Handle challenge error by refreshing tokens and retrying
                                        if retry_count < cls.max_retries:
                                            # Don't raise here, let the outer exception handler retry
                                            raise DuckDuckGoChallengeError(f"Challenge detected: {error_type}")
                                    raise DuckDuckGoSearchException(error_type)

                                if "message" in message:
                                    if message["message"]:
                                        yield message["message"]
                                        full_message += message["message"]
                                        reason = "length"
                                    else:
                                        reason = "stop"

                        # Step 8: Update conversation with response information
                        # Always update the VQD tokens from the response headers
                        conversation.vqd = response.headers.get("x-vqd-4", conversation.vqd)
                        conversation.vqd_hash_1 = response.headers.get("x-vqd-hash-1", conversation.vqd_hash_1)

                        # Update cookies
                        conversation.cookies = {
                            n: c.value
                            for n, c in session.cookie_jar.filter_cookies(URL(cls.url)).items()
                        }

                        # If requested, return the updated conversation
                        if return_conversation:
                            conversation.message_history.append({"role": "assistant", "content": full_message})
                            yield conversation

                        if reason is not None:
                            yield FinishReason(reason)

                        # If we got here, the request was successful
                        break

            except (RateLimitError, ResponseStatusError, DuckDuckGoChallengeError) as e:
                if ("429" in str(e) or isinstance(e, DuckDuckGoChallengeError)) and retry_count < cls.max_retries:
                    retry_count += 1
                    wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                    await asyncio.sleep(wait_time)

                    # For challenge errors, refresh tokens and cookies
                    if isinstance(e, DuckDuckGoChallengeError):
                        # Reset conversation to force new token acquisition
                        conversation = None
                        # Clear class cache to force refresh
                        cls._chat_xfe = ""
                else:
                    raise
            except asyncio.TimeoutError as e:
                raise TimeoutError(f"Request timed out: {str(e)}")
            except Exception:
                raise
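

# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the provider above: it shows how the
# async generator is typically consumed, printing only text chunks and ignoring
# control objects such as FinishReason or a returned Conversation. Because of
# the relative imports at the top, this only runs when the file is executed as
# a module inside its package (e.g. via "python -m ..."), and the model name is
# simply one of the friendly names listed in DDG.models.
if __name__ == "__main__":
    async def _demo() -> None:
        messages: Messages = [{"role": "user", "content": "Say hello in one short sentence."}]
        async for chunk in DDG.create_async_generator(model="gpt-4o-mini", messages=messages):
            if isinstance(chunk, str):
                print(chunk, end="", flush=True)
        print()

    asyncio.run(_demo())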