diff --git a/g4f/Provider/Copilot.py b/g4f/Provider/Copilot.py index 4357c622..ccacb8d0 100644 --- a/g4f/Provider/Copilot.py +++ b/g4f/Provider/Copilot.py @@ -9,7 +9,7 @@ from urllib.parse import quote try: from curl_cffi.requests import AsyncSession - from curl_cffi import CurlWsFlag + from curl_cffi import CurlWsFlag, CurlMime has_curl_cffi = True except ImportError: has_curl_cffi = False @@ -20,7 +20,6 @@ except ImportError: has_nodriver = False from .base_provider import AsyncAuthedProvider, ProviderModelMixin -from .helper import format_prompt_max_length from .openai.har_file import get_headers, get_har_files from ..typing import AsyncResult, Messages, MediaListType from ..errors import MissingRequirementsError, NoValidHarFileError, MissingAuthError @@ -30,7 +29,7 @@ from ..requests import get_nodriver from ..image import to_bytes, is_accepted_format from .helper import get_last_user_message from ..files import get_bucket_dir -from ..tools.files import get_filenames, read_bucket +from ..tools.files import read_bucket from pathlib import Path from .. import debug @@ -46,8 +45,10 @@ def extract_bucket_items(messages: Messages) -> list[dict]: for message in messages: if isinstance(message, dict) and isinstance(message.get("content"), list): for content_item in message["content"]: - if isinstance(content_item, dict) and ("bucket_id" in content_item or "bucket" in content_item): + if isinstance(content_item, dict) and "bucket_id" in content_item and "name" not in content_item: bucket_items.append(content_item) + if message.get("role") == "assistant": + bucket_items = [] return bucket_items class Copilot(AsyncAuthedProvider, ProviderModelMixin): @@ -109,6 +110,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): media: MediaListType = None, conversation: BaseConversation = None, return_conversation: bool = True, + useridentitytype: str = "google", api_key: str = None, **kwargs ) -> AsyncResult: @@ -130,7 +132,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): cls._access_token, cls._cookies = await get_access_token_and_cookies(cls.url, proxy) else: raise h - websocket_url = f"{websocket_url}&accessToken={quote(cls._access_token)}" + websocket_url = f"{websocket_url}&accessToken={quote(cls._access_token)}&X-UserIdentityType={quote(useridentitytype)}" headers = {"authorization": f"Bearer {cls._access_token}"} async with AsyncSession( @@ -142,7 +144,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): ) as session: if cls._access_token is not None: cls._cookies = session.cookies.jar if hasattr(session.cookies, "jar") else session.cookies - response = await session.get("https://copilot.microsoft.com/c/api/user") + response = await session.get("https://copilot.microsoft.com/c/api/user?api-version=2", headers={"x-useridentitytype": useridentitytype}) if response.status_code == 401: raise MissingAuthError("Status 401: Invalid access token") response.raise_for_status() @@ -154,17 +156,13 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): else: debug.log(f"Copilot: User: {user}") if conversation is None: - response = await session.post(cls.conversation_url) + response = await session.post(cls.conversation_url, headers={"x-useridentitytype": useridentitytype}) response.raise_for_status() conversation_id = response.json().get("id") conversation = Conversation(conversation_id) - if prompt is None: - prompt = format_prompt_max_length(messages, 10000) debug.log(f"Copilot: Created conversation: {conversation_id}") else: conversation_id = conversation.conversation_id - if prompt is None: - prompt = get_last_user_message(messages) debug.log(f"Copilot: Use conversation: {conversation_id}") if return_conversation: yield conversation @@ -180,6 +178,7 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): headers={ "content-type": is_accepted_format(data), "content-length": str(len(data)), + "x-useridentitytype": useridentitytype }, data=data ) @@ -191,69 +190,33 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): bucket_items = extract_bucket_items(messages) for item in bucket_items: try: - if "name" in item: - # Handle specific file from bucket with name - file_path = Path(get_bucket_dir(item["bucket_id"], "media", item["name"])) - if file_path.exists() and file_path.is_file(): - with open(file_path, "rb") as f: - file_data = f.read() - - filename = item["name"] - # Determine content type based on file extension - content_type = "application/octet-stream" - if filename.endswith(".pdf"): - content_type = "application/pdf" - elif filename.endswith(".docx"): - content_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - elif filename.endswith(".txt"): - content_type = "text/plain" - elif filename.endswith(".md"): - content_type = "text/markdown" - elif filename.endswith(".json"): - content_type = "application/json" - - response = await session.post( - "https://copilot.microsoft.com/c/api/attachments", - headers={ - "content-type": content_type, - "content-length": str(len(file_data)), - }, - data=file_data - ) - response.raise_for_status() - file_url = response.json().get("url") - uploaded_attachments.append({"type": "file", "url": file_url, "name": filename}) - debug.log(f"Copilot: Uploaded bucket file: {filename}") - else: - debug.log(f"Copilot: Bucket file not found: {item.get('name')}") - else: - # Handle plain text content from bucket - bucket_path = Path(get_bucket_dir(item["bucket"])) - plain_text_content = "" - for text_chunk in read_bucket(bucket_path): - plain_text_content += text_chunk - - if plain_text_content.strip(): + # Handle plain text content from bucket + bucket_path = Path(get_bucket_dir(item["bucket_id"])) + for text_chunk in read_bucket(bucket_path): + if text_chunk.strip(): # Upload plain text as a text file - text_data = plain_text_content.encode('utf-8') + text_data = text_chunk.encode('utf-8') + data = CurlMime() + data.addpart("file", filename=f"bucket_{item['bucket_id']}.txt", content_type="text/plain", data=text_data) response = await session.post( "https://copilot.microsoft.com/c/api/attachments", - headers={ - "content-type": "text/plain", - "content-length": str(len(text_data)), - }, - data=text_data + multipart=data, + headers={"x-useridentitytype": useridentitytype} ) response.raise_for_status() - file_url = response.json().get("url") - uploaded_attachments.append({"type": "file", "url": file_url, "name": f"bucket_{item['bucket']}.txt"}) - debug.log(f"Copilot: Uploaded bucket text content: {item['bucket']}") + data = response.json() + uploaded_attachments.append({"type": "document", "attachmentId": data.get("id")}) + debug.log(f"Copilot: Uploaded bucket text content: {item['bucket_id']}") else: - debug.log(f"Copilot: No text content found in bucket: {item['bucket']}") + debug.log(f"Copilot: No text content found in bucket: {item['bucket_id']}") except Exception as e: - debug.log(f"Copilot: Failed to upload bucket item {item}: {e}") + debug.log(f"Copilot: Failed to upload bucket item: {item}") + debug.error(e) - wss = await session.ws_connect(cls.websocket_url, timeout=3) + if prompt is None: + prompt = get_last_user_message(messages, False) + + wss = await session.ws_connect(websocket_url, timeout=3) if "Think" in model: mode = "reasoning" elif model.startswith("gpt-5") or "GPT-5" in model: @@ -317,32 +280,32 @@ class Copilot(AsyncAuthedProvider, ProviderModelMixin): if not wss.closed: await wss.close() -async def get_access_token_and_cookies(url: str, proxy: str = None, target: str = "ChatAI",): - browser, stop_browser = await get_nodriver(proxy=proxy, user_data_dir="copilot") +async def get_access_token_and_cookies(url: str, proxy: str = None): + browser, stop_browser = await get_nodriver(proxy=proxy) try: page = await browser.get(url) access_token = None while access_token is None: - access_token = await page.evaluate(""" - (() => { - for (var i = 0; i < localStorage.length; i++) { - try { - item = JSON.parse(localStorage.getItem(localStorage.key(i))); - if (item.credentialType == "AccessToken" - && item.expiresOn > Math.floor(Date.now() / 1000) - && item.target.includes("target")) { - return item.secret; - } - } catch(e) {} - } - })() - """.replace('"target"', json.dumps(target))) - if access_token is None: - await asyncio.sleep(1) + for _ in range(2): + await asyncio.sleep(3) + access_token = await page.evaluate(""" + (() => { + for (var i = 0; i < localStorage.length; i++) { + try { + item = JSON.parse(localStorage.getItem(localStorage.key(i))); + if (item?.body?.access_token) { + return item.body.access_token; + } + } catch(e) {} + } + })() + """) + if access_token is None: + await asyncio.sleep(1) cookies = {} for c in await page.send(nodriver.cdp.network.get_cookies([url])): cookies[c.name] = c.value - await page.close() + stop_browser() return access_token, cookies finally: stop_browser() diff --git a/g4f/providers/helper.py b/g4f/providers/helper.py index aaa97253..0c98b789 100644 --- a/g4f/providers/helper.py +++ b/g4f/providers/helper.py @@ -66,15 +66,23 @@ def format_prompt(messages: Messages, add_special_tokens: bool = False, do_conti def get_system_prompt(messages: Messages) -> str: return "\n".join([m["content"] for m in messages if m["role"] in ("developer", "system")]) -def get_last_user_message(messages: Messages) -> str: +def get_last_user_message(messages: Messages, include_buckets: bool = True) -> str: user_messages = [] for message in messages[::-1]: if message.get("role") == "user" or not user_messages: if message.get("role") != "user": continue - content = to_string(message.get("content")).strip() - if content: + content = message.get("content") + if include_buckets: + content = to_string(content).strip() + if isinstance(content, str): user_messages.append(content) + else: + for content_item in content: + if content_item.get("type") == "text": + content = content_item.get("text").strip() + if content: + user_messages.append(content) else: return "\n".join(user_messages[::-1]) return "\n".join(user_messages[::-1])