Spaces:

OEvortex
/

Webscout-API

Paused

App Files Files Community

Abhaykoul committed on Jul 18, 2024

Commit

25af675

verified ·

1 Parent(s): 0e6e27c

Update webscout.py

Browse files

Files changed (1) hide show

webscout.py +51 -31

webscout.py CHANGED Viewed

@@ -5,11 +5,14 @@ from datetime import datetime, timezone
 from decimal import Decimal
 from functools import cached_property
 from itertools import cycle, islice
 from threading import Event
 from types import TracebackType
 from typing import Dict, List, Optional, Tuple, Type, Union, cast
-import pyreqwest_impersonate as pri  # type: ignore
 try:
     from lxml.etree import _Element
@@ -26,29 +29,33 @@ from html import unescape
 from math import atan2, cos, radians, sin, sqrt
 from typing import Any, Dict, List, Union
 from urllib.parse import unquote
-import orjson
-import requests
-import base64
-from typing import List, Dict, Union
-import json
-import requests
-import base64
-from typing import List, Dict, Union
 REGEX_STRIP_TAGS = re.compile("<.*?>")
 def json_dumps(obj: Any) -> str:
     try:
-        return orjson.dumps(obj).decode("utf-8")
     except Exception as ex:
         raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
 def json_loads(obj: Union[str, bytes]) -> Any:
     try:
-        return orjson.loads(obj)
     except Exception as ex:
         raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
@@ -101,8 +108,6 @@ def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decim
     c = 2 * atan2(sqrt(a), sqrt(1 - a))
     return R * c
-logger = logging.getLogger("webscout.WEBS")
 class WebscoutE(Exception):
     """Base exception class for search."""
@@ -121,14 +126,26 @@ class AllProvidersFailure(Exception):
     """None of the providers generated response successfully"""
     pass
 class WEBS:
     """webscout class to get search results from duckduckgo.com."""
     _executor: ThreadPoolExecutor = ThreadPoolExecutor()
     def __init__(
         self,
-        headers: Optional[Dict[str, str]] = {'0': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', '1': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', '2': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', '3': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', '4': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', '5': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62', '6': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0', '7': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Mobile/15E148 Safari/605.1.15', '8': 'Mozilla/5.0 (iPad; CPU OS 16_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Mobile/15E148 Safari/605.1.15', '9': 'Mozilla/5.0 (Android 13; Mobile; rv:109.0) Gecko/109.0 Firefox/109.0', '10': 'Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '11': 'Mozilla/5.0 (Linux; U; Android 11; en-us; SM-G991U) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/89.0.4387.119 Mobile Safari/537.36', '12': 'Mozilla/5.0 (Linux; Android 12; SM-G998U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '13': 'Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '14': 'Mozilla/5.0 (Linux; Android 12; LM-G900V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '15': 'Mozilla/5.0 (Linux; Android 11; SM-G975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', 
'16': 'Mozilla/5.0 (Linux; Android 11; SM-N975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '17': 'Mozilla/5.0 (Linux; Android 13; SM-S918U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36', '18': 'Mozilla/5.0 (Linux; Android 13; SM-F936U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36'},
         proxy: Optional[str] = None,
         proxies: Union[Dict[str, str], str, None] = None,  # deprecated
         timeout: Optional[int] = 10,
@@ -152,9 +169,9 @@ class WEBS:
             headers=self.headers,
             proxy=self.proxy,
             timeout=timeout,
-            cookie_store=False,
             referer=True,
-            impersonate="chrome_124",
             follow_redirects=False,
             verify=False,
         )
@@ -208,13 +225,14 @@ class WEBS:
         resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords})
         return _extract_vqd(resp_content, keywords)
-    def chat(self, keywords: str, model: str = "gpt-3.5") -> str:
         """Initiates a chat session with DuckDuckGo AI.
         Args:
             keywords (str): The initial message or question to send to the AI.
             model (str): The model to use: "gpt-3.5", "claude-3-haiku", "llama-3-70b", "mixtral-8x7b".
                 Defaults to "gpt-3.5".
         Returns:
             str: The response from the AI.
@@ -237,18 +255,16 @@ class WEBS:
             "messages": self._chat_messages,
         }
         resp = self.client.post(
-            "https://duckduckgo.com/duckchat/v1/chat", headers={"x-vqd-4": self._chat_vqd}, json=json_data
         )
         self._chat_vqd = resp.headers.get("x-vqd-4", "")
-        messages = []
-        for line in resp.text.replace("data: ", "").replace("[DONE]", "").split("\n\n"):
-            x = line.strip()
-            if x:
-                j = json_loads(x)
-                message = j.get("message", "")
-                messages.append(message)
-        result = "".join(messages)
         self._chat_messages.append({"role": "assistant", "content": result})
         return result
@@ -435,7 +451,7 @@ class WEBS:
             for e in elements:
                 if isinstance(e, _Element):
                     hrefxpath = e.xpath("./a/@href")
-                    href = str(hrefxpath[0]) if isinstance(hrefxpath, List) else None
                     if (
                         href
                         and href not in cache
@@ -445,9 +461,9 @@ class WEBS:
                     ):
                         cache.add(href)
                         titlexpath = e.xpath("./h2/a/text()")
-                        title = str(titlexpath[0]) if isinstance(titlexpath, List) else ""
                         bodyxpath = e.xpath("./a//text()")
-                        body = "".join(str(x) for x in bodyxpath) if isinstance(bodyxpath, List) else ""
                         result = {
                             "title": _normalize(title),
                             "href": _normalize_url(href),
@@ -537,10 +553,14 @@ class WEBS:
                         else:
                             cache.add(href)
                             titlexpath = e.xpath(".//a//text()")
-                            title = str(titlexpath[0]) if isinstance(titlexpath, List) else ""
                     elif i == 2:
                         bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
-                        body = "".join(str(x) for x in bodyxpath) if isinstance(bodyxpath, List) else ""
                         if href:
                             result = {
                                 "title": _normalize(title),

 from decimal import Decimal
 from functools import cached_property
 from itertools import cycle, islice
+from random import choice
 from threading import Event
 from types import TracebackType
 from typing import Dict, List, Optional, Tuple, Type, Union, cast
+import pyreqwest_impersonate as pri
 try:
     from lxml.etree import _Element
 from math import atan2, cos, radians, sin, sqrt
 from typing import Any, Dict, List, Union
 from urllib.parse import unquote
+from .exceptions import WebscoutE
+try:
+    HAS_ORJSON = True
+    import orjson
+except ImportError:
+    HAS_ORJSON = False
+    import json
 REGEX_STRIP_TAGS = re.compile("<.*?>")
 def json_dumps(obj: Any) -> str:
     try:
+        return (
+            orjson.dumps(obj, option=orjson.OPT_INDENT_2).decode()
+            if HAS_ORJSON
+            else json.dumps(obj, ensure_ascii=False, indent=2)
+        )
     except Exception as ex:
         raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
 def json_loads(obj: Union[str, bytes]) -> Any:
     try:
+        return orjson.loads(obj) if HAS_ORJSON else json.loads(obj)
     except Exception as ex:
         raise WebscoutE(f"{type(ex).__name__}: {ex}") from ex
     c = 2 * atan2(sqrt(a), sqrt(1 - a))
     return R * c
 class WebscoutE(Exception):
     """Base exception class for search."""
     """None of the providers generated response successfully"""
     pass
+logger = logging.getLogger("webscout.WEBS")
 class WEBS:
     """webscout class to get search results from duckduckgo.com."""
     _executor: ThreadPoolExecutor = ThreadPoolExecutor()
+    _impersonates = (
+        "chrome_99", "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_108",
+        "chrome_107", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118", "chrome_119",
+        "chrome_120", #"chrome_123", "chrome_124", "chrome_126",
+        "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_15.3", "safari_15.5",
+        "safari_15.6.1", "safari_16", "safari_16.5", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
+        #"okhttp_3.9", "okhttp_3.11", "okhttp_3.13", "okhttp_3.14", "okhttp_4.9", "okhttp_4.10", "okhttp_5",
+        "edge_99", "edge_101", "edge_122",
+    )  # fmt: skip
     def __init__(
         self,
+        headers: Optional[Dict[str, str]] = None,
         proxy: Optional[str] = None,
         proxies: Union[Dict[str, str], str, None] = None,  # deprecated
         timeout: Optional[int] = 10,
             headers=self.headers,
             proxy=self.proxy,
             timeout=timeout,
+            cookie_store=True,
             referer=True,
+            impersonate=choice(self._impersonates),
             follow_redirects=False,
             verify=False,
         )
         resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords})
         return _extract_vqd(resp_content, keywords)
+    def chat(self, keywords: str, model: str = "gpt-3.5", timeout: int = 20) -> str:
         """Initiates a chat session with DuckDuckGo AI.
         Args:
             keywords (str): The initial message or question to send to the AI.
             model (str): The model to use: "gpt-3.5", "claude-3-haiku", "llama-3-70b", "mixtral-8x7b".
                 Defaults to "gpt-3.5".
+            timeout (int): Timeout value for the HTTP client. Defaults to 20.
         Returns:
             str: The response from the AI.
             "messages": self._chat_messages,
         }
         resp = self.client.post(
+            "https://duckduckgo.com/duckchat/v1/chat",
+            headers={"x-vqd-4": self._chat_vqd},
+            json=json_data,
+            timeout=timeout,
         )
         self._chat_vqd = resp.headers.get("x-vqd-4", "")
+        data = ",".join(x for line in resp.text.rstrip("[DONE]\n").split("data:") if (x := line.strip()))
+        result = "".join(x.get("message", "") for x in json_loads("[" + data + "]"))
         self._chat_messages.append({"role": "assistant", "content": result})
         return result
             for e in elements:
                 if isinstance(e, _Element):
                     hrefxpath = e.xpath("./a/@href")
+                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, List) else None
                     if (
                         href
                         and href not in cache
                     ):
                         cache.add(href)
                         titlexpath = e.xpath("./h2/a/text()")
+                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, List) else ""
                         bodyxpath = e.xpath("./a//text()")
+                        body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, List) else ""
                         result = {
                             "title": _normalize(title),
                             "href": _normalize_url(href),
                         else:
                             cache.add(href)
                             titlexpath = e.xpath(".//a//text()")
+                            title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, List) else ""
                     elif i == 2:
                         bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
+                        body = (
+                            "".join(str(x) for x in bodyxpath).strip()
+                            if bodyxpath and isinstance(bodyxpath, List)
+                            else ""
+                        )
                         if href:
                             result = {
                                 "title": _normalize(title),