hadadrjt committed on
Commit
b3cf31b
·
1 Parent(s): ec837b4

SearchGPT: Speed up process.

Browse files
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio[oauth,mcp]
2
- openai
 
 
1
  gradio[oauth,mcp]
2
+ openai
3
+ aiohttp[speedups]
src/engine/browser_engine.py CHANGED
@@ -3,14 +3,17 @@
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
 
 
6
  import requests
 
7
  from config import CONTENT_EXTRACTION, SEARCH_SELECTION
8
  from src.core.web_loader import web_loader
9
 
10
  class BrowserEngine:
11
  def __init__(self, configuration):
12
  self.config = configuration
13
-
14
  def generate_headers(self):
15
  ipv4 = web_loader.get_ipv4()
16
  ipv6 = web_loader.get_ipv6()
@@ -45,44 +48,92 @@ class BrowserEngine:
45
  "X-Timezone": location['timezone']
46
  }
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def extract_page_content(self, target_url: str) -> str:
49
  try:
50
- headers = self.generate_headers()
51
- payload = {
52
- "url": target_url
53
- }
54
- request_response = requests.post(
55
- self.config.content_reader_api,
56
- data=payload,
57
- headers=headers,
58
- timeout=self.config.request_timeout,
59
- )
60
- request_response.raise_for_status()
61
- extracted_content = request_response.text
62
- return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
63
- except Exception as error:
64
- return f"Error reading URL: {str(error)}"
 
65
 
66
  def perform_search(self, search_query: str, search_provider: str = "google") -> str:
67
  try:
68
- headers = self.generate_headers()
69
-
70
- if search_provider == "baidu":
71
- full_url = f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={requests.utils.quote(search_query)}"
72
- headers["X-Target-Selector"] = "#content_left"
73
- else:
74
- provider_prefix = "!go" if search_provider == "google" else "!bi"
75
- encoded_query = requests.utils.quote(f"{provider_prefix} {search_query}")
76
- full_url = f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={encoded_query}"
77
- headers["X-Target-Selector"] = "#urls"
78
-
79
- search_response = requests.get(
80
- full_url,
81
- headers=headers,
82
- timeout=self.config.request_timeout
83
- )
84
- search_response.raise_for_status()
85
- search_results = search_response.text
86
- return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
87
- except Exception as error:
88
- return f"Error during search: {str(error)}"
 
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
+ import asyncio
7
+ import aiohttp
8
  import requests
9
+ from urllib.parse import quote
10
  from config import CONTENT_EXTRACTION, SEARCH_SELECTION
11
  from src.core.web_loader import web_loader
12
 
13
  class BrowserEngine:
14
  def __init__(self, configuration):
15
  self.config = configuration
16
+
17
  def generate_headers(self):
18
  ipv4 = web_loader.get_ipv4()
19
  ipv6 = web_loader.get_ipv6()
 
48
  "X-Timezone": location['timezone']
49
  }
50
 
51
+ def _build_search_url_and_selector(self, search_query: str, search_provider: str = "google"):
52
+ if search_provider == "baidu":
53
+ return (
54
+ f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
55
+ "#content_left"
56
+ )
57
+ provider_prefix = "!go" if search_provider == "google" else "!bi"
58
+ return (
59
+ f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={quote(f'{provider_prefix} {search_query}')}",
60
+ "#urls"
61
+ )
62
+
63
+ async def _async_post(self, url: str, data: dict, headers: dict):
64
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
65
+ async with aiohttp.ClientSession(timeout=timeout) as session:
66
+ async with session.post(url, data=data, headers=headers) as response:
67
+ text = await response.text()
68
+ if response.status >= 400:
69
+ raise aiohttp.ClientResponseError(
70
+ request_info=response.request_info,
71
+ history=response.history,
72
+ status=response.status,
73
+ message=text,
74
+ headers=response.headers
75
+ )
76
+ return text
77
+
78
+ async def _async_get(self, url: str, headers: dict):
79
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
80
+ async with aiohttp.ClientSession(timeout=timeout) as session:
81
+ async with session.get(url, headers=headers) as response:
82
+ text = await response.text()
83
+ if response.status >= 400:
84
+ raise aiohttp.ClientResponseError(
85
+ request_info=response.request_info,
86
+ history=response.history,
87
+ status=response.status,
88
+ message=text,
89
+ headers=response.headers
90
+ )
91
+ return text
92
+
93
+ def _sync_post(self, url: str, data: dict, headers: dict):
94
+ response = requests.post(url, data=data, headers=headers, timeout=self.config.request_timeout)
95
+ response.raise_for_status()
96
+ return response.text
97
+
98
+ def _sync_get(self, url: str, headers: dict):
99
+ response = requests.get(url, headers=headers, timeout=self.config.request_timeout)
100
+ response.raise_for_status()
101
+ return response.text
102
+
103
+ async def async_extract_page_content(self, target_url: str) -> str:
104
+ headers = self.generate_headers()
105
+ payload = {"url": target_url}
106
+ extracted_content = await self._async_post(self.config.content_reader_api, payload, headers)
107
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
108
+
109
  def extract_page_content(self, target_url: str) -> str:
110
  try:
111
+ return asyncio.run(self.async_extract_page_content(target_url))
112
+ except Exception:
113
+ try:
114
+ headers = self.generate_headers()
115
+ payload = {"url": target_url}
116
+ extracted_content = self._sync_post(self.config.content_reader_api, payload, headers)
117
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
118
+ except Exception as error:
119
+ return f"Error reading URL: {str(error)}"
120
+
121
+ async def async_perform_search(self, search_query: str, search_provider: str = "google") -> str:
122
+ headers = self.generate_headers()
123
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
124
+ headers["X-Target-Selector"] = selector
125
+ search_results = await self._async_get(full_url, headers)
126
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
127
 
128
  def perform_search(self, search_query: str, search_provider: str = "google") -> str:
129
  try:
130
+ return asyncio.run(self.async_perform_search(search_query, search_provider))
131
+ except Exception:
132
+ try:
133
+ headers = self.generate_headers()
134
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
135
+ headers["X-Target-Selector"] = selector
136
+ search_results = self._sync_get(full_url, headers)
137
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
138
+ except Exception as error:
139
+ return f"Error during search: {str(error)}"
 
 
 
 
 
 
 
 
 
 
 
src/processor/tools/interaction.py CHANGED
@@ -14,109 +14,212 @@ from config import MAX_TOKENS, REASONING_DELAY
14
 
15
  def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
16
  maximum_iterations = 1
 
 
17
  logs_generator = ""
18
  tool_results = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- for iteration_index in range(maximum_iterations):
21
- try:
22
- model_response = server.chat.completions.create(
23
- model=model_name,
24
- messages=conversation_messages,
25
- tools=tool_definitions,
26
- tool_choice="auto",
27
- max_tokens=MAX_TOKENS,
28
- temperature=0.6
29
- )
30
- except Exception:
31
- return conversation_messages, logs_generator, False
32
-
33
- response_choice = model_response.choices[0]
34
- assistant_message = response_choice.message
35
- formatted_assistant_message = assistant_response(assistant_message)
36
 
37
- conversation_messages.append(
38
- {
39
- "role": formatted_assistant_message["role"],
40
- "content": formatted_assistant_message["content"],
41
- "tool_calls": formatted_assistant_message["tool_calls"]
42
- }
43
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- pending_tool_calls = assistant_message.tool_calls or []
46
- if not pending_tool_calls:
47
- if logs_generator:
48
- logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
49
- return conversation_messages, logs_generator, False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- for tool_invocation in pending_tool_calls:
52
- tool_name = tool_invocation.function.name
53
- tool_arguments_raw = tool_invocation.function.arguments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
 
56
 
57
- if extraction_error:
58
- error_reasoning = tool_reasoning(tool_name, None, "error", error=extraction_error)
59
- for i in range(0, len(error_reasoning), 5):
60
- logs_generator = styles(reasoning_interfaces(error_reasoning, i), expanded=True)
61
- yield logs_generator
62
- time.sleep(REASONING_DELAY)
63
- logs_generator = styles(error_reasoning, expanded=True)
64
- yield logs_generator
65
- tool_execution_result = extraction_error
66
  else:
67
- reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
68
- for i in range(0, len(reasoning_status), 5):
69
- logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
70
- yield logs_generator
71
- time.sleep(REASONING_DELAY)
72
-
73
- reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
74
- for i in range(0, len(reasoning_start), 5):
75
- logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
76
  yield logs_generator
77
  time.sleep(REASONING_DELAY)
78
-
79
- try:
80
- tool_execution_result = invoke_tool_function(
81
- search_engine,
82
- tool_name,
83
- extracted_arguments
84
- )
85
- tool_results.append({
86
- "tool": tool_name,
87
- "arguments": extracted_arguments,
88
- "result": tool_execution_result
89
- })
90
-
91
- reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
92
- for i in range(0, len(reasoning_done), 5):
93
- logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
94
- yield logs_generator
95
- time.sleep(REASONING_DELAY)
96
- logs_generator = styles(reasoning_done, expanded=False)
97
- yield logs_generator
98
-
99
- except Exception as tool_error:
100
- error_reasoning = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
101
- for i in range(0, len(error_reasoning), 5):
102
- logs_generator = styles(reasoning_interfaces(error_reasoning, i), expanded=True)
103
- yield logs_generator
104
- time.sleep(REASONING_DELAY)
105
- logs_generator = styles(error_reasoning, expanded=True)
106
- yield logs_generator
107
- tool_execution_result = str(tool_error)
108
-
109
- conversation_messages.append(
110
- {
111
- "role": "tool",
112
- "tool_call_id": tool_invocation.id,
113
- "name": tool_name,
114
- "content": tool_execution_result
115
- }
116
- )
117
 
118
  if logs_generator:
119
  logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
120
 
121
- results_generator = len(tool_results) > 0
122
- return conversation_messages, logs_generator, results_generator
 
14
 
15
def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
    # Generator driving one tool-calling turn against the model.
    # It yields styled log-HTML strings as progress updates; the final
    # `return` value (conversation_messages, logs, had_results) reaches the
    # caller via StopIteration.value when the generator is exhausted.
    maximum_iterations = 1
    max_retry_limit = 10
    retry_count = 0
    logs_generator = ""
    tool_results = []
    execution_success = False
    last_error = None
    error_history = []
    # Bookkeeping that drives the adaptive retry schedule below.
    iteration_metrics = {
        "attempts": 0,
        "failures": 0,
        "success_rate": 0,
        "error_patterns": {},
        "retry_delays": [
            0.5,
            1.0,
            1.5,
            2.0,
            2.5,
            3.0
        ],
        "backoff_multiplier": 1.5
    }

    while maximum_iterations <= max_retry_limit and not execution_success:
        iteration_metrics["attempts"] += 1
        current_iteration_successful = False
        iteration_errors = []

        for iteration_index in range(maximum_iterations):
            try:
                # Sleep before retried model calls; the very first attempt
                # (retry_count == 0) is immediate. Delay is capped by the
                # last entry of retry_delays, scaled by backoff_multiplier.
                retry_delay = iteration_metrics["retry_delays"][min(retry_count, len(iteration_metrics["retry_delays"]) - 1)]
                if retry_count > 0:
                    time.sleep(retry_delay * iteration_metrics["backoff_multiplier"])

                model_response = server.chat.completions.create(
                    model=model_name,
                    messages=conversation_messages,
                    tools=tool_definitions,
                    tool_choice="auto",
                    max_tokens=MAX_TOKENS,
                    temperature=0.6
                )

                response_choice = model_response.choices[0]
                assistant_message = response_choice.message
                formatted_assistant_message = assistant_response(assistant_message)

                # Record the assistant turn (content + any tool calls) in the
                # shared conversation history.
                conversation_messages.append(
                    {
                        "role": formatted_assistant_message["role"],
                        "content": formatted_assistant_message["content"],
                        "tool_calls": formatted_assistant_message["tool_calls"]
                    }
                )

                pending_tool_calls = assistant_message.tool_calls or []
                if not pending_tool_calls:
                    # Model answered without requesting tools: collapse the
                    # log panel and finish the turn successfully.
                    if logs_generator:
                        logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
                    execution_success = True
                    current_iteration_successful = True
                    break

                tool_execution_errors = []
                for tool_invocation in pending_tool_calls:
                    tool_name = tool_invocation.function.name
                    tool_arguments_raw = tool_invocation.function.arguments

                    extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)

                    if extraction_error:
                        # Malformed tool arguments: count the pattern and
                        # stream the error reasoning to the UI.
                        error_key = f"{tool_name}_extraction"
                        iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
                        tool_execution_errors.append({
                            "tool": tool_name,
                            "error": extraction_error,
                            "type": "extraction"
                        })

                        reasoning_error = tool_reasoning(tool_name, None, "error", error=extraction_error)
                        # Reveal the reasoning text in 20-char increments for
                        # a typing effect; each step yields a UI update.
                        for i in range(0, len(reasoning_error), 20):
                            logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
                            yield logs_generator
                            time.sleep(REASONING_DELAY)
                        logs_generator = styles(reasoning_error, expanded=True)
                        yield logs_generator
                        tool_execution_result = extraction_error
                    else:
                        reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
                        for i in range(0, len(reasoning_status), 20):
                            logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
                            yield logs_generator
                            time.sleep(REASONING_DELAY)

                        reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
                        for i in range(0, len(reasoning_start), 20):
                            logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
                            yield logs_generator
                            time.sleep(REASONING_DELAY)

                        try:
                            tool_execution_result = invoke_tool_function(
                                search_engine,
                                tool_name,
                                extracted_arguments
                            )
                            tool_results.append({
                                "tool": tool_name,
                                "arguments": extracted_arguments,
                                "result": tool_execution_result,
                                "iteration": maximum_iterations,
                                "retry_count": retry_count
                            })

                            reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
                            for i in range(0, len(reasoning_done), 20):
                                logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
                                yield logs_generator
                                time.sleep(REASONING_DELAY)
                            logs_generator = styles(reasoning_done, expanded=False)
                            yield logs_generator

                        except Exception as tool_error:
                            # Tool raised: count the pattern, stream the
                            # error, and feed the error text back to the model
                            # as the tool result.
                            error_key = f"{tool_name}_execution"
                            iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
                            tool_execution_errors.append({
                                "tool": tool_name,
                                "error": str(tool_error),
                                "type": "execution",
                                "arguments": extracted_arguments
                            })

                            reasoning_error = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
                            for i in range(0, len(reasoning_error), 20):
                                logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
                                yield logs_generator
                                time.sleep(REASONING_DELAY)
                            logs_generator = styles(reasoning_error, expanded=True)
                            yield logs_generator
                            tool_execution_result = str(tool_error)

                    # Tool output (or its error text) goes back into the
                    # conversation so the model can use it next call.
                    conversation_messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": tool_invocation.id,
                            "name": tool_name,
                            "content": tool_execution_result
                        }
                    )

                if not tool_execution_errors:
                    execution_success = True
                    current_iteration_successful = True
                    break
                else:
                    iteration_errors.extend(tool_execution_errors)

            except Exception as model_error:
                # The model call itself failed; record it and let the retry
                # logic below decide whether to widen the iteration budget.
                last_error = str(model_error)
                error_history.append({
                    "iteration": maximum_iterations,
                    "error": last_error,
                    "timestamp": time.time()
                })
                iteration_metrics["failures"] += 1
                iteration_errors.append({
                    "error": last_error,
                    "type": "model"
                })

        if current_iteration_successful:
            execution_success = True
            break
        else:
            if iteration_errors:
                error_history.extend(iteration_errors)

            retry_count += 1
            previous_iterations = maximum_iterations

            # Grow the iteration budget faster (by 2) once any single error
            # pattern has repeated more than 3 times; otherwise grow by 1.
            if iteration_metrics["error_patterns"]:
                frequent_errors = max(iteration_metrics["error_patterns"].values())
                if frequent_errors > 3:
                    maximum_iterations = min(maximum_iterations + 2, max_retry_limit)
                else:
                    maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
            else:
                maximum_iterations = min(maximum_iterations + 1, max_retry_limit)

            if maximum_iterations > previous_iterations:
                retry_reasoning = f"Retrying with increased iterations: {maximum_iterations} (attempt {retry_count + 1})"
                for i in range(0, len(retry_reasoning), 20):
                    logs_generator = styles(reasoning_interfaces(retry_reasoning, i), expanded=True)
                    yield logs_generator
                    time.sleep(REASONING_DELAY)

            if maximum_iterations >= max_retry_limit:
                final_error = f"Maximum retry limit reached after {iteration_metrics['attempts']} attempts with {iteration_metrics['failures']} failures"
                logs_generator = styles(final_error, expanded=True)
                yield logs_generator
                break

    # NOTE(review): success_rate divides tool-result count by attempt count;
    # with several tool calls per attempt this can exceed 100 — confirm intent.
    iteration_metrics["success_rate"] = (len(tool_results) / max(iteration_metrics["attempts"], 1)) * 100

    if logs_generator:
        logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)

    generator_results = len(tool_results) > 0
    return conversation_messages, logs_generator, generator_results