bivalve committed · Commit 05fba81 · 1 Parent(s): 36a1d88

Added missing dependencies for the Wikipedia reader and transcription tools; also removed truncation in the webpage extraction tool.

Files changed (2)
  1. requirements.txt +3 -0
  2. tools.py +2 -2
requirements.txt CHANGED
@@ -11,3 +11,6 @@ pandas
 av
 yt-dlp
 beautifulsoup4
+torch
+transformers
+lxml
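The new packages presumably back the tools named in the commit message: torch and transformers for the transcription tool, and lxml as a faster parser backend for the Wikipedia reader (alongside the already-listed beautifulsoup4). A hedged sketch only, since the transcription code itself is not part of this diff; the function name and model checkpoint below are hypothetical, not taken from tools.py:

# Hypothetical illustration of why torch and transformers are now required,
# assuming a Whisper-style speech-to-text pipeline; not the repo's actual code.
from transformers import pipeline

def transcribe_audio(audio_path: str) -> str:
    # transformers uses the torch backend installed via requirements.txt
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
    return asr(audio_path)["text"]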
tools.py CHANGED
@@ -162,8 +162,8 @@ def extract_url_content(url_list: list[str]) -> str:
     if extract_results and 'results' in extract_results and len(extract_results['results']) > 0:
         for i, page_content in enumerate(extract_results['results']):
             del extract_results['results'][i]['images']
-            if len(page_content['raw_content']) > 40000:
-                extract_results['results'][i]['raw_content'] = page_content['raw_content'][:40000] + '... [truncated]'
+            # if len(page_content['raw_content']) > 40000:
+            #     extract_results['results'][i]['raw_content'] = page_content['raw_content'][:40000] + '... [truncated]'
         return json.dumps(extract_results['results'], indent=2)
     else:
         return f"No content could be extracted from the provided URLs: {url_list}"
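With the 40,000-character truncation commented out, each result's raw_content is now serialized in full; only the images field is stripped before the JSON dump. A small self-contained sketch of the post-change behavior — the shape of extract_results is inferred from the hunk above, and the sample data is made up:

# Sketch of the post-change behavior, using a dummy extract_results payload.
import json

extract_results = {
    "results": [
        {"url": "https://example.com", "raw_content": "full page text ...", "images": []},
    ]
}

if extract_results and 'results' in extract_results and len(extract_results['results']) > 0:
    for i, page_content in enumerate(extract_results['results']):
        del extract_results['results'][i]['images']
        # raw_content is passed through untruncated
    print(json.dumps(extract_results['results'], indent=2))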