Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -48,7 +48,7 @@ def parse_action(string: str):
|
|
| 48 |
|
| 49 |
VERBOSE = True
|
| 50 |
MAX_HISTORY = 100
|
| 51 |
-
MAX_DATA =
|
| 52 |
|
| 53 |
def format_prompt(message, history):
|
| 54 |
prompt = "<s>"
|
|
@@ -289,7 +289,8 @@ def find_all(purpose,task,history, url):
|
|
| 289 |
print (f"trying URL:: {url}")
|
| 290 |
try:
|
| 291 |
if url != "" and url != None:
|
| 292 |
-
rawp = []
|
|
|
|
| 293 |
source = requests.get(url)
|
| 294 |
#source = urllib.request.urlopen(url).read()
|
| 295 |
soup = bs4.BeautifulSoup(source.content,'lxml')
|
|
@@ -303,18 +304,22 @@ def find_all(purpose,task,history, url):
|
|
| 303 |
print(soup.title.parent.name)
|
| 304 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
| 305 |
print([tag.name for tag in soup.find_all()])
|
| 306 |
-
rawp=soup
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
c=0
|
| 308 |
-
rl = len(
|
| 309 |
-
print(rl)
|
| 310 |
-
for
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
print (c)
|
| 315 |
if rl > MAX_DATA:
|
| 316 |
print("compressing...")
|
| 317 |
-
rawp = compress_data(
|
| 318 |
print (rawp)
|
| 319 |
history += "observation: the search results are:\n {}\n".format(rawp)
|
| 320 |
task = "complete?"
|
|
|
|
| 48 |
|
| 49 |
VERBOSE = True
|
| 50 |
MAX_HISTORY = 100
|
| 51 |
+
MAX_DATA = 1000
|
| 52 |
|
| 53 |
def format_prompt(message, history):
|
| 54 |
prompt = "<s>"
|
|
|
|
| 289 |
print (f"trying URL:: {url}")
|
| 290 |
try:
|
| 291 |
if url != "" and url != None:
|
| 292 |
+
#rawp = []
|
| 293 |
+
out = []
|
| 294 |
source = requests.get(url)
|
| 295 |
#source = urllib.request.urlopen(url).read()
|
| 296 |
soup = bs4.BeautifulSoup(source.content,'lxml')
|
|
|
|
| 304 |
print(soup.title.parent.name)
|
| 305 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
| 306 |
print([tag.name for tag in soup.find_all()])
|
| 307 |
+
rawp=(f'RAW TEXT RETURNED:\n*********\n{soup.text}\n*********\n')
|
| 308 |
+
out.append(rawp)
|
| 309 |
+
q=("a","p","span","content","article")
|
| 310 |
+
for p in soup.find_all(f'{q}'):
|
| 311 |
+
out.append([{q:p.string,"additional":z,"parent":p.parent.name,"previous":[b for b in p.previous],"first-child":[b.name for b in p.children],"content":p}])
|
| 312 |
c=0
|
| 313 |
+
rl = len(out)
|
| 314 |
+
print(f'rl:: {rl}')
|
| 315 |
+
for ea in out:
|
| 316 |
+
for i in str(ea):
|
| 317 |
+
if i == " " or i==",":
|
| 318 |
+
c +=1
|
| 319 |
+
print (f'c:: {c}')
|
| 320 |
if rl > MAX_DATA:
|
| 321 |
print("compressing...")
|
| 322 |
+
rawp = compress_data(c,purpose,task,out)
|
| 323 |
print (rawp)
|
| 324 |
history += "observation: the search results are:\n {}\n".format(rawp)
|
| 325 |
task = "complete?"
|