Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -309,10 +309,12 @@ def find_rss():
|
|
| 309 |
error_box_schema={"Name":"","Error":"","Keys":"","Other":""}
|
| 310 |
lod=""
|
| 311 |
out_box=[]
|
|
|
|
| 312 |
yield [],[(None,"loading sources")],None
|
| 313 |
with open ('raw_feed.json','r') as j:
|
| 314 |
cont = json.loads(j.read())
|
| 315 |
#print(cont)
|
|
|
|
| 316 |
for ea in cont:
|
| 317 |
try:
|
| 318 |
#lod=""
|
|
@@ -338,7 +340,7 @@ def find_rss():
|
|
| 338 |
except Exception as e:
|
| 339 |
lod=f'{rss_url} ::ERROR:: {e}'
|
| 340 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":1})
|
| 341 |
-
|
| 342 |
except Exception as e:
|
| 343 |
lod=f'{rss_url} ::ERROR:: {e}'
|
| 344 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":2})
|
|
@@ -348,8 +350,8 @@ def find_rss():
|
|
| 348 |
error_box.append({"Name":rss_url,"Error":f'Status Code:{r.status_code}',"Error Code":3})
|
| 349 |
pass
|
| 350 |
try:
|
| 351 |
-
print(lod['rss']['channel']['item'][0].keys())
|
| 352 |
-
print(lod['rss'].keys())
|
| 353 |
for i,ea in enumerate(lod['rss']['channel']['item']):
|
| 354 |
try:
|
| 355 |
r_link = ea['link']
|
|
@@ -357,6 +359,7 @@ def find_rss():
|
|
| 357 |
r_description = ea['description']
|
| 358 |
lods = {"title":r_title, "description":r_description,"link":r_link}
|
| 359 |
except Exception:
|
|
|
|
| 360 |
try:
|
| 361 |
r_link = ea['link']
|
| 362 |
r_title = ea['source']
|
|
@@ -383,12 +386,30 @@ def find_rss():
|
|
| 383 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":6})
|
| 384 |
print(f'Exception::{e}')
|
| 385 |
pass
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
|
|
|
|
| 388 |
#user_repo=save_data.split('datasets/',1)[1].split('/raw',1)[0]
|
| 389 |
timestamp=str(datetime.datetime.now())
|
| 390 |
timename=timestamp.replace(" ","--").replace(":","-").replace(".","-")
|
| 391 |
-
|
| 392 |
json_object = json.dumps(out_box)
|
| 393 |
#json_object = json.dumps(out_box,indent=4)
|
| 394 |
with open("tmp2.json", "w") as outfile:
|
|
@@ -401,6 +422,7 @@ def find_rss():
|
|
| 401 |
token=token_self,
|
| 402 |
repo_type="dataset",
|
| 403 |
)
|
|
|
|
| 404 |
yield out_box,[(None,f'Source is current as of:\n{timestamp} UTC\n\nThe current Date and Time is:\n{timestamp} UTC')],error_box
|
| 405 |
|
| 406 |
|
|
|
|
| 309 |
error_box_schema={"Name":"","Error":"","Keys":"","Other":""}
|
| 310 |
lod=""
|
| 311 |
out_box=[]
|
| 312 |
+
valid_box=[]
|
| 313 |
yield [],[(None,"loading sources")],None
|
| 314 |
with open ('raw_feed.json','r') as j:
|
| 315 |
cont = json.loads(j.read())
|
| 316 |
#print(cont)
|
| 317 |
+
j.close()
|
| 318 |
for ea in cont:
|
| 319 |
try:
|
| 320 |
#lod=""
|
|
|
|
| 340 |
except Exception as e:
|
| 341 |
lod=f'{rss_url} ::ERROR:: {e}'
|
| 342 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":1})
|
| 343 |
+
valid_box.append({"source":ea['NAME'],"link":ea['URL'],"section":ea['TOPIC'],"description":ea['DESCRIPTION']})
|
| 344 |
except Exception as e:
|
| 345 |
lod=f'{rss_url} ::ERROR:: {e}'
|
| 346 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":2})
|
|
|
|
| 350 |
error_box.append({"Name":rss_url,"Error":f'Status Code:{r.status_code}',"Error Code":3})
|
| 351 |
pass
|
| 352 |
try:
|
| 353 |
+
#print(lod['rss']['channel']['item'][0].keys())
|
| 354 |
+
#print(lod['rss'].keys())
|
| 355 |
for i,ea in enumerate(lod['rss']['channel']['item']):
|
| 356 |
try:
|
| 357 |
r_link = ea['link']
|
|
|
|
| 359 |
r_description = ea['description']
|
| 360 |
lods = {"title":r_title, "description":r_description,"link":r_link}
|
| 361 |
except Exception:
|
| 362 |
+
print(f"Exception::{ea}")
|
| 363 |
try:
|
| 364 |
r_link = ea['link']
|
| 365 |
r_title = ea['source']
|
|
|
|
| 386 |
error_box.append({"Name":rss_url,"Error":e,"Error Code":6})
|
| 387 |
print(f'Exception::{e}')
|
| 388 |
pass
|
| 389 |
+
json_object_valid = json.dumps(valid_box)
|
| 390 |
+
with open("tmp3.json", "w") as outfile3:
|
| 391 |
+
outfile3.write(json_object_valid)
|
| 392 |
+
api.upload_file(
|
| 393 |
+
path_or_fileobj="tmp3.json",
|
| 394 |
+
path_in_repo=f"/rss/valid-{timename}.json",
|
| 395 |
+
repo_id=reponame,
|
| 396 |
+
#repo_id=save_data.split('datasets/',1)[1].split('/raw',1)[0],
|
| 397 |
+
token=token_self,
|
| 398 |
+
repo_type="dataset",
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
yield out_box,[(None,'')],error_box
|
| 406 |
+
|
| 407 |
|
| 408 |
+
|
| 409 |
#user_repo=save_data.split('datasets/',1)[1].split('/raw',1)[0]
|
| 410 |
timestamp=str(datetime.datetime.now())
|
| 411 |
timename=timestamp.replace(" ","--").replace(":","-").replace(".","-")
|
| 412 |
+
print("DONE")
|
| 413 |
json_object = json.dumps(out_box)
|
| 414 |
#json_object = json.dumps(out_box,indent=4)
|
| 415 |
with open("tmp2.json", "w") as outfile:
|
|
|
|
| 422 |
token=token_self,
|
| 423 |
repo_type="dataset",
|
| 424 |
)
|
| 425 |
+
|
| 426 |
yield out_box,[(None,f'Source is current as of:\n{timestamp} UTC\n\nThe current Date and Time is:\n{timestamp} UTC')],error_box
|
| 427 |
|
| 428 |
|