WebDatasets

Runtime error

App Files Files Community

awacke1 committed on Dec 9, 2023

Commit

66c145a

1 Parent(s): f8f0382

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -30

app.py CHANGED Viewed

@@ -6,12 +6,10 @@ import base64
 from bs4 import BeautifulSoup
 import hashlib
 import json
-import mimetypes
-import shutil
-from zipfile import ZipFile
-EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md', '.gitattributes', "backup.py", "Dockerfile"]
 if not os.path.exists("history.json"):
     with open("history.json", "w") as f:
         json.dump({}, f)
@@ -37,10 +35,12 @@ def download_html_and_files(url, subdir):
         file_url = urllib.parse.urljoin(base_url, link.get('href'))
         local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
         if not local_filename.endswith('/') and local_filename != subdir:
             link['href'] = local_filename
             download_file(file_url, local_filename)
     with open(os.path.join(subdir, "index.html"), "w") as file:
         file.write(str(soup))
@@ -48,6 +48,36 @@ def list_files(directory_path='.'):
     files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
     return [f for f in files if f not in EXCLUDED_FILES]
 def get_download_link(file):
     with open(file, "rb") as f:
         bytes = f.read()
@@ -55,35 +85,16 @@ def get_download_link(file):
         href = f'<a href="data:file/octet-stream;base64,{b64}" download=\'{os.path.basename(file)}\'>Click to download {os.path.basename(file)}</a>'
     return href
-def delete_all_files():
-    # Delete every file under the current directory whose name is not in
-    # EXCLUDED_FILES, remove every subdirectory, then report success in the UI.
-    # topdown=False makes os.walk visit nested directories before their parents.
-    for root, dirs, files in os.walk(".", topdown=False):
-        for name in files:
-            if name not in EXCLUDED_FILES:
-                os.remove(os.path.join(root, name))
-        for name in dirs:
-            # NOTE(review): rmtree removes the whole subdirectory, including any
-            # files inside it whose names are listed in EXCLUDED_FILES.
-            shutil.rmtree(os.path.join(root, name))
-    st.success("All files and folders deleted successfully!")
-def create_zip_and_get_link():
-    # Zip every non-excluded file under the current directory into
-    # all_files.zip and render a base64 data-URI download link for it.
-    zip_filename = "all_files.zip"
-    with ZipFile(zip_filename, 'w') as zipf:
-        for root, dirs, files in os.walk(".", topdown=False):
-            for file in files:
-                # Skip the archive itself so it is not added to its own contents.
-                if file not in EXCLUDED_FILES and file != zip_filename:
-                    zipf.write(os.path.join(root, file))
-    with open(zip_filename, "rb") as f:
-        # The whole archive is read into memory and embedded in the page markup.
-        # NOTE(review): the local name `bytes` shadows the builtin within this function.
-        bytes = f.read()
-        b64 = base64.b64encode(bytes).decode()
-        href = f'<a href="data:file/zip;base64,{b64}" download=\'{zip_filename}\'>🔽 Download All Files</a>'
-        st.markdown(href, unsafe_allow_html=True)
 def main():
     st.sidebar.title('Web Datasets Bulk Downloader')
     url = st.sidebar.text_input('Please enter a Web URL to bulk download text and files')
     with open("history.json", "r") as f:
         history = json.load(f)
     if url:
         subdir = hashlib.md5(url.encode()).hexdigest()
         if not os.path.exists(subdir):
@@ -101,13 +112,11 @@ def main():
         for subdir in history.values():
             show_download_links(subdir)
     with st.expander("URL History and Downloaded Files"):
         for url, subdir in history.items():
             st.markdown(f"#### {url}")
             show_download_links(subdir)
-    if st.sidebar.button('🗑️ Delete All'):
-        delete_all_files()
-    if st.sidebar.button('📦 Download All'):
-        create_zip_and_get_link

 from bs4 import BeautifulSoup
 import hashlib
 import json
+EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
+# Create a history.json file if it doesn't exist yet
 if not os.path.exists("history.json"):
     with open("history.json", "w") as f:
         json.dump({}, f)
         file_url = urllib.parse.urljoin(base_url, link.get('href'))
         local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
+        # Skip if the local filename is a directory
         if not local_filename.endswith('/') and local_filename != subdir:
             link['href'] = local_filename
             download_file(file_url, local_filename)
+    # Save the modified HTML content
     with open(os.path.join(subdir, "index.html"), "w") as file:
         file.write(str(soup))
     files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
     return [f for f in files if f not in EXCLUDED_FILES]
+def show_file_operations(file_path):
+    """Render edit, save and delete controls for one downloaded file.
+
+    Args:
+        file_path: Path of the file the controls operate on.
+
+    Widget keys are derived from the full ``file_path`` rather than the
+    basename, so files with the same name in different subdirectories
+    (for example each subdirectory's ``index.html``) do not collide.
+    NOTE(review): a given ``file_path`` must still be rendered at most once
+    per script run, otherwise Streamlit reports duplicate widget keys.
+    """
+    file_name = os.path.basename(file_path)
+    # st.button is True only during the rerun triggered by its own click, so
+    # "edit mode" is kept in session state; a Save button nested directly
+    # under the Edit button's branch would vanish before its click is handled.
+    edit_flag = f"editing::{file_path}"
+    st.write(f"File: {file_name}")
+    # Edit button
+    if st.button(f"✏️ Edit {file_name}", key=f"edit::{file_path}"):
+        st.session_state[edit_flag] = True
+    if st.session_state.get(edit_flag, False):
+        try:
+            with open(file_path, "r") as f:
+                file_content = f.read()
+        except UnicodeDecodeError:
+            # Downloads may be binary; leave edit mode instead of failing on
+            # every subsequent rerun.
+            st.session_state[edit_flag] = False
+            st.error(f"File {file_name} is not a text file and cannot be edited here.")
+        else:
+            file_content = st.text_area(
+                "Edit the file content:",
+                value=file_content,
+                height=250,
+                key=f"content::{file_path}",
+            )
+            if st.button(f"💾 Save {file_name}", key=f"save::{file_path}"):
+                with open(file_path, "w") as f:
+                    f.write(file_content)
+                st.session_state[edit_flag] = False
+                st.success(f"File {file_name} saved!")
+    # Delete button
+    if st.button(f"🗑️ Delete {file_name}", key=f"delete::{file_path}"):
+        os.remove(file_path)
+        # Drop the edit flag so a later file at the same path starts clean.
+        st.session_state.pop(edit_flag, None)
+        st.markdown(f"🎉 File {file_name} deleted!")
+def show_download_links(subdir):
+    """List the files in ``subdir`` with a download link and file controls each."""
+    st.write(f'Files for {subdir}:')
+    for name in list_files(subdir):
+        path = os.path.join(subdir, name)
+        # Guard clause: report entries that are no longer regular files.
+        if not os.path.isfile(path):
+            st.write(f"File not found: {name}")
+            continue
+        st.markdown(get_download_link(path), unsafe_allow_html=True)
+        show_file_operations(path)
 def get_download_link(file):
     with open(file, "rb") as f:
         bytes = f.read()
         href = f'<a href="data:file/octet-stream;base64,{b64}" download=\'{os.path.basename(file)}\'>Click to download {os.path.basename(file)}</a>'
     return href
 def main():
     st.sidebar.title('Web Datasets Bulk Downloader')
     url = st.sidebar.text_input('Please enter a Web URL to bulk download text and files')
+    # Load history
     with open("history.json", "r") as f:
         history = json.load(f)
+    # Save the history of URL entered as a json file
     if url:
         subdir = hashlib.md5(url.encode()).hexdigest()
         if not os.path.exists(subdir):
         for subdir in history.values():
             show_download_links(subdir)
+    # Display history as markdown
     with st.expander("URL History and Downloaded Files"):
         for url, subdir in history.items():
             st.markdown(f"#### {url}")
             show_download_links(subdir)
+if __name__ == "__main__":
+    main()