Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """Download latest LanguageTool distribution.""" | |
| import logging | |
| import os | |
| import re | |
| import requests | |
| import subprocess | |
| import sys | |
| import tempfile | |
| import tqdm | |
| from typing import Optional | |
| import zipfile | |
| from distutils.spawn import find_executable | |
| from urllib.parse import urljoin | |
| from .utils import ( | |
| find_existing_language_tool_downloads, | |
| get_language_tool_download_path, | |
| LTP_JAR_DIR_PATH_ENV_VAR | |
| ) | |
# Create logger for this file.
logging.basicConfig(format='%(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Get download host from environment or default.
BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/')
# Archive name template; ``{version}`` is replaced by the release number.
FILENAME = 'LanguageTool-{version}.zip'
# Release downloaded when the caller does not ask for a specific one.
LTP_DOWNLOAD_VERSION = '6.4'

# Quoted form, e.g. ``java version "1.8.0_60"`` or ``openjdk version "9"``.
# The minor component is optional and anything after it (".0_60", "-ea", ...)
# is skipped, so versions such as "22-ea" or "17.0" are accepted as well.
JAVA_VERSION_REGEX = re.compile(
    r'^(?:java|openjdk) version "(?P<major1>\d+)(?:\.(?P<major2>\d+))?[^"]*"',
    re.MULTILINE)

# Updated for later versions of java: unquoted form, e.g. ``openjdk 11.0.2``.
# The word "version " is optional; it must be a non-capturing group here,
# because a character class ``[version ]?`` would only match a single letter.
JAVA_VERSION_REGEX_UPDATED = re.compile(
    r'^(?:java|openjdk) (?:version )?(?P<major1>\d+)\.(?P<major2>\d+)',
    re.MULTILINE)
def parse_java_version(version_text):
    """Extract the first two version numbers from ``java -version`` output.

    The quoted pattern is tried first and the newer unquoted pattern is used
    as a fallback. A missing second component is reported as ``0``.
    Raises ``SystemExit`` when neither pattern matches.

    >>> parse_java_version('''java version "1.6.0_65"
    ... Java(TM) SE Runtime Environment (build 1.6.0_65-b14-462-11M4609)
    ... Java HotSpot(TM) 64-Bit Server VM (build 20.65-b04-462, mixed mode))
    ... ''')
    (1, 6)
    >>> parse_java_version('''
    ... openjdk version "1.8.0_60"
    ... OpenJDK Runtime Environment (build 1.8.0_60-b27)
    ... OpenJDK 64-Bit Server VM (build 25.60-b23, mixed mode))
    ... ''')
    (1, 8)
    """
    found = JAVA_VERSION_REGEX.search(version_text)
    if found is None:
        found = JAVA_VERSION_REGEX_UPDATED.search(version_text)
    if found is None:
        raise SystemExit(
            'Could not parse Java version from """{}""".'.format(version_text))

    first = int(found.group('major1'))
    second_text = found.group('major2')
    second = int(second_text) if second_text else 0
    return (first, second)
def confirm_java_compatibility():
    """Confirm that a ``java`` executable with major version >= 8 is on PATH.

    Returns:
        True when the detected Java version is supported.

    Raises:
        ModuleNotFoundError: no ``java`` executable could be located.
        SystemError: the detected Java version is older than 8.
        SystemExit: raised by ``parse_java_version`` when the output of
            ``java -version`` cannot be parsed.
        subprocess.CalledProcessError: ``java -version`` exited non-zero.
    """
    # shutil.which is the standard-library replacement for
    # distutils.spawn.find_executable (distutils is deprecated and was
    # removed in Python 3.12). Imported here so the function carries its
    # own dependency.
    import shutil

    java_path = shutil.which('java')
    if not java_path:
        raise ModuleNotFoundError(
            'No java install detected. '
            'Please install java to use language-tool-python.'
        )

    # `java -version` prints its banner on stderr, so fold it into stdout.
    output = subprocess.check_output([java_path, '-version'],
                                     stderr=subprocess.STDOUT,
                                     universal_newlines=True)

    major_version, minor_version = parse_java_version(output)
    # Some installs of java show the version number like `14.0.1`
    # and others show `1.14.0.1`
    # (with a leading 1). We want to support both,
    # as long as the major version is >= 8.
    # (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x)
    if (major_version == 1 and minor_version >= 8) or major_version >= 8:
        return True
    raise SystemError(
        'Detected java {}.{}. LanguageTool requires Java >= 8.'.format(
            major_version, minor_version))
def get_common_prefix(z):
    """Return the first entry name of zip `z` if all other entries start with it.

    Returns None for an empty archive or when some entry does not share
    that prefix.
    """
    entries = z.namelist()
    if not entries:
        return None

    candidate, *others = entries
    for entry in others:
        if not entry.startswith(candidate):
            return None
    return candidate
def http_get(url, out_file, proxies=None):
    """Stream the contents of `url` into `out_file` with a progress bar.

    Args:
        url: Address to download.
        out_file: Writable binary file object that receives the body.
        proxies: Optional proxies mapping handed to ``requests.get``.

    Raises:
        Exception: the server answered with status 403.
        requests.HTTPError: the server answered with any other 4xx/5xx status.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, proxies=proxies) as req:
        if req.status_code == 403:  # Not found on AWS
            raise Exception('Could not find at URL {}.'.format(url))
        # Any other error status carries an error page rather than the
        # archive; fail here instead of writing that page to the zip file.
        req.raise_for_status()

        content_length = req.headers.get('Content-Length')
        total = int(content_length) if content_length is not None else None

        # tqdm as a context manager closes the bar on errors as well.
        with tqdm.tqdm(unit="B", unit_scale=True, total=total,
                       desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}') as progress:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    progress.update(len(chunk))
                    out_file.write(chunk)
def unzip_file(temp_file, directory_to_extract_to):
    """Extract the zip archive at `temp_file.name` into the given directory.

    Only the ``name`` attribute of `temp_file` is used; the archive is
    reopened from that path. The operation is logged before extraction.
    """
    archive_path = temp_file.name
    message = 'Unzipping {} to {}.'.format(archive_path, directory_to_extract_to)
    logger.info(message)

    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(directory_to_extract_to)
def download_zip(url, directory):
    """Download the zip file at `url` and extract it into `directory`.

    The archive is stored in a temporary ``.zip`` file that is removed
    afterwards, whether or not the download and extraction succeed.

    Args:
        url: Address of the zip archive.
        directory: Folder the archive is extracted into.
    """
    # delete=False: the file is reopened by name for extraction after it has
    # been closed, so it must outlive the handle and be removed manually.
    downloaded_file = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
    try:
        # Close the file (even on a failed download) so we can extract it.
        with downloaded_file:
            http_get(url, downloaded_file)
        # Extract zip file to path.
        unzip_file(downloaded_file, directory)
    finally:
        # Remove the temporary file, also when the steps above raised.
        os.remove(downloaded_file.name)
    # Tell the user the download path.
    logger.info('Downloaded {} to {}.'.format(url, directory))
def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION):
    """Download and unpack a LanguageTool release unless it is already there.

    Nothing is downloaded when the jar-directory environment variable is
    set, when `language_tool_version` is empty/None, or when the expected
    extraction folder is already among the existing downloads.

    Args:
        language_tool_version: Release to fetch, e.g. ``'6.4'``.

    Raises:
        NotADirectoryError: the download path exists but is not a directory.
        Whatever ``confirm_java_compatibility`` raises when no suitable
        Java installation is found.
    """
    confirm_java_compatibility()
    download_folder = get_language_tool_download_path()

    # Use the env var to the jar directory if it is defined
    # otherwise look in the download directory
    if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR):
        return

    # Make download path, if it doesn't exist.
    os.makedirs(download_folder, exist_ok=True)
    # Explicit check rather than `assert`, which is stripped under -O.
    if not os.path.isdir(download_folder):
        raise NotADirectoryError(download_folder)

    old_path_list = find_existing_language_tool_downloads(download_folder)

    if not language_tool_version:
        return

    filename = FILENAME.format(version=language_tool_version)
    language_tool_download_url = urljoin(BASE_URL, filename)
    # The archive is expected to unpack into a folder named like the zip
    # file without its extension.
    dirname, _ = os.path.splitext(filename)
    extract_path = os.path.join(download_folder, dirname)

    if extract_path in old_path_list:
        return
    download_zip(language_tool_download_url, download_folder)
if __name__ == '__main__':
    # Script entry point: run the download and exit with its return value.
    exit_status = download_lt()
    sys.exit(exit_status)