modm_data.dl.store
# Copyright 2022, Niklas Hauser
# SPDX-License-Identifier: MPL-2.0

import os
import shutil
import logging
import tempfile
import subprocess
from pathlib import Path
from urllib.request import urlopen, Request


LOGGER = logging.getLogger(__name__)
# HTTP request headers passed to every curl invocation via `-H`.
# The values mimic a Safari browser request to www.st.com.
_hdr = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Sec-Fetch-Site': 'none',
    'Accept-Encoding': 'identity',
    'Sec-Fetch-Mode': 'navigate',
    'Host': 'www.st.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
    'Accept-Language': 'en-GB,en;q=0.9',
    'Sec-Fetch-Dest': 'document',
    'Connection': 'keep-alive',
}


def _curl_command(url: str, max_time: int, output: str) -> list:
    """
    Build the curl argument vector shared by all downloads.

    The command is a list of arguments (not a shell string), so the URL, the
    output path and the header values are passed to curl verbatim: quotes,
    spaces or shell metacharacters in them cannot alter the command.

    :param url: URL to fetch.
    :param max_time: Maximum transfer time in seconds.
    :param output: Output target for `-o` (`-` means stdout).
    :return: The argument list to hand to `subprocess`.
    """
    # `--url` keeps a URL that starts with `-` from being parsed as an option.
    cmd = ["curl", "--url", url, "-L", "-s", "--max-time", str(max_time), "-o", output]
    for key, value in _hdr.items():
        cmd += ["-H", f"{key}: {value}"]
    return cmd


def download_data(url: str, encoding: str = None, errors: str = None) -> str:
    """
    Download and decode the data of a URL.

    :param url: URL to download
    :param encoding: optional encoding to apply (default is `utf-8`)
    :param errors: optional error handling (default is `ignore`)
    :return: The data as a decoded string. Empty if curl produced no output
             or could not be started.
    """
    LOGGER.debug(f"Downloading data from {url}")
    try:
        data = subprocess.run(_curl_command(url, 120, "-"), stdout=subprocess.PIPE).stdout
    except OSError as error:
        # Without a shell a missing curl raises instead of yielding empty
        # output; keep the previous best-effort behaviour for callers.
        LOGGER.error(f"Unable to run curl: {error}")
        data = b""
    return data.decode(encoding=encoding or "utf-8", errors=errors or "ignore")


def download_file(url: str, path: Path, overwrite: bool = False) -> bool:
    """
    Download a file from a URL and copy it to a path, potentially overwriting an
    existing file there. Creates directories if necessary.

    :param url: File URL to download.
    :param path: Path to copy the downloaded file to (a `str` is accepted too).
    :param overwrite: If the file already exists, overwrite it.
    :return: Whether the file was downloaded and copied, i.e. whether curl
             exited with status 0.
    """
    # Accept plain strings as well, so `.exists()` and `.parent` always work.
    path = Path(path)
    if not overwrite and path.exists():
        LOGGER.error(f"File {path} already exists!")
        return False
    path.parent.mkdir(parents=True, exist_ok=True)
    LOGGER.debug(f"Downloading file from {url} to {path}")
    # NOTE: curl is used on purpose. Fetching with urllib.request.urlopen did
    # not work with all PDFs (possibly because of redirects), and an earlier
    # wget based variant was dropped as well.
    # NOTE(review): `--fail` is not passed to curl, so an HTTP error status
    # may still count as success here -- confirm whether that is intended.
    try:
        return subprocess.call(_curl_command(url, 60, str(path))) == 0
    except OSError as error:
        # curl missing or not executable: report failure instead of raising.
        LOGGER.error(f"Unable to run curl: {error}")
        return False
LOGGER =
<Logger modm_data.dl.store (WARNING)>
def
download_data(url: str, encoding: str = None, errors: str = None) -> str:
def download_data(url: str, encoding: str = None, errors: str = None) -> str:
    """
    Download and decode the data of a URL.

    curl is invoked with an argument list instead of a shell string, so the
    URL and the header values reach curl verbatim and cannot be interpreted
    by a shell.

    :param url: URL to download
    :param encoding: optional encoding to apply (default is `utf-8`)
    :param errors: optional error handling (default is `ignore`)
    :return: The data as a decoded string. Empty if curl produced no output
             or could not be started.
    """
    LOGGER.debug(f"Downloading data from {url}")
    # `--url` keeps a URL that starts with `-` from being parsed as an option.
    cmd = ["curl", "--url", url, "-L", "-s", "--max-time", "120", "-o", "-"]
    for key, value in _hdr.items():
        cmd += ["-H", f"{key}: {value}"]
    try:
        data = subprocess.run(cmd, stdout=subprocess.PIPE).stdout
    except OSError as error:
        # Without a shell a missing curl raises instead of yielding empty
        # output; keep the previous best-effort behaviour for callers.
        LOGGER.error(f"Unable to run curl: {error}")
        data = b""
    return data.decode(encoding=encoding or "utf-8", errors=errors or "ignore")
Download and decode the data of a URL.
Parameters
- url: URL to download
- encoding: optional encoding to apply (default is `utf-8`)
- errors: optional error handling (default is `ignore`)
Returns
The data as a decoded string.
def
download_file(url: str, path: pathlib.Path, overwrite: bool = False) -> bool:
def download_file(url: str, path: Path, overwrite: bool = False) -> bool:
    """
    Download a file from a URL and copy it to a path, potentially overwriting an
    existing file there. Creates directories if necessary.

    curl is invoked with an argument list instead of a shell string, so URLs
    and paths containing spaces, quotes or shell metacharacters are passed
    through verbatim.

    :param url: File URL to download.
    :param path: Path to copy the downloaded file to (a `str` is accepted too).
    :param overwrite: If the file already exists, overwrite it.
    :return: Whether the file was downloaded and copied, i.e. whether curl
             exited with status 0.
    """
    # Accept plain strings as well, so `.exists()` and `.parent` always work.
    path = Path(path)
    if not overwrite and path.exists():
        LOGGER.error(f"File {path} already exists!")
        return False
    path.parent.mkdir(parents=True, exist_ok=True)
    LOGGER.debug(f"Downloading file from {url} to {path}")
    # `--url` keeps a URL that starts with `-` from being parsed as an option.
    cmd = ["curl", "--url", url, "-L", "-s", "--max-time", "60", "-o", str(path)]
    for key, value in _hdr.items():
        cmd += ["-H", f"{key}: {value}"]
    # NOTE: curl is used on purpose. Fetching with urllib.request.urlopen did
    # not work with all PDFs (possibly because of redirects), and an earlier
    # wget based variant was dropped as well.
    try:
        return subprocess.call(cmd) == 0
    except OSError as error:
        # curl missing or not executable: report failure instead of raising.
        LOGGER.error(f"Unable to run curl: {error}")
        return False
Download a file from a URL and copy it to a path, potentially overwriting an existing file there. Creates directories if necessary.
Parameters
- url: File URL to download.
- path: Path to copy the downloaded file to.
- overwrite: If the file already exists, overwrite it.
Returns
Whether the file was downloaded and copied.