Source code for pycharmers.utils.download_utils

# coding: utf-8
import os
import urllib

from .generic_utils import readable_bytes
from .monitor_utils import progress_reporthook_create
from .print_utils import pretty_3quote
from ._colorings import toBLUE, toRED, toGREEN

# Use Specific Opener
opener = urllib.request.build_opener()
opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
urllib.request.install_opener(opener)

CONTENT_ENCODING2EXT = {
    "x-gzip"                    : ".gz",
    "image/jpeg"                : ".jpg",
    "image/jpx"                 : ".jpx", 
    "image/png"                 : ".png",
    "image/gif"                 : ".gif",
    "image/webp"                : ".webp",
    "image/x-canon-cr2"         : ".cr2",
    "image/tiff"                : ".tif",
    "image/bmp"                 : ".bmp",
    "image/vnd.ms-photo"        : ".jxr",
    "image/vnd.adobe.photoshop" : ".psd",
    "image/x-icon"              : ".ico",
    "image/heic"                : ".heic",
}

CONTENT_TYPE2EXT = {
    "application/epub+zip"                  : ".epub",
    "application/zip"                       : ".zip",
    "application/x-tar"                     : ".tar",
    "application/x-rar-compressed"          : ".rar",
    "application/gzip"                      : ".gz",
    "application/x-bzip2"                   : ".bz2",
    "application/x-7z-compressed"           : ".7z",
    "application/x-xz"                      : ".xz",
    "application/pdf"                       : ".pdf",
    "application/x-msdownload"              : ".exe",
    "application/x-shockwave-flash"         : ".swf",
    "application/rtf"                       : ".rtf",
    "application/octet-stream"              : ".eot",
    "application/postscript"                : ".ps",
    "application/x-sqlite3"                 : ".sqlite",
    "application/x-nintendo-nes-rom"        : ".nes",
    "application/x-google-chrome-extension" : ".crx",
    "application/vnd.ms-cab-compressed"     : ".cab",
    "application/x-deb"                     : ".deb",
    "application/x-unix-archive"            : ".ar",
    "application/x-compress"                : ".Z",
    "application/x-lzip"                    : ".lz",
}

[docs]def decide_extension(content_encoding=None, content_type=None, filename=None): """Decide File Extension based on ``content_encoding`` and ``content_type`` Args: content_encoding (str) : The MIME type of the resource or the data. content_type (str) : The Content-Encoding entity header is used to compress the media-type. filename (str) : The filename. Returns: ext (str): Starts with "." Examples: >>> from pycharmers.utils import decide_extension >>> decide_extension(content_encoding="x-gzip", content_type="application/zip") .gz >>> decide_extension(content_encoding="image/png", content_type=None) .png >>> decide_extension(content_encoding=None, content_type="application/pdf") .pdf """ ext = CONTENT_ENCODING2EXT.get(content_encoding) or CONTENT_TYPE2EXT.get(content_type) or "." + str(filename).split(".")[-1] return ext
[docs]def download_file(url, dirname=".", path=None, bar_width=20, verbose=True): """Download a file. Args: url (str) : File URL. dirname (str) : The directory where downloaded data will be saved. path (str) : path/to/downloaded_file bar_width (int) : The width of progress bar. verbose (bool) : Whether print verbose or not. Returns: path (str) : path/to/downloaded_file Examples: >>> from pycharmers.utils import download_file >>> download_file(url="https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml") Download a file from https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_eye.xml * Content-Encoding : None * Content-Length : (333.404296875, 'MB') * Content-Type : text/plain; charset=utf-8 * Save Destination : ./haarcascade_eye.xml haarcascade_eye.xml 100.0%[####################] 0.1[s] 5.5[GB/s] eta -0.0[s] './haarcascade_eye.xml' """ try: with urllib.request.urlopen(url) as web_file: # Get Information from webfile header headers = dict(web_file.headers._headers) content_encoding = headers.get("Content-Encoding") content_length, unit = readable_bytes(int(headers.get("Content-Length", 0))) content_length = f"{content_length:.1f} [{unit}]" content_type = headers.get("Content-Type") fn = url.split("/")[-1] if path is None: *name, ext = fn.split(".") name = ".".join(name) guessed_ext = decide_extension(content_encoding, content_type, fn) path = os.path.join(dirname, name+guessed_ext) if verbose: print(*pretty_3quote(f""" Download a file from {toBLUE(url)} * Content-Encoding : {toGREEN(content_encoding)} * Content-Length : {toGREEN(content_length)} * Content-Type : {toGREEN(content_type)} * Save Destination : {toBLUE(path)}""" )) _, res = urllib.request.urlretrieve(url=url, filename=path, reporthook=progress_reporthook_create(filename=fn, bar_width=bar_width, verbose=verbose)) except urllib.error.URLError as e: if verbose: print(f"{toRED(e)} : url={toBLUE(url)}") return path