Wgetter
Publicado por Fernando (última atualização em 02/05/2014)
[ Hits: 6.163 ]
Homepage: https://github.com/phoemur/
Esta é a minha implementação do Wget escrita em Python.
Se gostar você pode instalar o módulo em:
https://pypi.python.org/pypi/wgetter/
Ou contribuir no GitHub em:
https://github.com/phoemur/wgetter
#!/usr/bin/env python
"""
Wgetter is another command line download utility written completely in python.
It is based on python-wget (https://bitbucket.org/techtonik/python-wget/src) with some improvements.
It works on python >= 2.6 or python >=3.0 Runs on Windows or Linux or Mac
API Usage:
>>> import wgetter
>>> filename = wgetter.download('https://sites.google.com/site/doctormike/pacman-1.2.tar.gz', outdir='/home/user')
100 % [====================================================>] 19.9KiB / 19.9KiB 100.0KiB/s
>>> filename
'/home/user/pacman-1.2.tar.gz'
"""
import sys
import os
import shutil
import tempfile
import hashlib
from time import time
PY3K = sys.version_info >= (3, 0)
if PY3K:
import urllib.request as ulib
import urllib.parse as urlparse
else:
import urllib2 as ulib
import urlparse
SUFFIXES = {1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']}
def approximate_size(size, a_kilobyte_is_1024_bytes=True):
'''
Humansize.py from Dive into Python3 - Mark Pilgrim - http://www.diveintopython3.net/
Copyright (c) 2009, Mark Pilgrim, All rights reserved.
Convert a file size to human-readable form.
Keyword arguments:
size -- file size in bytes
a_kilobyte_is_1024_bytes -- if True (default), use multiples of 1024
if False, use multiples of 1000
Returns: string
'''
size = float(size)
if size < 0:
raise ValueError('number must be non-negative')
multiple = 1024 if a_kilobyte_is_1024_bytes else 1000
for suffix in SUFFIXES[multiple]:
size /= multiple
if size < multiple:
return '{0:.1f}{1}'.format(size, suffix)
raise ValueError('number too large')
def get_console_width():
"""Return width of available window area. Autodetection works for
Windows and POSIX platforms. Returns 80 for others
Code from http://bitbucket.org/techtonik/python-pager
"""
if os.name == 'nt':
STD_INPUT_HANDLE = -10
STD_OUTPUT_HANDLE = -11
STD_ERROR_HANDLE = -12
# get console handle
from ctypes import windll, Structure, byref
try:
from ctypes.wintypes import SHORT, WORD, DWORD
except ImportError:
# workaround for missing types in Python 2.5
from ctypes import (
c_short as SHORT, c_ushort as WORD, c_ulong as DWORD)
console_handle = windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE)
# CONSOLE_SCREEN_BUFFER_INFO Structure
class COORD(Structure):
_fields_ = [("X", SHORT), ("Y", SHORT)]
class SMALL_RECT(Structure):
_fields_ = [("Left", SHORT), ("Top", SHORT),
("Right", SHORT), ("Bottom", SHORT)]
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
_fields_ = [("dwSize", COORD),
("dwCursorPosition", COORD),
("wAttributes", WORD),
("srWindow", SMALL_RECT),
("dwMaximumWindowSize", DWORD)]
sbi = CONSOLE_SCREEN_BUFFER_INFO()
ret = windll.kernel32.GetConsoleScreenBufferInfo(
console_handle, byref(sbi))
if ret == 0:
return 0
return sbi.srWindow.Right + 1
elif os.name == 'posix':
from fcntl import ioctl
from termios import TIOCGWINSZ
from array import array
winsize = array("H", [0] * 4)
try:
ioctl(sys.stdout.fileno(), TIOCGWINSZ, winsize)
except IOError:
pass
return (winsize[1], winsize[0])[0]
return 80
CONSOLE_WIDTH = get_console_width()
# Need 2 spaces more to avoid linefeed on Windows
AVAIL_WIDTH = CONSOLE_WIDTH - 44 if os.name == 'nt' else CONSOLE_WIDTH - 42
def filename_from_url(url):
""":return: detected filename or None"""
fname = os.path.basename(urlparse.urlparse(url).path)
if len(fname.strip(" \n\t.")) == 0:
return None
return fname
def filename_from_headers(headers):
"""Detect filename from Content-Disposition headers if present.
http://greenbytes.de/tech/tc2231/
:param: headers as dict, list or string
:return: filename from content-disposition header or None
"""
if type(headers) == str:
headers = headers.splitlines()
if type(headers) == list:
headers = dict([x.split(':', 1) for x in headers])
cdisp = headers.get("Content-Disposition")
if not cdisp:
return None
cdtype = cdisp.split(';')
if len(cdtype) == 1:
return None
if cdtype[0].strip().lower() not in ('inline', 'attachment'):
return None
# several filename params is illegal, but just in case
fnames = [x for x in cdtype[1:] if x.strip().startswith('filename=')]
if len(fnames) > 1:
return None
name = fnames[0].split('=')[1].strip(' \t"')
name = os.path.basename(name)
if not name:
return None
return name
def filename_fix_existing(filename, dirname):
"""Expands name portion of filename with numeric ' (x)' suffix to
return filename that doesn't exist already.
"""
name, ext = filename.rsplit('.', 1)
names = [x for x in os.listdir(dirname) if x.startswith(name)]
names = [x.rsplit('.', 1)[0] for x in names]
suffixes = [x.replace(name, '') for x in names]
# filter suffixes that match ' (x)' pattern
suffixes = [x[2:-1] for x in suffixes
if x.startswith(' (') and x.endswith(')')]
indexes = [int(x) for x in suffixes
if set(x) <= set('0123456789')]
idx = 1
if indexes:
idx += sorted(indexes)[-1]
return '{0}({1}).{2}'.format(name, idx, ext)
def report_bar(bytes_so_far, chunk_size, total_size, speed):
'''
This callback for the download function is used to print the download bar
'''
percent = int(bytes_so_far * 100 / total_size)
current = approximate_size(bytes_so_far).center(9)
total = approximate_size(total_size).center(9)
shaded = int(float(bytes_so_far) / total_size * AVAIL_WIDTH)
sys.stdout.write(" {0}% [{1}{2}{3}]".format(str(percent).center(4), '=' * (shaded - 1), '>',
' ' * (AVAIL_WIDTH - shaded)) + "{0}/{1} {2}".format(current, total,
(approximate_size(speed) + '/s').center(12)))
sys.stdout.write("\r")
sys.stdout.flush()
if bytes_so_far >= total_size:
sys.stdout.write('\n')
def report_unknown(bytes_so_far, chunk_size, total_size, speed):
'''
This callback for the download function is used when the total size is unknown
'''
sys.stdout.write("Downloading: {0} / Unknown - {1}/s\r".format(approximate_size(bytes_so_far),
approximate_size(speed)))
def report_onlysize(bytes_so_far, chunk_size, total_size, speed):
'''
This callback for the download function is used when console width is not enough to print the bar.
It prints only the sizes
'''
percent = int(bytes_so_far * 100 / total_size)
current = approximate_size(bytes_so_far).center(10)
total = approximate_size(total_size).center(10)
sys.stdout.write('D: {0}% -{1}/{2}\r'.format(percent, current, total))
def md5sum(filename, blocksize=8192):
'''
Returns the MD5 checksum of a file
'''
with open(filename, 'rb') as fh:
m = hashlib.md5()
while True:
data = fh.read(blocksize)
if not data:
break
m.update(data)
return m.hexdigest()
def download(link, outdir='.', chunk_size=4096):
'''
This is the Main function, which downloads a given link and saves on outdir (default = current directory)
'''
url = None
fh = None
bytes_so_far = 0
filename = filename_from_url(link) or "."
# get filename for temp file in current directory
(fd_tmp, tmpfile) = tempfile.mkstemp(
".tmp", prefix=filename + ".", dir=outdir)
os.close(fd_tmp)
os.unlink(tmpfile)
try:
url = ulib.urlopen(link)
fh = open(tmpfile, mode='wb')
headers = url.info()
try:
total_size = int(headers['Content-Length'])
except (ValueError, KeyError, TypeError):
total_size = 'unknown'
try:
md5_header = headers['Content-MD5']
except (ValueError, KeyError, TypeError):
md5_header = None
# Define which callback we're gonna use
if total_size != 'unknown':
if CONSOLE_WIDTH >= 45:
reporthook = report_bar
else:
reporthook = report_onlysize
else:
reporthook = report_unknown
# Below are the registers to calculate network transfer rate
time_register = time()
speed = 0.0
bytes_register = 0.0
# Loop that reads in chunks, calculates speed and does the callback to
# print the progress
while True:
chunk = url.read(chunk_size)
# Update Download Speed every 1 second
if time() - time_register > 1:
speed = (bytes_so_far - bytes_register) / \
(time() - time_register)
time_register = time() # Set register properly for future use
# Set register properly for future use
bytes_register = bytes_so_far
bytes_so_far += len(chunk)
if not chunk:
break
fh.write(chunk)
reporthook(bytes_so_far, chunk_size, total_size, speed)
except KeyboardInterrupt:
print('\n\nCtrl + C: Download aborted by user')
print('Partial downloaded file:\n{0}'.format(os.path.abspath(tmpfile)))
sys.exit(1)
finally:
if url:
url.close()
if fh:
fh.close()
filenamealt = filename_from_headers(headers)
if filenamealt:
filename = filenamealt
# add numeric '(x)' suffix if filename already exists
if os.path.exists(os.path.join(outdir, filename)):
filename = filename_fix_existing(filename, outdir)
filename = os.path.join(outdir, filename)
shutil.move(tmpfile, filename)
# Check if sizes matches
if total_size != 'unknown' and total_size != bytes_so_far:
print(
'\n\nWARNING!! Downloaded file size mismatches... Probably corrupted...')
# Check md5 if it was in html header
if md5_header:
print('\nValidating MD5 checksum...')
if md5_header == md5sum(filename):
print('MD5 checksum passed!')
else:
print('MD5 checksum do NOT passed!!!')
return filename
if __name__ == '__main__':
if len(sys.argv) == 1 or sys.argv[1] in {'-h', '--help'}:
print('Usage: {0} <URL>'.format(sys.argv[0]))
args = [str(elem) for elem in sys.argv[1:]]
for link in args:
print('Downloading ' + link)
filename = download(link)
print('\nSaved under {0}'.format(filename))
Enviando mensagens para celulares TIM
Preço do Dólar, Bitcoin e Euro em Python
Cotação do Dólar com requests_html
Como extrair chaves TOTP 2FA a partir de QRCODE (Google Authenticator)
Linux em 2025: Segurança prática para o usuário
Desktop Linux em alta: novos apps, distros e privacidade marcam o sábado
IA chega ao desktop e impulsiona produtividade no mundo Linux
Novos apps de produtividade, avanços em IA e distros em ebulição agitam o universo Linux
Digitando underscore com "shift" + "barra de espaços"
Como ativar a lixeira e recuperar aquivos deletados em um servidor Linux
Como mudar o nome de dispositivos Bluetooth via linha de comando
Alternativas ao Multilogin para gerenciamento de múltiplas contas/prof... (0)
Captação de áudio no zorin linux começa a diminuir com o tempo (0)
dpkg: erro: gatilho de arquivo duplicado chamado pelo arquivo de nome (0)
estou na 22.1 e não é atualizado pra 22.4 via "sudo full-upgrade&... (6)









