This article collects typical code examples of the Python method urlgrabber.grabber.URLGrabber.urlgrab. If you are wondering exactly what URLGrabber.urlgrab does or how to use it, the curated examples below may help. You can also explore the containing class urlgrabber.grabber.URLGrabber
for further usage.
The following presents 15 code examples of URLGrabber.urlgrab, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
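Before the examples, a minimal sketch of the basic call shape may help. This is an illustration only; the URL and file names are placeholders, not taken from the examples below:

from urlgrabber.grabber import URLGrabber, URLGrabError

g = URLGrabber(reget='simple')  # resume partial downloads where possible
try:
    # urlgrab fetches the URL into a local file and returns the local path
    path = g.urlgrab('http://example.com/archive.tar.gz',
                     filename='archive.tar.gz')
except URLGrabError, e:
    print 'download failed: %s' % e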
Example 1: WebGrabber
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
class WebGrabber(Singleton):
    g = None

    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True  # only set on a successful import
        except ImportError:
            writeError('This script is better with URLGrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')
        if not self.gotLibUrlGrabber:
            return
        if 'proxy' in config:
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabberSansProxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is unavailable.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            self.g.urlgrab(url, filename=dest)
Example 2: fetchPackages
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def fetchPackages(self, destination=None):
    """Download packages to the destination directory."""
    from urlgrabber.grabber import URLGrabber, URLGrabError
    from urlgrabber.progress import TextMeter
    from os import path, chdir

    if destination:
        chdir(destination)
    else:
        chdir(self.dest_dir)

    ### URLGrabber objects ###
    t = TextMeter()
    g = URLGrabber(progress_obj=t)

    ### Start iteration over the list of package URIs ###
    for uri in self.getPackageList():
        pisifile = uri.split("/")[-1]
        if path.exists(pisifile):
            print pisifile, "--- No Update! Skipping..."
            continue
        try:
            g.urlgrab(uri)
        except URLGrabError:
            print "Error while downloading file %s" % pisifile
            break
    print "Finished."
Example 3: Fetcher
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
class Fetcher(object):
    def __init__(self, remote):
        self.remote = remote
        self.g = URLGrabber(prefix=self.remote)

    def fetch_to_file(self, src, dest):
        # Download to a temporary file, then rename into place.
        tmp = dest + '.part'
        try:
            self.g.urlgrab(src, filename=tmp, copy_local=1, user_agent='lsd-fetch/1.0')
        except URLGrabError as e:
            raise IOError(str(e))
        os.rename(tmp, dest)

    def fetch(self, src='/'):
        try:
            contents = self.g.urlread(src).strip()
        except URLGrabError as e:
            raise IOError(str(e))
        return contents

    def listdir(self, dir='/'):
        lfn = os.path.join(dir, '.listing')
        contents = self.fetch(lfn)
        return [s.strip() for s in contents.split() if s.strip() != '']

    # Pickling support -- only pickle the remote URL
    def __getstate__(self):
        return self.remote

    def __setstate__(self, remote):
        self.__init__(remote)
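Because __getstate__/__setstate__ reduce a Fetcher to its remote URL, instances survive pickling and are rebuilt with a fresh URLGrabber on restore. A small sketch, assuming a reachable placeholder URL:

import pickle

f = Fetcher('http://example.com/data/')
clone = pickle.loads(pickle.dumps(f))  # restore calls __setstate__ -> __init__
print clone.remote                     # 'http://example.com/data/'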
Example 4: update_categories
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def update_categories(username, subscriptions):
    g = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)

    cats = get_categories(username)
    visited = set()
    for sub in subscriptions:
        if sub.name in visited:
            continue
        elif sub.name in cats:
            del cats[sub.name]
            visited.add(sub.name)
            continue
        else:
            print 'Downloading thumbnail for %s/%s' % (sub.name, sub.dname)
            ft = sub.thumbnail[-3:]
            nf = '%s/%s%s%s.%s' % (folder, sub.name, SPLITTER, sub.dname, ft)
            g.urlgrab(sub.thumbnail, filename=nf)

    for sub in cats:
        print 'Removing thumbnail for %s' % sub
        if cats[sub] is None:
            old_fn = '%s/%s*' % (folder, sub)
        else:
            old_fn = '%s/%s/%s*' % (folder, cats[sub], sub)
        for fl in glob.glob(old_fn):
            print '\t', fl
            os.remove(fl)
Example 5: urlgrab
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def urlgrab(self, url, *args, **kwargs):
    self.check_offline_mode()
    # This is for older versions of urlgrabber which are packaged in Debian
    # and Ubuntu and cannot handle filenames as a normal Python string but
    # need a unicode string.
    return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
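This fragment is a method override and presumably lives in a URLGrabber subclass. A hedged reconstruction of that context; the class name and the check_offline_mode body are assumptions, not from the source:

from urlgrabber.grabber import URLGrabber

class OfflineAwareGrabber(URLGrabber):  # hypothetical subclass name
    def check_offline_mode(self):
        # hypothetical guard; would raise if downloads are disabled
        pass

    def urlgrab(self, url, *args, **kwargs):
        self.check_offline_mode()
        return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)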
Example 6: downloadFile
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def downloadFile(url, filename, subdir):
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))

    maxBytesPerSecond = 0  # e.g. 2**19 ==> 0.5 MByte/s; 0 ==> not restricted
    grabber = URLGrabber(progress_obj=None,
                         throttle=maxBytesPerSecond,
                         reget='simple',
                         retry=5,
                         retrycodes=[-1, 4, 5, 6, 7, 12, 14],
                         timeout=30,
                         user_agent='bong download manager/1.0')

    statinfo = os.stat(BongEnvironment.settings['recdir'])
    targetdir = os.path.join(BongEnvironment.settings['recdir'], subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            os.chmod(targetdir, 0777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)

    targetfile = os.path.join(targetdir, filename)
    t1 = time.time()
    try:
        local_filename = grabber.urlgrab(url, targetfile)
    except URLGrabError, e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
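The fragment above ends with the error path; the success branch is cut off, so its return value is assumed here to be truthy. A hedged usage sketch, with placeholder URL, file name, and subdirectory:

# Set maxBytesPerSecond above to cap the rate, e.g. 2**19 for ~0.5 MByte/s.
ok = downloadFile('http://example.com/rec/12345.ts', '12345.ts', 'recordings')
if not ok:
    BongEnvironment.logger.warning('download failed, will retry later')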
Example 7: _getTreeInfo
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def _getTreeInfo(self, url, proxy_url, sslverify):
    """ Retrieve treeinfo and return the path to the local file.

        :param url: url of the repo
        :type url: string
        :param proxy_url: optional full proxy URL or ""
        :type proxy_url: string
        :param sslverify: True if the SSL certificate should be verified
        :type sslverify: bool
        :returns: path to the retrieved .treeinfo file or None
        :rtype: string or None
    """
    if not url:
        return None

    log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
              url, proxy_url, sslverify)

    ugopts = {"ssl_verify_peer": sslverify,
              "ssl_verify_host": sslverify}

    proxies = {}
    if proxy_url:
        try:
            proxy = ProxyString(proxy_url)
            proxies = {"http": proxy.url,
                       "https": proxy.url}
        except ProxyStringError as e:
            log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                     proxy_url, e)

    ug = URLGrabber()
    try:
        treeinfo = ug.urlgrab("%s/.treeinfo" % url,
                              "/tmp/.treeinfo", copy_local=True,
                              proxies=proxies, **ugopts)
    except URLGrabError:
        # Fall back to the legacy name "treeinfo" without the leading dot.
        try:
            treeinfo = ug.urlgrab("%s/treeinfo" % url,
                                  "/tmp/.treeinfo", copy_local=True,
                                  proxies=proxies, **ugopts)
        except URLGrabError as e:
            log.info("Error downloading treeinfo: %s", e)
            treeinfo = None

    return treeinfo
Example 8: run
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def run(self):
    # Remove any stale copy of the target file.
    if os.path.isfile(self.file):
        os.chmod(self.file, stat.S_IWUSR)
        os.remove(self.file)

    ## Init url/path pointers
    #response = urllib2.urlopen(self.url)
    #total_size = response.info().getheader('Content-Length').strip()
    #self.total_size = int(total_size)
    ## free space
    #freespace = get_free_space(self.app, path)
    ## check if there is enough free space
    #if self.freespace < total_size and self.freespace != 0:
    #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
    #    self.active = False
    #    return

    self.app.gui.SetVisible(4000, True)
    progress = TextMeter(self.app)
    try:
        Log(self.app, 'Download started')
        g = URLGrabber(reget='simple')
        g.urlgrab(self.url, filename=self.file, reget='simple',
                  progress_obj=progress, text=self.filename)
        # Store the info file as JSON
        json_dumps(self.infodata, self.infopath)
        self.app.gui.ShowDialogNotification('Download Complete')
    except:
        Log(self.app, traceback.format_exc())
        self.app.gui.ShowDialogNotification('Error during download')

    self.app.gui.SetVisible(4000, False)
    self.active = False
    Log(self.app, 'Download finished')
Example 9: download
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def download(url, filename=None, associated_task=None, web_proxy=None):
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy = {'http': web_proxy}
    urlgrabber = URLGrabber(
        reget='simple',
        proxies=web_proxy,
        progress_obj=progress_obj)
    if filename and os.path.isdir(filename):
        # If a directory was given, derive the file name from the URL.
        basename = os.path.basename(url)
        filename = os.path.join(filename, basename)
    filename = urlgrabber.urlgrab(url, filename=filename)
    return filename
Example 10: download_file
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def download_file(url, dirname):
    """
    Download @url and save to @dirname.
    @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    basename = os.path.basename(url)
    filename = "%s/%s" % (dirname, basename)

    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!" % filename)

    g = URLGrabber(reget=None)
    local_filename = g.urlgrab(url, filename)
    return local_filename
Example 11: get_file_if_size_diff
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def get_file_if_size_diff(url, d):
    fn = url.split('/')[-1]
    out_fnp = os.path.join(d, fn)
    g = URLGrabber(reget="simple")
    locFnp = g.urlgrab(url, out_fnp)
    return locFnp
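A note on reget="simple": when the destination file already exists, urlgrabber resumes from the current local size rather than re-downloading, which is what makes this helper cheap to re-run. A hedged sketch with placeholder arguments:

# The second call should fetch only bytes beyond the local copy, or nothing.
p1 = get_file_if_size_diff('http://example.com/big.dat', '/tmp')
p2 = get_file_if_size_diff('http://example.com/big.dat', '/tmp')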
Example 12: preInstall
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def preInstall(self, *args, **kwargs):
    """ Download image and loopback mount it.

        This is called after partitioning is setup, we now have space
        to grab the image. Download it to ROOT_PATH and provide feedback
        during the download (using urlgrabber callback).
    """
    # Setup urlgrabber and call back to download image to ROOT_PATH
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies": self._proxies,
              "progress_obj": progress,
              "copy_local": True}

    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)

    if error:
        exn = PayloadInstallError(str(error))
        if errorHandler.cb(exn) == ERROR_RAISE:
            raise exn

    # Used to make install progress % look correct
    self._adj_size = os.stat(self.image_path)[stat.ST_SIZE]

    if self.data.method.checksum:
        progressQ.send_message(_("Checking image checksum"))
        sha256 = hashlib.sha256()
        with open(self.image_path, "rb") as f:
            while True:
                data = f.read(1024*1024)
                if not data:
                    break
                sha256.update(data)
        filesum = sha256.hexdigest()
        log.debug("sha256 of %s is %s", self.data.method.url, filesum)

        if lowerASCII(self.data.method.checksum) != filesum:
            log.error("%s does not match checksum.", self.data.method.checksum)
            exn = PayloadInstallError("Checksum of image does not match")
            if errorHandler.cb(exn) == ERROR_RAISE:
                raise exn

    # Mount the image and check to see if it is a LiveOS/*.img
    # style squashfs image. If so, move it to IMAGE_DIR and mount the real
    # root image on INSTALL_TREE
    blivet.util.mount(self.image_path, INSTALL_TREE, fstype="auto", options="ro")
    if os.path.exists(INSTALL_TREE + "/LiveOS"):
        # Find the first .img in the directory and mount that on INSTALL_TREE
        img_files = glob.glob(INSTALL_TREE + "/LiveOS/*.img")
        if img_files:
            img_file = os.path.basename(sorted(img_files)[0])

            # move the mount to IMAGE_DIR
            os.makedirs(IMAGE_DIR, 0755)

            # work around inability to move shared filesystems
            iutil.execWithRedirect("mount", ["--make-rprivate", "/"])
            iutil.execWithRedirect("mount", ["--move", INSTALL_TREE, IMAGE_DIR])
            blivet.util.mount(IMAGE_DIR + "/LiveOS/" + img_file, INSTALL_TREE,
                              fstype="auto", options="ro")
Example 13: max
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
        self.progress(("=" if self.started else "+") * max(0, self.count - oldCount), suffix)
        self.started = True

    def end(self, totalRead):
        self.update(totalRead, "OK")

progressIndicator = ProgressIndicator()
grabber = URLGrabber(
    reget="simple",
    timeout=self.timeout,
    progress_obj=progressIndicator,
    user_agent=userAgent,
    http_headers=tuple((str(cookie["name"]), str(cookie["value"])) for cookie in cookies),
)

downloadOK = False  # initialized so the check below also works after an interrupt
try:
    grabber.urlgrab(link, filename=targetFileName)
    downloadOK = True
except URLGrabError, e:
    self.errors += 1
    self.logger.error("Download failed: %s", e)
except KeyboardInterrupt:
    self.errors += 1
    self.logger.error("Download interrupted")

if downloadOK:
    localSize = getFileSize(targetFileName)
    if not localSize:
        self.errors += 1
        downloadOK = False
        self.logger.error("Downloaded file seems corrupt")
    elif linkSize:
        if localSize > linkSize:
Example 14: writeInfo
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
        if 'proxy' in config:
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabberSansProxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is unavailable.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            self.g.urlgrab(url, filename=dest)

if __name__ == '__main__':
    g = URLGrabber(proxies={'http': 'http://proxy.free.fr:3128'})
    url = 'http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip'
    g.urlgrab(url, filename='moncul.zip')

    g1 = WebGrabber(config={'proxy': 'http://proxy.free.fr:3128'})
    g2 = WebGrabber()
    print "g1 is g2 %s" % (g1 is g2)
    g1.getWebFile('http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip', 'moncul.zip')
    print "Done."
Example 15: urlparse
# Required module: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
try:
    url_redirect = urllib2.urlopen(urljoin(options.gmb_url, url))
    url_redirect_parsed = urlparse(url_redirect.geturl())
    filename = url_redirect_parsed.path[1:]  # Strip leading /
except urllib2.HTTPError, e:
    if e.code == 403:
        print 'A 403 Forbidden error was raised when accessing the download. Is your bundle key correct?'
    else:
        print e
    sys.exit(1)
finally:
    url_redirect.close()

try:
    grabber.urlgrab(url, filename=filename)
except URLGrabError, e:
    if e.errno == 14:
        print 'Encountered HTTP error: %s' % (e.strerror)
    else:
        print 'Encountered error %d: %s' % (e.errno, e.strerror)
else:
    if options.extract:
        print 'Extracting %s...' % filename,

        # Create directory
        dir_name = filename[:filename.rfind('.')]
        dir_name = dir_name.replace('+', ' ')
        try:
            os.mkdir(dir_name)
        except OSError, e: