本文整理匯總了Python中urlgrabber.grabber.URLGrabber類的典型用法代碼示例。如果您正苦於以下問題:Python URLGrabber類的具體用法?Python URLGrabber怎麼用?Python URLGrabber使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了URLGrabber類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: downloadFile
def downloadFile(url, filename, subdir):
    """Download ``url`` to ``<recdir>/<subdir>/<filename>``.

    ``recdir`` is read from ``BongEnvironment.settings['recdir']``. The
    sub-directory is created when it does not exist yet; on POSIX systems a
    newly created directory is made world-accessible and given the same
    owner/group as ``recdir``.

    Args:
        url: Source URL handed to urlgrabber.
        filename: Name of the file to create inside the target directory.
        subdir: Directory name below ``recdir``.

    Returns:
        True when urlgrabber finished without error, False when it raised
        ``URLGrabError`` (the error is logged as a warning).
    """
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))
    maxBytesPerSecond = 0  # 2**19 ==> 0.5 MByte/s
                           # 0 ==> not restricted
    grabber = URLGrabber(
        progress_obj=None,
        throttle=maxBytesPerSecond,
        reget='simple',
        retry=5,
        retrycodes=[-1, 4, 5, 6, 7, 12, 14],
        timeout=30,
        user_agent='bong download manager/1.0',
    )
    recdir = BongEnvironment.settings['recdir']
    statinfo = os.stat(recdir)
    targetdir = os.path.join(recdir, subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            # 0o777 is the spelling accepted by Python 2.6+ and Python 3;
            # the bare 0777 form is a SyntaxError on Python 3.
            os.chmod(targetdir, 0o777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)
    targetfile = os.path.join(targetdir, filename)
    try:
        grabber.urlgrab(url, targetfile)
    except URLGrabError as e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
    # Explicit success value: falling off the end returned None, which is
    # just as falsy as the False returned on failure.
    return True
示例2: update_categories
def update_categories(username, subscriptions):
    """Synchronise the thumbnail folder of ``username`` with ``subscriptions``.

    Thumbnails are downloaded for subscriptions whose name is not yet known
    to ``get_categories(username)``; entries returned by ``get_categories``
    that no subscription refers to have their matching files removed.
    """
    grabber = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)

    # Whatever is still in this mapping after the first loop is stale.
    stale = get_categories(username)
    seen = set()
    for sub in subscriptions:
        if sub.name in seen:
            continue
        if sub.name in stale:
            del stale[sub.name]
            seen.add(sub.name)
            continue
        print('Downloading thumbnail for %s/%s' % (sub.name, sub.dname))
        # The last three characters of the thumbnail URL are used as extension.
        suffix = sub.thumbnail[-3:]
        target = '%s/%s%s%s.%s' % (folder, sub.name, SPLITTER, sub.dname, suffix)
        grabber.urlgrab(sub.thumbnail, filename=target)

    for name in stale:
        print('Removing thumbnail for %s' % name)
        parent = stale[name]
        if parent is None:
            pattern = '%s/%s*' % (folder, name)
        else:
            pattern = '%s/%s/%s*' % (folder, parent, name)
        for path in glob.glob(pattern):
            print('\t%s' % path)
            os.remove(path)
示例3: _retrievePublicKey
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file

    @param keyurl: url to the key to retrieve
    @param repo: optional repository object; when given, its bandwidth,
                 retries, throttle, callback and proxy_dict settings are
                 used for the download
    @raise ChannelException: when urlgrabber reports a URLGrabError

    Returns a list of dicts with all the keyinfo

    NOTE(review): the lines visible here stop after the download -- neither
    ``rawkey`` nor ``key_installed`` is used and nothing is returned, so the
    documented return value is presumably produced by code that is not part
    of this excerpt. Confirm against the full source.
    """
    key_installed = False
    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            # No repo configuration available: plain module-level read,
            # called with limit=9999.
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo. use the proxy etc. configuration for it.
            # In theory we have a global proxy config. too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth = repo.bandwidth,
                            retry = repo.retries,
                            throttle = repo.throttle,
                            progress_obj = repo.callback,
                            proxies=repo.proxy_dict)
            # Reuse the user agent of the module's default grabber.
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError, e:
        # Translate the transport error into the caller-facing exception type.
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))
示例4: WebGrabber
class WebGrabber(Singleton):
    """Download helper that prefers urlgrabber and falls back to urllib.

    ``gotLibUrlGrabber`` records whether ``urlgrabber`` could be imported;
    ``g`` holds the ``URLGrabber`` instance when it could.
    """

    g = None

    def __init__(self, config=None):
        """Set up the grabber.

        Args:
            config: optional mapping; when it contains the key ``'proxy'``
                that value is used as the HTTP proxy for URLGrabber.
        """
        # Avoid a shared mutable default argument.
        if config is None:
            config = {}
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
        except ImportError:
            writeError('This script is better with URLBrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')
            return
        # The import worked. The flag used to stay False here, which made
        # every instance take the urllib fallback and never create ``g``.
        self.gotLibUrlGrabber = True
        if 'proxy' in config:
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabbersansProxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        """Download ``url`` and store it in the local file ``dest``."""
        if self.gotLibUrlGrabber:
            self.g.urlgrab(url, filename=dest)
            return
        # Fallback without urlgrabber: plain urllib download.
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        response = urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
        # Open the destination only after the download succeeded so that a
        # failed request does not leave an empty file behind.
        with open(dest, "wb") as fd:
            fd.write(data)
示例5: Fetcher
class Fetcher(object):
    """Thin wrapper around a URLGrabber created with ``prefix=remote``.

    URLGrabError raised by urlgrabber is re-raised as IOError.
    """

    def __init__(self, remote):
        self.remote = remote
        self.g = URLGrabber(prefix=self.remote)

    def fetch_to_file(self, src, dest):
        """Download ``src`` to ``dest + '.part'`` and rename it to ``dest``."""
        partial = dest + '.part'
        try:
            self.g.urlgrab(src, filename=partial, copy_local=1, user_agent='lsd-fetch/1.0')
        except URLGrabError as err:
            raise IOError(str(err))
        os.rename(partial, dest)

    def fetch(self, src='/'):
        """Return the content read from ``src`` with surrounding whitespace stripped."""
        try:
            return self.g.urlread(src).strip()
        except URLGrabError as err:
            raise IOError(str(err))

    def listdir(self, dir='/'):
        """Return the whitespace-separated entries of ``<dir>/.listing``."""
        listing = self.fetch(os.path.join(dir, '.listing'))
        # split() without arguments already drops empty fields and
        # surrounding whitespace.
        return listing.split()

    # Pickling support -- only pickle the remote URL
    def __getstate__(self):
        return self.remote

    def __setstate__(self, remote):
        self.__init__(remote)
示例6: test_make_callback
def test_make_callback(self):
    """grabber.URLGrabber._make_callback() tests

    Checks that a bare callable comes back as ``(cb, (), {})`` and that an
    already complete ``(callable, args, kwargs)`` tuple comes back unchanged.
    """
    def cb(e):
        pass

    # Trailing comma: ('stuff') is just the string 'stuff', not a tuple.
    tup_cb = (cb, ('stuff',), {'some': 'dict'})
    g = URLGrabber()
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(g._make_callback(cb), (cb, (), {}))
    self.assertEqual(g._make_callback(tup_cb), tup_cb)
示例7: fetchPackages
def fetchPackages(self, destination=None):
    """Downloads packages to destination directory

    The process working directory is changed to ``destination`` (or to
    ``self.dest_dir`` when no destination is given) and every URI returned
    by ``self.getPackageList()`` is downloaded there. A file whose name
    already exists in that directory is skipped. The first failed download
    stops the loop.
    """
    from urlgrabber.grabber import URLGrabber
    from urlgrabber.progress import TextMeter
    from os import path, chdir

    chdir(destination if destination else self.dest_dir)

    ### URLGrabber objects ###
    grabber = URLGrabber(progress_obj=TextMeter())

    ### Start Iteration over list of packages' URIs ###
    for uri in self.getPackageList():
        pisifile = uri.split("/")[-1]
        if path.exists(pisifile):
            print("%s --- No Update! Skipping..." % pisifile)
            continue
        try:
            grabber.urlgrab(uri)
        except Exception:
            # Still best-effort, but KeyboardInterrupt / SystemExit are no
            # longer swallowed as they were by the bare ``except:``.
            print("Error while downloading file %s" % pisifile)
            break
    print("Finished.")
示例8: moosWeb2dict
def moosWeb2dict(vehicle_host, vehicle_port):
    """Fetch the status page at ``http://<vehicle_host>:<vehicle_port>``.

    Returns:
        dict mapping variable name (text of the link in the first cell of a
        row) to the text of the seventh cell, for every row of the nested
        table whose third cell is not ``"pending"``.
    """
    def moosHTML2dict(data):
        soup = BeautifulSoup(data)
        istrtd = (lambda tag: tag.name == "tr" and len(tag.findAll("td")) > 0)
        ret = {}
        for tr in soup.table.table.findAll(istrtd):
            tds = tr.findAll("td")
            vartag = tds[0].a
            # A first cell without a link yields None; skip such rows
            # instead of failing in len().
            if vartag is None:
                continue
            if 0 < len(vartag) and "pending" != tds[2].contents[0]:
                key = vartag.contents[0]
                val = tds[6].contents[0]
                ret[str(key)] = str(val)
        return ret

    UG = URLGrabber()
    # fetch new page (the host comes from the vehicle_host parameter; the
    # previously used name ``remote_vehicle`` is not defined in this function)
    data = UG.urlread("http://%s:%s" % (vehicle_host, vehicle_port))
    # The page emits unquoted href attributes; quote them before parsing.
    p = re.compile('<A href = ([^>]*)>')
    fixed_data = p.sub(r'<A href="\1">', data)
    return moosHTML2dict(fixed_data)
示例9: __init__
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    """Create the grabber and derive bucket name and key prefix from ``baseurl``.

    Args:
        awsAccessKey: access key passed to ``boto.connect_s3``.
        awsSecretKey: secret key passed to ``boto.connect_s3``.
        baseurl: URL string, or a sequence whose first element is the URL.

    Raises:
        Exception: when ``baseurl`` is empty.
    """
    self.logger = logging.getLogger("yum.verbose.main")
    self.logger.log(logginglevels.DEBUG_4, "s3: creating empty URLGrabber instance")
    URLGrabber.__init__(self)
    self.logger.log(logginglevels.DEBUG_4, "s3: BotoGrabber init BASE_URL=%s" % baseurl)
    if not baseurl:
        raise Exception("s3: BotoGrabberInit got blank baseurl")
    # Only unwrap real sequences. Indexing a plain string "succeeds" and
    # would reduce the URL to its first character. type(u"") covers
    # ``unicode`` on Python 2.
    if not isinstance(baseurl, (str, bytes, type(u""))):
        try:
            baseurl = baseurl[0]
        except (TypeError, IndexError, KeyError):
            pass
    self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
    self.baseurl = urlparse(baseurl)
    if hasattr(self.baseurl, 'netloc'):
        self.bucket_name = self.baseurl.netloc
        self.key_prefix = self.baseurl.path[1:]
    else:
        # Result without named attributes: index 1 is the network location,
        # index 2 the path.
        self.bucket_name = self.baseurl[1]
        self.key_prefix = self.baseurl[2]
    if self.key_prefix.startswith("/"):
        self.key_prefix = self.key_prefix[1:]
    # "<bucket>.s3<...>.amazonaws.com" -> "<bucket>"
    m = re.match(r'(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
    if m:
        self.bucket_name = m.group(1)
    if sys.stdout.isatty():
        print("%s - %s" % (self.bucket_name, self.key_prefix))
示例10: __init__
def __init__(self, pakfire, *args, **kwargs):
    """Initialise the grabber with pakfire's fixed and configured options.

    ``pakfire`` is either a ``_Config`` instance or an object exposing one
    as ``.config``. The options set here override equally named keyword
    arguments supplied by the caller.
    """
    # Fixed options. NOTE: SSL host and peer verification are switched off.
    kwargs["quote"] = 0
    kwargs["user_agent"] = "pakfire/%s" % PAKFIRE_VERSION
    kwargs["ssl_verify_host"] = False
    kwargs["ssl_verify_peer"] = False

    config = pakfire if isinstance(pakfire, _Config) else pakfire.config
    self.config = config

    # Set throttle setting.
    throttle = config.get("downloader", "bandwidth_throttle")
    if throttle:
        try:
            throttle = int(throttle)
        except ValueError:
            log.error("Configuration value for bandwidth_throttle is invalid.")
            throttle = 0
        kwargs["throttle"] = throttle

    # Configure HTTP proxy.
    proxy = config.get("downloader", "http_proxy")
    if proxy:
        kwargs["proxies"] = {"http": proxy, "https": proxy}

    URLGrabber.__init__(self, *args, **kwargs)
示例11: __init__
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    """Initialise the grabber from AWS credentials and a base URL.

    Logs ``baseurl``, runs the ``URLGrabber`` initialiser and then delegates
    to ``_handle_baseurl``, ``_handle_s3`` and ``_dump_attributes`` in that
    order, finally reporting ``bucket_name`` and ``key_prefix`` through
    ``interactive_notify``.
    """
    # NOTE(review): self.logger is used before anything is assigned in this
    # method, so it has to be provided elsewhere (e.g. on the class) -- confirm.
    self.logger.debug("BotoGrabber init BASE_URL=%s" % baseurl)
    URLGrabber.__init__(self)
    self._handle_baseurl(baseurl)
    self._handle_s3(awsAccessKey, awsSecretKey)
    self._dump_attributes()
    # bucket_name / key_prefix are presumably set by the helpers above -- verify.
    interactive_notify("%s - %s" % (self.bucket_name, self.key_prefix))
示例12: ProxyHTTPAuthTests
class ProxyHTTPAuthTests(BaseProxyTests):
    """Proxy tests that open ``ref_http`` through a URLGrabber.

    ``have_proxy``, ``skip``, ``good_proxies`` and ``bad_proxies`` are not
    defined here; they are presumably supplied by ``BaseProxyTests`` --
    confirm against the base class.
    """

    def setUp(self):
        # Target URL for every test in this class.
        self.url = ref_http
        # Skip when have_proxy() reports no proxy.
        if not self.have_proxy():
            self.skip()
        self.g = URLGrabber()

    def test_good_password(self):
        # Passes as long as urlopen does not raise.
        self.g.urlopen(self.url, proxies=self.good_proxies)

    def test_bad_password(self):
        # Using bad_proxies must make urlopen raise URLGrabError.
        self.assertRaises(URLGrabError, self.g.urlopen,
                          self.url, proxies=self.bad_proxies)
示例13: urlgrab
def urlgrab(self, url, *args, **kwargs):
    """Delegate to ``URLGrabber.urlgrab`` with the URL encoded as UTF-8.

    ``self.check_offline_mode()`` is called first; remaining positional and
    keyword arguments are passed through unchanged.
    """
    self.check_offline_mode()

    # This is for older versions of urlgrabber which are packaged in Debian
    # and Ubuntu and cannot handle filenames as a normal Python string but need
    # a unicode string.
    encoded_url = url.encode("utf-8")
    return URLGrabber.urlgrab(self, encoded_url, *args, **kwargs)
示例14: _getTreeInfo
def _getTreeInfo(self, url, proxy_url, sslverify):
    """ Retrieve treeinfo and return the path to the local file.

        ``<url>/.treeinfo`` is tried first and ``<url>/treeinfo`` second;
        both are stored as ``/tmp/.treeinfo``.

        :param url: url of the repo
        :type url: string
        :param proxy_url: Optional full proxy URL or ""
        :type proxy_url: string
        :param sslverify: True if SSL certificate should be verified
        :type sslverify: bool
        :returns: Path to retrieved .treeinfo file or None
        :rtype: string or None
    """
    if not url:
        return None

    log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
              url, proxy_url, sslverify)

    ugopts = {"ssl_verify_peer": sslverify,
              "ssl_verify_host": sslverify}

    proxies = {}
    if proxy_url:
        try:
            proxy = ProxyString(proxy_url)
        except ProxyStringError as e:
            # An unparsable proxy is logged and the download proceeds
            # without one.
            log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                     proxy_url, e)
        else:
            proxies = {"http": proxy.url,
                       "https": proxy.url}

    ug = URLGrabber()
    last_error = None
    for location in ("%s/.treeinfo", "%s/treeinfo"):
        try:
            return ug.urlgrab(location % url,
                              "/tmp/.treeinfo", copy_local=True,
                              proxies=proxies, **ugopts)
        except URLGrabError as e:
            last_error = e

    # Both candidates failed; report the error of the second attempt.
    log.info("Error downloading treeinfo: %s", last_error)
    return None
示例15: download
def download(url, filename=None, associated_task=None, web_proxy = None):
    """Download ``url`` with urlgrabber and return the local file name.

    Args:
        url: source URL.
        filename: target file, or an existing directory in which the file
            is stored under the basename of ``url``. With ``None`` the
            choice of the name is left to ``URLGrabber.urlgrab``.
        associated_task: optional task object; its ``description`` and
            ``unit`` are set and it is handed to ``DownloadProgress``.
        web_proxy: optional proxy URL, used for ``http`` only.

    Returns:
        The value returned by ``URLGrabber.urlgrab``.
    """
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    proxies = {'http': web_proxy} if web_proxy else None
    # Named ``grabber`` so the local does not shadow the urlgrabber module.
    grabber = URLGrabber(
        reget='simple',
        proxies=proxies,
        progress_obj=progress_obj)
    # os.path.isdir(None) raises TypeError, so the default filename=None
    # must skip the directory check.
    if filename and os.path.isdir(filename):
        filename = os.path.join(filename, os.path.basename(url))
    return grabber.urlgrab(url, filename=filename)