

Python URLGrabber.urlgrab Method Code Examples

This article collects and summarizes typical usage examples of the urlgrabber.grabber.URLGrabber.urlgrab method in Python. If you have been wondering exactly what URLGrabber.urlgrab does, how to call it, or what real-world uses of it look like, the curated code examples below may help. You can also explore further usage examples of the class it belongs to, urlgrabber.grabber.URLGrabber.


The following presents 15 code examples of the URLGrabber.urlgrab method, sorted by popularity by default.
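Before working through the examples, here is a minimal sketch of the typical call pattern, assuming only the urlgrabber API that the examples below also use; the URL, filename, and option values are placeholders, not taken from any of the listed projects.

# Minimal usage sketch -- placeholder URL and filename, Python 2 style to match the examples
from urlgrabber.grabber import URLGrabber, URLGrabError

g = URLGrabber(reget='simple', retry=3, timeout=30)
try:
    # urlgrab fetches the URL and returns the path to the downloaded local file
    local_path = g.urlgrab('http://example.com/some/file.tar.gz',
                           filename='/tmp/file.tar.gz')
except URLGrabError, e:
    print 'Download failed: %s' % e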

Example 1: WebGrabber

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
class WebGrabber(Singleton):
    g = None

    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True
        except ImportError:
            writeError('This script works better with urlgrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')

        if not self.gotLibUrlGrabber:
            return
        if 'proxy' in config:
            writeInfo("URLGrabber with proxy: %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber without proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not available
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            self.g.urlgrab(url, filename=dest)
Author: naparuba, Project: pyndsgest, Lines: 32, Source: webGrabber.py

Example 2: fetchPackages

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
    def fetchPackages(self, destination=None):
        """Downloads packages to destination directory """
        from urlgrabber.grabber import URLGrabber
        from urlgrabber.progress import TextMeter
        from os import path, chdir

        if destination:
            chdir(destination)
        else:
            chdir(self.dest_dir)

        ### URLGrabber objects ###
        t = TextMeter()
        g = URLGrabber(progress_obj=t)

        ### Start Iteration over list of packages' URIs ###
        for uri in self.getPackageList():
            pisifile = uri.split("/")[-1]
            if path.exists(pisifile):
                print pisifile, "--- No Update! Skipping..."
                continue
            try:
                g.urlgrab(uri)
            except URLGrabError:
                print "Error while downloading file %s" % pisifile
                break
        print "Finished."
Author: havan, Project: porsuk, Lines: 29, Source: pisi_crawler.py

Example 3: Fetcher

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
	class Fetcher(object):
		def __init__(self, remote):
			self.remote = remote
			self.g = URLGrabber(prefix=self.remote)

		def fetch_to_file(self, src, dest):
			tmp = dest + '.part'
			try:
				self.g.urlgrab(src, filename=tmp, copy_local=1, user_agent='lsd-fetch/1.0')
			except URLGrabError as e:
				raise IOError(str(e))
			os.rename(tmp, dest)

		def fetch(self, src='/'):
			try:
				contents = self.g.urlread(src).strip()
			except URLGrabError as e:
				raise IOError(str(e))
			return contents

		def listdir(self, dir='/'):
			lfn = os.path.join(dir, '.listing')

			contents = self.fetch(lfn)

			return [ s.strip() for s in contents.split() if s.strip() != '' ]

		# Pickling support -- only pickle the remote URL
		def __getstate__(self):
			return self.remote
		def __setstate__(self, remote):
			self.__init__(remote)
Author: banados, Project: lsd, Lines: 34, Source: fetcher.py

Example 4: update_categories

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def update_categories(username, subscriptions):
    g = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)

    cats = get_categories(username)
    visited = set()

    for sub in subscriptions:
        if sub.name in visited:
            continue
        elif sub.name in cats:
            del cats[sub.name]
            visited.add(sub.name)
            continue
        else:
            print 'Downloading thumbnail for %s/%s'%(sub.name, sub.dname)
            ft = sub.thumbnail[-3:]
            nf = '%s/%s%s%s.%s'%(folder, sub.name, SPLITTER, sub.dname, ft)
            g.urlgrab(sub.thumbnail, filename=nf)

    for sub in cats:
        print 'Removing thumbnail for %s'%sub
        if cats[sub] is None:
            old_fn = '%s/%s*'%(folder, sub)
        else:
            old_fn = '%s/%s/%s*'%(folder, cats[sub], sub)
        for fl in glob.glob(old_fn):
            print '\t', fl
            os.remove(fl)
Author: DLu, Project: probablyscripts, Lines: 33, Source: classes.py

Example 5: urlgrab

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
	def urlgrab(self, url, *args, **kwargs):
		self.check_offline_mode()

		# This is for older versions of urlgrabber which are packaged in Debian
		# and Ubuntu and cannot handle filenames as a normal Python string but need
		# a unicode string.
		return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
Author: ipfire, Project: pakfire, Lines: 9, Source: downloader.py

Example 6: downloadFile

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def downloadFile(url, filename, subdir):
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))
    maxBytesPerSecond=0        #  2**19   ==> 0.5 MByte/s 
                               #  0       ==> not restricted
    grabber = URLGrabber( progress_obj=None
                        , throttle=maxBytesPerSecond        
                        , reget='simple'
                        , retry=5
                        , retrycodes=[-1,4,5,6,7,12,14]
                        , timeout=30
                        , user_agent='bong download manager/1.0'
                        )
    
    statinfo = os.stat(BongEnvironment.settings['recdir'])
    
    targetdir = os.path.join(BongEnvironment.settings['recdir'], subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            os.chmod(targetdir, 0777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)

    targetfile = os.path.join(targetdir, filename)
    
    t1 = time.time()
    try:
        local_filename = grabber.urlgrab(url, targetfile)
    except URLGrabError, e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
Author: ambo4, Project: BONG.TV-Download-Manager, Lines: 32, Source: BongDownload.py

Example 7: _getTreeInfo

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
    def _getTreeInfo(self, url, proxy_url, sslverify):
        """ Retrieve treeinfo and return the path to the local file.

            :param url: URL of the repo
            :type url: string
            :param proxy_url: optional full proxy URL, or ""
            :type proxy_url: string
            :param sslverify: True if the SSL certificate should be verified
            :type sslverify: bool
            :returns: Path to retrieved .treeinfo file or None
            :rtype: string or None
        """
        if not url:
            return None

        log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
                  url, proxy_url, sslverify)

        ugopts = {"ssl_verify_peer": sslverify,
                  "ssl_verify_host": sslverify}

        proxies = {}
        if proxy_url:
            try:
                proxy = ProxyString(proxy_url)
                proxies = {"http": proxy.url,
                           "https": proxy.url}
            except ProxyStringError as e:
                log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                         proxy_url, e)

        ug = URLGrabber()
        try:
            treeinfo = ug.urlgrab("%s/.treeinfo" % url,
                                  "/tmp/.treeinfo", copy_local=True,
                                  proxies=proxies, **ugopts)
        except URLGrabError as e:
            try:
                treeinfo = ug.urlgrab("%s/treeinfo" % url,
                                      "/tmp/.treeinfo", copy_local=True,
                                      proxies=proxies, **ugopts)
            except URLGrabError as e:
                log.info("Error downloading treeinfo: %s", e)
                treeinfo = None

        return treeinfo
Author: akozumpl, Project: anaconda, Lines: 48, Source: __init__.py

Example 8: run

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
    def run(self):
        #Check if file exists
        if os.path.isfile(self.file):
            os.chmod(self.file, stat.S_IWUSR)
            os.remove(self.file)

        ##Init url/path pointers
        #response     = urllib2.urlopen(self.url)
        #total_size   = response.info().getheader('Content-Length').strip()
        #self.total_size   = int(total_size)

        #freespace
        #freespace = get_free_space(self.app, path)

        #check if enough freespace
        #if self.freespace < total_size and self.freespace != 0:
        #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
        #    self.active = False
        #    return

        self.app.gui.SetVisible(4000, True)
        progress = TextMeter(self.app)
        try:
            Log(self.app, 'Download started' )
            g = URLGrabber(reget='simple')
            g.urlgrab(self.url, filename=self.file, reget='simple', progress_obj=progress, text=self.filename)

            #Create info file as json
            json_dumps(self.infodata, self.infopath)
            self.app.gui.ShowDialogNotification('Download Complete')
        except Exception:
            Log(self.app, traceback.format_exc() )
            self.app.gui.ShowDialogNotification('Error during download')

        self.app.gui.SetVisible(4000, False)
        self.active = False
        Log(self.app, 'Download finished' )
Author: D34dmeat, Project: boxeehack, Lines: 39, Source: download.py

Example 9: download

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def download(url, filename=None, associated_task=None, web_proxy=None):
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy={'http':web_proxy}
    urlgrabber = URLGrabber(
        reget = 'simple',
        proxies = web_proxy,
        progress_obj = progress_obj)
    if filename and os.path.isdir(filename):
        basename = os.path.basename(url)
        filename = os.path.join(filename, basename)
    filename = urlgrabber.urlgrab(url, filename=filename)
    return filename
Author: hechaoyuyu, Project: swinst, Lines: 19, Source: downloader.py

Example 10: download_file

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def download_file(url, dirname):
    """
        Download @url and save to @dirname.
        @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    basename = os.path.basename(url)
    filename = "%s/%s" % (dirname, basename)

    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!" % filename)

    g = URLGrabber(reget=None)
    local_filename = g.urlgrab(url, filename)
    return local_filename
Author: Acidburn0zzz, Project: difio, Lines: 22, Source: grabber.py

Example 11: get_file_if_size_diff

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
def get_file_if_size_diff(url, d):
    fn = url.split('/')[-1]
    out_fnp = os.path.join(d, fn)
    g = URLGrabber(reget = "simple")
    locFnp = g.urlgrab(url, out_fnp)
    return locFnp
Author: umass-bib, Project: guides, Lines: 8, Source: downloadSraFromTable.py

Example 12: preInstall

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
    def preInstall(self, *args, **kwargs):
        """ Download image and loopback mount it.

            This is called after partitioning is setup, we now have space
            to grab the image. Download it to ROOT_PATH and provide feedback
            during the download (using urlgrabber callback).
        """
        # Setup urlgrabber and call back to download image to ROOT_PATH
        progress = URLGrabberProgress()
        ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
                  "ssl_verify_host": not self.data.method.noverifyssl,
                  "proxies" : self._proxies,
                  "progress_obj" : progress,
                  "copy_local" : True}

        error = None
        try:
            ug = URLGrabber()
            ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
        except URLGrabError as e:
            log.error("Error downloading liveimg: %s", e)
            error = e
        else:
            if not os.path.exists(self.image_path):
                error = "Failed to download %s, file doesn't exist" % self.data.method.url
                log.error(error)

        if error:
            exn = PayloadInstallError(str(error))
            if errorHandler.cb(exn) == ERROR_RAISE:
                raise exn

        # Used to make install progress % look correct
        self._adj_size = os.stat(self.image_path)[stat.ST_SIZE]

        if self.data.method.checksum:
            progressQ.send_message(_("Checking image checksum"))
            sha256 = hashlib.sha256()
            with open(self.image_path, "rb") as f:
                while True:
                    data = f.read(1024*1024)
                    if not data:
                        break
                    sha256.update(data)
            filesum = sha256.hexdigest()
            log.debug("sha256 of %s is %s", self.data.method.url, filesum)

            if lowerASCII(self.data.method.checksum) != filesum:
                log.error("%s does not match checksum.", self.data.method.checksum)
                exn = PayloadInstallError("Checksum of image does not match")
                if errorHandler.cb(exn) == ERROR_RAISE:
                    raise exn

        # Mount the image and check to see if it is a LiveOS/*.img
        # style squashfs image. If so, move it to IMAGE_DIR and mount the real
        # root image on INSTALL_TREE
        blivet.util.mount(self.image_path, INSTALL_TREE, fstype="auto", options="ro")
        if os.path.exists(INSTALL_TREE+"/LiveOS"):
            # Find the first .img in the directory and mount that on INSTALL_TREE
            img_files = glob.glob(INSTALL_TREE+"/LiveOS/*.img")
            if img_files:
                img_file = os.path.basename(sorted(img_files)[0])

                # move the mount to IMAGE_DIR
                os.makedirs(IMAGE_DIR, 0755)
                # work around inability to move shared filesystems
                iutil.execWithRedirect("mount",
                                       ["--make-rprivate", "/"])
                iutil.execWithRedirect("mount",
                                       ["--move", INSTALL_TREE, IMAGE_DIR])
                blivet.util.mount(IMAGE_DIR+"/LiveOS/"+img_file, INSTALL_TREE,
                                  fstype="auto", options="ro")
Author: akozumpl, Project: anaconda, Lines: 74, Source: livepayload.py

Example 13: max

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
                            self.progress(("=" if self.started else "+") * max(0, self.count - oldCount), suffix)
                            self.started = True

                        def end(self, totalRead):
                            self.update(totalRead, "OK")

                    progressIndicator = ProgressIndicator()
                    grabber = URLGrabber(
                        reget="simple",
                        timeout=self.timeout,
                        progress_obj=progressIndicator,
                        user_agent=userAgent,
                        http_headers=tuple((str(cookie["name"]), str(cookie["value"])) for cookie in cookies),
                    )
                    try:
                        grabber.urlgrab(link, filename=targetFileName)
                        downloadOK = True
                    except URLGrabError, e:
                        self.errors += 1
                        self.logger.error("Download failed: %s", e)
                    except KeyboardInterrupt:
                        self.errors += 1
                        self.logger.error("Download interrupted")
                    if downloadOK:
                        localSize = getFileSize(targetFileName)
                        if not localSize:
                            self.errors += 1
                            downloadOK = False
                            self.logger.error("Downloaded file seems corrupt")
                        elif linkSize:
                            if localSize > linkSize:
Author: tungpd, Project: vimeo-crawler, Lines: 33, Source: VimeoCrawler.py

Example 14: writeInfo

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
        if 'proxy' in config:
            writeInfo("URLGrabber with proxy: %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber without proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not available
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            self.g.urlgrab(url, filename=dest)


if __name__ == '__main__':
    g = URLGrabber(proxies={'http' : 'http://proxy.free.fr:3128'})
    url = 'http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip'
    g.urlgrab(url, filename='moncul.zip')

    g1 = WebGrabber(config={'proxy':'http://proxy.free.fr:3128'})
    g2 = WebGrabber()
    print "g1 is g2 %s" % (g1 is g2)
    
    g1.getWebFile('http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip','moncul.zip')
    
    print "Done."
Author: naparuba, Project: pyndsgest, Lines: 32, Source: webGrabber.py

Example 15: urlparse

# Required import: from urlgrabber.grabber import URLGrabber [as alias]
# Or: from urlgrabber.grabber.URLGrabber import urlgrab [as alias]
        try:
            url_redirect = urllib2.urlopen(urljoin(options.gmb_url, url))
            try:
                url_redirect_parsed = urlparse(url_redirect.geturl())
                filename = url_redirect_parsed.path[1:] # Strip leading /
            finally:
                # Close only after a successful open; closing in an outer
                # finally would raise NameError when urlopen itself fails
                url_redirect.close()
        except urllib2.HTTPError, e:
            if e.code == 403:
                print 'A 403 Forbidden error was raised when accessing the download. Is your bundle key correct?'
            else:
                print e

            sys.exit(1)

        try:
            grabber.urlgrab(url, filename=filename)
        except URLGrabError, e:
            if e.errno == 14:
                print 'Encountered HTTP error: %s' % (e.strerror)
            else:
                print 'Encountered error %d: %s' % (e.errno, e.strerror)
        else:
            if options.extract:
                print 'Extracting %s...' % filename,

                # Create directory
                dir_name = filename[:filename.rfind('.')]
                dir_name = dir_name.replace('+', ' ')
                try:
                    os.mkdir(dir_name)
                except OSError, e:
Author: Terr, Project: utils, Lines: 33, Source: download_game_music_bundles.py


Note: The urlgrabber.grabber.URLGrabber.urlgrab method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are drawn from open-source projects contributed by various developers; copyright in the source code remains with the original authors. Consult the corresponding project's License before distributing or using the code; do not reproduce without permission.