当前位置: 首页>>代码示例>>Python>>正文


Python utils.Downloader类代码示例

本文整理汇总了Python中dlstats.utils.Downloader的典型用法代码示例。如果您正苦于以下问题:Python Downloader类的具体用法?Python Downloader怎么用?Python Downloader使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Downloader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _load_structure_dataflows

    def _load_structure_dataflows(self, force=False):
        """Load the SDMX dataflow structure for this provider.

        Tries, in order: the in-memory cache (unless *force*), the stored
        metadata (unless refresh is requested), and finally a fresh download.
        """
        if self._dataflows and not force:
            return

        self.provider_verify()

        url = "http://www.bdm.insee.fr/series/sdmx/dataflow/%s" % self.provider_name

        # Reuse the locally stored structure when metadata refresh is disabled.
        if self.refresh_meta is False:
            self._dataflows = self._structure_get("dataflows")
            if self._dataflows:
                self.xml_dsd.dataflows = self._dataflows
                logger.info("load structure [dataflows] from metadata for url[%s]" % url)
                return

        downloader = Downloader(url=url,
                                filename="dataflow.xml",
                                store_filepath=self.store_path,
                                headers=SDMX_METADATA_HEADERS,
                                use_existing_file=self.use_existing_file,
                                client=self.requests_client)
        filepath = downloader.get_filepath()
        self.for_delete.append(filepath)

        self.xml_dsd.process(filepath)
        self._dataflows = self.xml_dsd.dataflows
        # Persist the freshly parsed structure for the next run.
        self._structure_put("dataflows", url, **self._dataflows)
开发者ID:Widukind,项目名称:dlstats,代码行数:29,代码来源:insee.py

示例2: _load_file

    def _load_file(self):
        """Download the dataset zip and derive the release date from HTTP headers.

        Raises errors.RejectUpdatedDataset when the dataset is already up to date.
        """
        downloader = Downloader(
            url=self.url,
            filename="data-%s.zip" % (self.dataset_code),
            store_filepath=self.get_store_path(),
            use_existing_file=self.fetcher.use_existing_file,
        )
        self.filepath, response = downloader.get_filepath_and_response()

        if self.filepath:
            self.fetcher.for_delete.append(self.filepath)

        # Example header -- Last-Modified: Tue, 05 Apr 2016 15:05:11 GMT
        last_modified = response.headers["Last-Modified"]
        parsed = datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S GMT")
        self.release_date = clean_datetime(parsed)

        # Skip the download entirely when nothing new has been published.
        if self.dataset.last_update and self.dataset.last_update >= self.release_date:
            raise errors.RejectUpdatedDataset(
                provider_name=self.provider_name,
                dataset_code=self.dataset_code,
                comments="update-date[%s]" % self.release_date,
            )

        self.dataset.last_update = self.release_date
开发者ID:srault95,项目名称:dlstats,代码行数:25,代码来源:world_bank.py

示例3: _load_datas

 def _load_datas(self):
     """Download the dataset file and return its local filepath."""
     # TODO: timeout, replace
     downloader = Downloader(url=self.dataset_url,
                             filename=self.filename,
                             store_filepath=self.get_store_path())
     return downloader.get_filepath()
开发者ID:MichelJuillard,项目名称:dlstats,代码行数:7,代码来源:esri.py

示例4: _load_datas

 def _load_datas(self, datas=None):
     """Load BIS data, either downloading the zip archive or from *datas*.

     When *datas* is None the archive is downloaded and extracted; otherwise
     *datas* is read directly as CSV text. Populates the dataset's dimension
     keys, release date and the period range.
     """
     kwargs = {}
     
     if not datas:
         # TODO: timeout, replace
         download = Downloader(url=self.url,
                               store_filepath=self.store_path, 
                               filename=self.filename,
                               use_existing_file=self.fetcher.use_existing_file)
         
         zip_filepath = download.get_filepath()
         self.fetcher.for_delete.append(zip_filepath)
         filepath = extract_zip_file(zip_filepath)
         # BUGFIX: register the *extracted* file for cleanup. Previously
         # zip_filepath was appended a second time, leaking the extracted file.
         self.fetcher.for_delete.append(filepath)
         
         kwargs['filepath'] = filepath
     else:
         kwargs['fileobj'] = io.StringIO(datas, newline="\n")
     
     kwargs['date_format'] = "%a %b %d %H:%M:%S %Z %Y"
     kwargs['headers_line'] = DATASETS[self.dataset.dataset_code]['lines']['headers']
     self._file, self._rows, self.headers, self.release_date, self.dimension_keys, self.periods = local_read_csv(**kwargs)
     
     self.dataset.dimension_keys = self.dimension_keys
     
     self.dataset.last_update = self.release_date
     
     # Ordinal period bounds from the first and last periods of the file.
     self.start_date = get_ordinal_from_period(self.periods[0], freq=self.frequency)
     self.end_date = get_ordinal_from_period(self.periods[-1], freq=self.frequency)
开发者ID:gitter-badger,项目名称:dlstats,代码行数:30,代码来源:bis.py

示例5: _load_dsd

    def _load_dsd(self):
        """Download and process the DSD for this dataset.

        NOTE (translated from original): there is one DSD per series group
        (about 400 of them); one downloaded DSD is shared by several datasets
        (~668 datasets). Falls back to element-by-element loading when the
        server rejects the full request or no file is produced.
        """
        url = "http://www.bdm.insee.fr/series/sdmx/datastructure/INSEE/%s?references=children" % self.dsd_id
        download = Downloader(url=url,
                              filename="dsd-%s.xml" % self.dsd_id,
                              headers=SDMX_METADATA_HEADERS,
                              store_filepath=self.store_path,
                              use_existing_file=self.fetcher.use_existing_file,
                              client=self.fetcher.requests_client)

        filepath, response = download.get_filepath_and_response()

        if response:
            if response.status_code == HTTP_ERROR_LONG_RESPONSE:
                # Response too large for the server: fetch the DSD piecewise.
                self._load_dsd_by_element()
                return
            elif response.status_code >= 400:
                # BUGFIX: raise_for_status() raises by itself and returns None,
                # so the previous `raise response.raise_for_status()` would
                # have raised None (TypeError) had it ever returned.
                response.raise_for_status()

        if not os.path.exists(filepath):
            self._load_dsd_by_element()
            return

        self.fetcher.for_delete.append(filepath)
        self.xml_dsd.process(filepath)
        self._set_dataset()
开发者ID:Widukind,项目名称:dlstats,代码行数:31,代码来源:insee.py

示例6: _load

    def _load(self):
        """Download the dataset XML from Destatis and parse its rows."""
        # TODO: DSD is not fetched yet; something along these lines:
        #   url = "xxx/%s" % self.dataset_code
        #   download = Downloader(url=url,
        #                         filename="dataflow-%s.xml" % self.dataset_code)
        #   self.xml_dsd.process(download.get_filepath())

        url = "https://www.destatis.de/sddsplus/%s.xml" % self.dataset_code
        downloader = Downloader(url=url,
                                filename="data-%s.xml" % self.dataset_code)

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                ns_tag_data=self.ns_tag_data,
                                #dimension_keys=self.xml_dsd.dimension_keys
                                )

        # TODO: response and exception
        try:
            filepath, response = downloader.get_filepath_and_response()
        except requests.exceptions.HTTPError as err:
            logger.critical("AUTRE ERREUR HTTP : %s" % err.response.status_code)
            raise

        self.rows = self.xml_data.process(filepath)
开发者ID:Menandalbee,项目名称:dlstats,代码行数:28,代码来源:destatis.py

示例7: _load

    def _load(self):
        """Download the zipped dataset, then process its DSD and data files."""
        downloader = Downloader(url=self.dataset_url,
                                filename="data-%s.zip" % self.dataset_code,
                                store_filepath=self.store_path,
                                use_existing_file=self.fetcher.use_existing_file)

        # The archive contains one DSD file and one SDMX data file,
        # keyed by their filenames.
        extracted = extract_zip_file(downloader.get_filepath())
        dsd_filepath = extracted[self.dataset_code + ".dsd.xml"]
        data_filepath = extracted[self.dataset_code + ".sdmx.xml"]

        self.fetcher.for_delete.append(dsd_filepath)
        self.fetcher.for_delete.append(data_filepath)

        self.xml_dsd.process(dsd_filepath)
        self._set_dataset()

        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dataset_code,
                                # TODO: frequencies_supported=FREQUENCIES_SUPPORTED
                                )
        self.rows = self.xml_data.process(data_filepath)
开发者ID:srault95,项目名称:dlstats,代码行数:27,代码来源:eurostat.py

示例8: weo_urls

    def weo_urls(self):
        """Return the sorted list of download URLs for the IMF WEO data files.

        Scrapes the WEO index page, filters its links by position, follows
        each release page, and resolves the final data-file URL found there.
        """
        download = Downloader(url='http://www.imf.org/external/ns/cs.aspx?id=28',
                              filename="weo.html",
                              store_filepath=self.store_path)
        
        filepath = download.get_filepath()
        with open(filepath, 'rb') as fp:
            webpage = fp.read()
        
        self.fetcher.for_delete.append(filepath)
            
        #TODO: replace by beautifoulsoup ?
        html = etree.HTML(webpage)
        hrefs = html.xpath("//div[@id = 'content-main']/h4/a['href']")
        links = [href.values() for href in hrefs]
        
        #The last links of the WEO webpage lead to data we dont want to pull.
        links = links[:-16]
        #These are other links we don't want.
        # NOTE(review): these positional offsets (-16, -8, -10) depend on the
        # current page layout and will silently break if the page changes.
        links.pop(-8)
        links.pop(-10)
        # Rewrite each index link so it points at the release's download page.
        links = [link[0][:-10]+'download.aspx' for link in links]

        output = []
    
        # Visit each release page and extract the actual data-file link.
        for link in links:
            webpage = requests.get(link)
            html = etree.HTML(webpage.text)
            final_link = html.xpath("//div[@id = 'content']//table//a['href']")
            output.append(link[:-13]+final_link[0].values()[0])
            
        # we need to handle the issue in chronological order
        return sorted(output)
开发者ID:Menandalbee,项目名称:dlstats,代码行数:33,代码来源:imf.py

示例9: _load_xls

 def _load_xls(self):
     """Download the dataset's XLS info file and return its local filepath."""
     downloader = Downloader(url=make_xls_url(self.dataset_code),
                             filename=self.dataset_code + '_info.xls',
                             store_filepath=self.get_store_path(),
                             use_existing_file=self.fetcher.use_existing_file)
     return downloader.get_filepath()
开发者ID:Menandalbee,项目名称:dlstats,代码行数:8,代码来源:bdf.py

示例10: _load_datas

    def _load_datas(self):
        """Download the dataset archive and extract it.

        Returns the two extracted filepaths (DSD and data).
        """
        downloader = Downloader(url=self.dataset_url,
                                filename=self.filename,
                                store_filepath=self.get_store_path())
        return extract_zip_file(downloader.get_filepath())
开发者ID:MichelJuillard,项目名称:dlstats,代码行数:9,代码来源:eurostat.py

示例11: _load_datas

 def _load_datas(self):
     """Download the dataset file, register it for cleanup, return its path."""
     # TODO: timeout, replace
     downloader = Downloader(url=self.dataset_url,
                             filename=self.dataset_code,
                             store_filepath=self.store_path,
                             use_existing_file=self.fetcher.use_existing_file)
     filepath = downloader.get_filepath()
     self.fetcher.for_delete.append(filepath)
     return filepath
开发者ID:ThomasRoca,项目名称:dlstats,代码行数:9,代码来源:esri.py

示例12: _get_agenda

    def _get_agenda(self):
        """Download the agenda file and return its raw bytes."""
        downloader = Downloader(url=AGENDA['url'],
                                filename=AGENDA['filename'],
                                store_filepath=self.store_path)
        filepath = downloader.get_filepath()

        with open(filepath, 'rb') as fp:
            content = fp.read()
        # Schedule the downloaded file for cleanup once it has been read.
        self.for_delete.append(filepath)
        return content
开发者ID:gitter-badger,项目名称:dlstats,代码行数:10,代码来源:bis.py

示例13: _get_data_by_dimension

    def _get_data_by_dimension(self):
        """Yield (row, err) tuples, downloading data one dimension value at a time.

        Selects one dimension from the DSD, then fetches one partial-key SDMX
        data file per value of that dimension. Ends with a (None, None) sentinel.
        """
        self.xml_data = XMLData(provider_name=self.provider_name,
                                dataset_code=self.dataset_code,
                                xml_dsd=self.xml_dsd,
                                dsd_id=self.dsd_id,
                                frequencies_supported=FREQUENCIES_SUPPORTED)

        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        position, _key, dimension_values = select_dimension(dimension_keys, dimensions)

        count_dimensions = len(dimension_keys)

        for dimension_value in dimension_values:

            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            # http://sdw-wsrest.ecb.int/service/data/IEAQ/A............
            url = "http://sdw-wsrest.ecb.int/service/data/%s/%s" % (self.dataset_code, key)
            if not self._is_good_url(url, good_codes=[200, HTTP_ERROR_NOT_MODIFIED]):
                print("bypass url[%s]" % url)
                continue

            headers = SDMX_DATA_HEADERS

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  headers=headers,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  # client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            else:
                continue

            if response is not None:
                self._add_url_cache(url, response.status_code)
                # BUGFIX: these checks were unreachable `elif response and ...`
                # branches after `if response:`, so "no result" and HTTP-error
                # responses were never handled (compare with the INSEE fetcher).
                if response.status_code == HTTP_ERROR_NO_RESULT:
                    continue
                elif response.status_code >= 400:
                    # raise_for_status() raises by itself; never `raise` its
                    # None return value.
                    response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

        yield None, None
开发者ID:srault95,项目名称:dlstats,代码行数:55,代码来源:ecb.py

示例14: _get_data_by_dimension

    def _get_data_by_dimension(self):
        """Yield (row, err) tuples, downloading data one dimension value at a time.

        Splits the request over one dimension of the DSD (selected by size
        heuristic) and fetches a partial-key SDMX file per value. Ends with a
        (None, None) sentinel.
        """
        dimension_keys, dimensions = self._get_dimensions_from_dsd()

        # IPC-2015-COICOP needs the largest dimension ("max") as split key
        # instead of the average-sized one.
        choice = "avg"
        if self.dataset_code in ["IPC-2015-COICOP"]:
            choice = "max"

        position, _key, dimension_values = select_dimension(dimension_keys,
                                                            dimensions,
                                                            choice=choice)

        count_dimensions = len(dimension_keys)

        logger.info("choice[%s] - filterkey[%s] - count[%s] - provider[%s] - dataset[%s]" % (choice, _key, len(dimension_values), self.provider_name, self.dataset_code))

        for dimension_value in dimension_values:
            # For each value of the selected dimension, build a partial URL key.
            key = get_key_for_dimension(count_dimensions, position, dimension_value)

            url = "http://www.bdm.insee.fr/series/sdmx/data/%s/%s" % (self.dataset_code, key)
            if self._is_good_url(url) is False:
                logger.warning("bypass not good url[%s]" % url)
                continue

            filename = "data-%s-%s.xml" % (self.dataset_code, key.replace(".", "_"))
            download = Downloader(url=url,
                                  filename=filename,
                                  store_filepath=self.store_path,
                                  use_existing_file=self.fetcher.use_existing_file,
                                  #NOT USE FOR INSEE client=self.fetcher.requests_client
                                  )
            filepath, response = download.get_filepath_and_response()

            if not response is None:
                self._add_url_cache(url, response.status_code)

            if filepath and os.path.exists(filepath):
                self.fetcher.for_delete.append(filepath)
            elif not filepath or not os.path.exists(filepath):
                continue

            if response and response.status_code == HTTP_ERROR_NO_RESULT:
                continue
            elif response and response.status_code >= 400:
                # BUGFIX: raise_for_status() raises by itself and returns None;
                # the previous `raise response.raise_for_status()` would have
                # raised None (TypeError) had it ever returned.
                response.raise_for_status()

            for row, err in self.xml_data.process(filepath):
                yield row, err

            #self.dataset.update_database(save_only=True)

        yield None, None
开发者ID:Widukind,项目名称:dlstats,代码行数:54,代码来源:insee.py

示例15: _load_dsd

 def _load_dsd(self):
     """Download the DSD file, process it and configure the dataset."""
     downloader = Downloader(store_filepath=self.store_path,
                             url=self._get_url_dsd(),
                             filename="dsd-%s.xml" % self.dataset_code,
                             use_existing_file=self.fetcher.use_existing_file,
                             client=self.fetcher.requests_client)
     filepath = downloader.get_filepath()
     self.fetcher.for_delete.append(filepath)

     self.xml_dsd.process(filepath)
     self._set_dataset()
开发者ID:gitter-badger,项目名称:dlstats,代码行数:12,代码来源:oecd.py


注:本文中的dlstats.utils.Downloader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。