当前位置: 首页>>代码示例>>Python>>正文


Python Socrata.datasets方法代码示例

本文整理汇总了Python中sodapy.Socrata.datasets方法的典型用法代码示例。如果您正苦于以下问题:Python Socrata.datasets方法的具体用法?Python Socrata.datasets怎么用?Python Socrata.datasets使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sodapy.Socrata的用法示例。


在下文中一共展示了Socrata.datasets方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: SocrataRepository

# 需要导入模块: from sodapy import Socrata [as 别名]
# 注意: datasets 是 Socrata 类的实例方法, 无法单独导入; 请通过实例调用, 例如 Socrata(domain, app_token).datasets()
class SocrataRepository(HarvestRepository):
    """Harvest repository backed by a Socrata open-data portal.

    Dataset identifiers are discovered with ``Socrata.datasets()``; each
    dataset's metadata is later fetched with ``Socrata.get_metadata()``,
    converted to the harvester's record layout and written to ``self.db``.
    """

    def setRepoParams(self, repoParams):
        """Apply the repository configuration and create the sodapy client.

        Args:
            repoParams: Repository settings, forwarded unchanged to the
                parent class's ``setRepoParams``.
        """
        self.metadataprefix = "socrata"
        # NOTE(review): the parent call presumably populates self.url and
        # self.socrata_app_token, which are read right below -- confirm.
        super(SocrataRepository, self).setRepoParams(repoParams)
        # sodapy doesn't like http/https preceding URLs
        self.socratarepo = Socrata(self.url, self.socrata_app_token)
        self.domain_metadata = []

    def _crawl(self):
        """Register this repository and write a header for every dataset.

        The repository row is created/updated first (its id is stored back on
        ``self.repository_id``), then one header is written per entry returned
        by ``Socrata.datasets()``. Progress is logged every
        ``self.update_log_after_numitems`` items.
        """
        kwargs = {
            "repo_id": self.repository_id,
            "repo_url": self.url,
            "repo_set": self.set,
            "repo_name": self.name,
            "repo_type": "socrata",
            "enabled": self.enabled,
            "repo_thumbnail": self.thumbnail,
            "item_url_pattern": self.item_url_pattern,
            "abort_after_numerrors": self.abort_after_numerrors,
            "max_records_updated_per_run": self.max_records_updated_per_run,
            "update_log_after_numitems": self.update_log_after_numitems,
            "record_refresh_days": self.record_refresh_days,
            "repo_refresh_days": self.repo_refresh_days,
            "homepage_url": self.homepage_url,
        }
        self.repository_id = self.db.update_repo(**kwargs)
        records = self.socratarepo.datasets()

        # Stays 0 when the portal returns no datasets at all.
        item_count = 0
        for item_count, rec in enumerate(records, 1):
            self.db.write_header(rec["resource"]["id"], self.repository_id)
            # A zero/unset interval disables progress logging instead of
            # raising ZeroDivisionError on the modulo.
            if self.update_log_after_numitems and item_count % self.update_log_after_numitems == 0:
                # The extra 0.1s keeps the items/sec division away from zero.
                tdelta = time.time() - self.tstart + 0.1
                self.logger.info("Done {} item headers after {} ({:.1f} items/sec)".format(item_count, self.formatter.humanize(tdelta), item_count/tdelta) )

        self.logger.info("Found {} items in feed".format(item_count) )

    def format_socrata_to_oai(self, socrata_record, local_identifier):
        """Convert Socrata dataset metadata into the harvester's record dict.

        Args:
            socrata_record: Mapping returned by ``Socrata.get_metadata()``.
                ``name`` and ``publicationDate`` are required; ``description``,
                ``tags``, ``attribution`` and ``category`` are optional.
            local_identifier: Identifier stored as the record's ``identifier``.

        Returns:
            dict with the keys ``title``, ``description``, ``tags``,
            ``identifier``, ``creator``, ``pub_date``, ``contact``, ``series``.

        Raises:
            KeyError: If ``name`` or ``publicationDate`` is missing.
        """
        # publicationDate is passed to fromtimestamp(), i.e. it is treated as
        # an epoch timestamp and rendered as a local-time calendar date.
        published = datetime.fromtimestamp(socrata_record["publicationDate"])

        record = {
            "title": socrata_record["name"],
            "description": socrata_record.get("description", ""),
            "tags": socrata_record.get("tags", ""),
            "identifier": local_identifier,
            # Fall back to the repository name when no attribution is given.
            "creator": socrata_record.get("attribution", self.name),
            "pub_date": published.strftime('%Y-%m-%d'),
            "contact": self.contact,
            "series": socrata_record.get("category", ""),
        }
        return record

    def _rate_limited(max_per_second):
        """Build a decorator that keeps a function from being called too fast.

        This is a plain function used while the class body is executed (see
        ``@_rate_limited(5)`` below), which is why it takes no ``self``.

        Args:
            max_per_second: Maximum call rate; consecutive calls are spaced at
                least ``1 / max_per_second`` seconds apart, measured from the
                end of one call to the start of the next.

        Returns:
            A decorator that wraps a function with the rate limit.
        """
        import threading

        lock = threading.Lock()
        min_interval = 1.0 / float(max_per_second)
        # time.clock() was removed in Python 3.8 (and measured CPU time on
        # Unix, which does not advance while sleeping or waiting on I/O).
        # Prefer the monotonic wall clock; fall back to time.time() on
        # interpreters that predate time.monotonic().
        now = getattr(time, "monotonic", time.time)

        def decorate(func):
            # -inf guarantees the very first call is never delayed, whatever
            # the clock's (unspecified) reference point is.
            last_time_called = [float("-inf")]

            @wraps(func)
            def rate_limited_function(*args, **kwargs):
                # "with" releases the lock even if the sleep is interrupted.
                with lock:
                    left_to_wait = min_interval - (now() - last_time_called[0])
                    if left_to_wait > 0:
                        time.sleep(left_to_wait)

                ret = func(*args, **kwargs)
                last_time_called[0] = now()
                return ret

            return rate_limited_function

        return decorate

    @_rate_limited(5)
    def _update_record(self, record):
        """Fetch, convert and store the metadata of one dataset.

        Calls are rate limited to five per second.

        Args:
            record: Mapping with at least a ``local_identifier`` key.

        Returns:
            True when the record was processed, or when it failed but the
            running error count is still below ``self.abort_after_numerrors``;
            False once that threshold has been reached.
        """
        identifier = record['local_identifier']
        try:
            socrata_record = self.socratarepo.get_metadata(identifier)
            oai_record = self.format_socrata_to_oai(socrata_record, identifier)
            if oai_record:
                self.db.write_record(oai_record, self.repository_id, self.metadataprefix.lower(), self.domain_metadata)
            return True

        except Exception as e:
            self.logger.error("Updating record {} failed: {}".format(identifier, e))
            # Touch the record so we do not keep requesting it on every run
            self.db.touch_record(record)
            self.error_count = self.error_count + 1
            # False tells the caller the error budget is exhausted.
            return self.error_count < self.abort_after_numerrors
开发者ID:axfelix,项目名称:globus_oai,代码行数:95,代码来源:SocrataRepository.py


注:本文中的sodapy.Socrata.datasets方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。