当前位置: 首页>>代码示例>>Python>>正文


Python validator.DatetimeValidator类代码示例

本文整理汇总了Python中billy.scrape.validator.DatetimeValidator的典型用法代码示例。如果您正苦于以下问题:Python DatetimeValidator类的具体用法?Python DatetimeValidator怎么用?Python DatetimeValidator使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DatetimeValidator类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, metadata, no_cache=False, output_dir=None,
                 strict_validation=None, **kwargs):
        """
        Build a Scraper, filling in any scrapelib options the caller omitted.

        :param metadata: metadata for this scraper
        :param no_cache: if True, will ignore any cached downloads
        :param output_dir: the data directory to use
        :param strict_validation: exit immediately if validation fails
        :param kwargs: options forwarded to the parent class constructor
        """

        # configure underlying scrapelib object
        # (no_cache overrides any cache_dir the caller passed in)
        if no_cache:
            kwargs['cache_dir'] = None
        elif 'cache_dir' not in kwargs:
            kwargs['cache_dir'] = settings.BILLY_CACHE_DIR

        # option name -> name of the setting used when the option is absent;
        # the setting is only read if the option really is missing
        fallbacks = (
            ('error_dir', 'BILLY_ERROR_DIR'),
            ('timeout', 'SCRAPELIB_TIMEOUT'),
            ('retry_attempts', 'SCRAPELIB_RETRY_ATTEMPTS'),
            ('retry_wait_seconds', 'SCRAPELIB_RETRY_WAIT_SECONDS'),
        )
        for option, setting_name in fallbacks:
            if option not in kwargs:
                kwargs[option] = getattr(settings, setting_name)

        if 'requests_per_minute' not in kwargs:
            kwargs['requests_per_minute'] = None

        super(Scraper, self).__init__(**kwargs)

        # every field listed for this scraper's level must exist on self
        required_fields = settings.BILLY_LEVEL_FIELDS[self.level]
        for field in required_fields:
            if hasattr(self, field):
                continue
            raise Exception('%s scrapers must have a %s attribute' % (
                self.level, field))

        self.metadata = metadata
        self.output_dir = output_dir

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()

        self.follow_robots = False

        # logging convenience methods
        billy_logger = logging.getLogger("billy")
        self.logger = billy_logger
        self.log = billy_logger.info
        self.debug = billy_logger.debug
        self.warning = billy_logger.warning
开发者ID:donaldsmith2060,项目名称:openstates,代码行数:54,代码来源:__init__.py

示例2: __init__

    def __init__(self, metadata, output_dir=None, strict_validation=None,
                 fastmode=False):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this scraper
        :param output_dir: the data directory to use; it is required and is
                           created when it does not exist yet
        :param strict_validation: exit immediately if validation fails
        :param fastmode: if True, set ``requests_per_minute`` to 0 and
                         ``cache_write_only`` to False
        :raises Exception: if ``output_dir`` is None
        """
        super(Scraper, self).__init__()

        # scrapelib overrides
        self.timeout = settings.SCRAPELIB_TIMEOUT
        self.cache_storage = scrapelib.FileCache(settings.BILLY_CACHE_DIR)
        self.requests_per_minute = settings.SCRAPELIB_RPM
        self.retry_attempts = settings.SCRAPELIB_RETRY_ATTEMPTS
        self.retry_wait_seconds = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        if fastmode:
            self.requests_per_minute = 0
            self.cache_write_only = False

        self.filter_bill_id = False
        self.metadata = metadata
        self.output_dir = output_dir
        self.output_names = set()

        # make output_dir
        if self.output_dir is None:
            _log.debug("output_dir is none")
            raise Exception("output_dir missing")

        _log.debug("output_dir %s" % self.output_dir)
        print ("output_dir: %s" % self.output_dir)
        # os.makedirs is the directory-creating call; os.path has no
        # makedirs attribute, so the old spelling failed with AttributeError
        # whenever the directory was actually missing
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()
        self._schema = {}
        self._load_schemas()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.info = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning
        self.error = self.logger.error
        self.critical = self.logger.critical
开发者ID:jmdupont,项目名称:billy,代码行数:52,代码来源:__init__.py

示例3: __init__

    def __init__(self, metadata, output_dir=None, strict_validation=None,
                 fastmode=False, **kwargs):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this scraper
        :param output_dir: the data directory to use; created when it does
                           not exist yet
        :param strict_validation: exit immediately if validation fails
        :param fastmode: if True, pass ``requests_per_minute=0`` and
                         ``cache_write_only=False`` to the parent class
        :param kwargs: extra options forwarded to the parent constructor;
                       the cache, rate, timeout and retry options set below
                       always replace caller-supplied values
        """

        # configure underlying scrapelib object
        kwargs['cache_obj'] = scrapelib.FileCache(settings.BILLY_CACHE_DIR)
        kwargs['requests_per_minute'] = settings.SCRAPELIB_RPM
        kwargs['timeout'] = settings.SCRAPELIB_TIMEOUT
        kwargs['retry_attempts'] = settings.SCRAPELIB_RETRY_ATTEMPTS
        kwargs['retry_wait_seconds'] = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        if fastmode:
            kwargs['requests_per_minute'] = 0
            kwargs['cache_write_only'] = False

        super(Scraper, self).__init__(**kwargs)

        self.metadata = metadata
        self.output_dir = output_dir
        self.output_names = set()

        # make output_dir
        # os.makedirs is the directory-creating call; os.path has no
        # makedirs attribute, so the old spelling failed with AttributeError
        # whenever the directory was actually missing
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()
        self._schema = {}
        self._load_schemas()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.info = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning
        self.error = self.logger.error
        self.critical = self.logger.critical
开发者ID:VersaHQ,项目名称:billy,代码行数:46,代码来源:__init__.py

示例4: Scraper

class Scraper(scrapelib.Scraper):
    """ Base class for all Scrapers

    Provides several useful methods for retrieving URLs and checking
    arguments against metadata.
    """

    latest_only = False

    def __init__(self, metadata, output_dir=None, strict_validation=None,
                 fastmode=False):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this scraper
        :param output_dir: the data directory to use; created when it does
                           not exist yet
        :param strict_validation: exit immediately if validation fails
        :param fastmode: if True, set ``requests_per_minute`` to 0 and
                         ``cache_write_only`` to False
        """
        super(Scraper, self).__init__()

        # scrapelib overrides
        self.timeout = settings.SCRAPELIB_TIMEOUT
        self.cache_storage = scrapelib.FileCache(settings.BILLY_CACHE_DIR)
        self.requests_per_minute = settings.SCRAPELIB_RPM
        self.retry_attempts = settings.SCRAPELIB_RETRY_ATTEMPTS
        self.retry_wait_seconds = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        if fastmode:
            self.requests_per_minute = 0
            self.cache_write_only = False

        self.metadata = metadata
        self.output_dir = output_dir
        self.output_names = set()

        # make output_dir
        # os.makedirs is the directory-creating call; os.path has no
        # makedirs attribute, so the old spelling failed with AttributeError
        # whenever the directory was actually missing
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()
        self._schema = {}
        self._load_schemas()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.info = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning
        self.error = self.logger.error
        self.critical = self.logger.critical

    def _load_schemas(self):
        """ Load all schemas into the schema dict.

        Each ``../schemas/<type>.json`` file (relative to this module) is
        parsed into ``self._schema[<type>]`` and then patched:

        * every schema gains a string property named by
          ``settings.LEVEL_FIELD``;
        * the ``session`` enum of the bill and vote schemas is set from
          :meth:`all_sessions`;
        * the ``term`` enum of person roles is set to the term names found
          in ``self.metadata['terms']``.
        """

        object_types = ('bill', 'committee', 'person', 'vote', 'event',
                        'speech')

        for object_type in object_types:
            schema_path = os.path.join(os.path.split(__file__)[0],
                                       '../schemas/%s.json' % object_type)
            # the with-block closes the schema file as soon as it is parsed
            # instead of leaving the handle open until garbage collection
            with open(schema_path) as schema_file:
                self._schema[object_type] = json.load(schema_file)
            self._schema[object_type]['properties'][settings.LEVEL_FIELD] = {
                'minLength': 2, 'type': 'string'}

        # bills & votes (each schema gets its own freshly built list)
        self._schema['bill']['properties']['session']['enum'] = \
            self.all_sessions()
        self._schema['vote']['properties']['session']['enum'] = \
            self.all_sessions()

        # legislators
        terms = [t['name'] for t in self.metadata['terms']]
        # ugly break here b/c this line is nearly impossible to split
        self._schema['person']['properties']['roles'][
            'items']['properties']['term']['enum'] = terms

    @property
    def object_count(self):
        """ Count of the distinct output filenames held in output_names. """
        distinct_names = self.output_names
        return len(distinct_names)

    def validate_json(self, obj):
        """ Validate obj against the schema registered for its '_type'.

        A ValueError from the validator is always logged as a warning; it
        is re-raised only when strict_validation is truthy.
        """
        try:
            run_validation = self.validator.validate
            run_validation(obj, self._schema[obj['_type']])
        except ValueError as problem:
            self.warning(str(problem))
            if not self.strict_validation:
                return
            raise problem

    def all_sessions(self):
        """ Return one new list with the sessions of every term, in order. """
        return [session
                for term in self.metadata['terms']
                for session in term['sessions']]

    def validate_session(self, session, latest_only=False):
        """ Check that a session is present in the metadata dictionary.
#.........这里部分代码省略.........
开发者ID:opengovernment,项目名称:billy,代码行数:101,代码来源:__init__.py

示例5: main

def main():

    parser = argparse.ArgumentParser(
        description='Scrape data for state, saving data to disk.',
        parents=[base_arg_parser],
    )

    parser.add_argument('state', type=str,
                        help='state scraper module (eg. nc)')
    parser.add_argument('-s', '--session', action='append', dest='sessions',
                        help='session(s) to scrape')
    parser.add_argument('-t', '--term', action='append', dest='terms',
                        help='term(s) to scrape')
    parser.add_argument('--upper', action='store_true', dest='upper',
                        default=False, help='scrape upper chamber')
    parser.add_argument('--lower', action='store_true', dest='lower',
                        default=False, help='scrape lower chamber')
    parser.add_argument('--bills', action='store_true', dest='bills',
                        default=False, help="scrape bill data")
    parser.add_argument('--legislators', action='store_true',
                        dest='legislators', default=False,
                        help="scrape legislator data")
    parser.add_argument('--committees', action='store_true', dest='committees',
                        default=False, help="scrape committee data")
    parser.add_argument('--votes', action='store_true', dest='votes',
                        default=False, help="scrape vote data")
    parser.add_argument('--events', action='store_true', dest='events',
                        default=False, help='scrape event data')
    parser.add_argument('--alldata', action='store_true', dest='alldata',
                        default=False,
                        help="scrape all available types of data")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        default=False, help="fail immediately when"
                        "encountering validation warning")
    parser.add_argument('-n', '--no_cache', action='store_true',
                        dest='no_cache', help="don't use web page cache")
    parser.add_argument('--fastmode', help="scrape in fast mode",
                        action="store_true", default=False)
    parser.add_argument('-r', '--rpm', action='store', type=int, dest='rpm',
                        default=60),
    parser.add_argument('--timeout', action='store', type=int, dest='timeout',
                        default=10)

    args = parser.parse_args()

    settings.update(args)

    # set up search path
    sys.path.insert(0, os.path.join(os.path.dirname(__file__),
                                    '../../openstates'))

    # get metadata
    metadata = __import__(args.state, fromlist=['metadata']).metadata
    state = metadata['abbreviation']

    configure_logging(args.verbose, args.state)

    # make output dir
    args.output_dir = os.path.join(settings.BILLY_DATA_DIR, args.state)
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise e

    # write metadata
    try:
        schema_path = os.path.join(os.path.split(__file__)[0],
                                   '../schemas/metadata.json')
        schema = json.load(open(schema_path))

        validator = DatetimeValidator()
        validator.validate(metadata, schema)
    except ValueError as e:
        logging.getLogger('billy').warning('metadata validation error: '
                                                 + str(e))

    with open(os.path.join(args.output_dir, 'state_metadata.json'), 'w') as f:
        json.dump(metadata, f, cls=JSONDateEncoder)

    # determine time period to run for
    if args.terms:
        for term in metadata['terms']:
            if term in args.terms:
                args.sessions.extend(term['sessions'])
    args.sessions = set(args.sessions or [])

    # determine chambers
    args.chambers = []
    if args.upper:
        args.chambers.append('upper')
    if args.lower:
        args.chambers.append('lower')
    if not args.chambers:
        args.chambers = ['upper', 'lower']

    if not (args.bills or args.legislators or args.votes or
            args.committees or args.events or args.alldata):
        raise ScrapeError("Must specify at least one of --bills, "
                          "--legislators, --committees, --votes, --events, "
#.........这里部分代码省略.........
开发者ID:addamh,项目名称:openstates,代码行数:101,代码来源:scrape.py

示例6: DatetimeValidator

    # make output dir
    args.output_dir = os.path.join(settings.BILLY_DATA_DIR, state)
    try:
        os.makedirs(args.output_dir)
    except OSError, e:
        if e.errno != 17:
            raise e

    # write metadata
    try:
        schema_path = os.path.join(os.path.split(__file__)[0],
                                   '../schemas/metadata.json')
        schema = json.load(open(schema_path))

        validator = DatetimeValidator()
        validator.validate(metadata, schema)
    except ValueError, e:
        logging.getLogger('billy').warning('metadata validation error: '
                                                 + str(e))

    with open(os.path.join(args.output_dir, 'state_metadata.json'), 'w') as f:
        json.dump(metadata, f, cls=JSONDateEncoder)

    # determine time period to run for
    if args.terms:
        for term in metadata['terms']:
            if term in args.terms:
                args.sessions.extend(term['sessions'])
    args.sessions = set(args.sessions or [])
开发者ID:IanWhalen,项目名称:openstates,代码行数:29,代码来源:scrape.py

示例7: main


#.........这里部分代码省略.........
                          'alldata']

            if 'events' in metadata['feature_flags']:
                args.types.append('events')

            if 'speeches' in metadata['feature_flags']:
                args.types.append('speeches')

        plan = """billy-update abbr=%s
    actions=%s
    types=%s
    sessions=%s
    terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
                   ','.join(args.sessions), ','.join(args.terms))
        logging.getLogger('billy').info(plan)

        scrape_data = {}

        if 'scrape' in args.actions:
            _clear_scraped_data(args.output_dir)

            # validate then write metadata
            if hasattr(module, 'session_list'):
                session_list = module.session_list()
            else:
                session_list = []
            check_sessions(metadata, session_list)

            try:
                schema_path = os.path.join(os.path.split(__file__)[0],
                                           '../schemas/metadata.json')
                schema = json.load(open(schema_path))

                validator = DatetimeValidator()
                validator.validate(metadata, schema)
            except ValueError as e:
                logging.getLogger('billy').warning(
                    'metadata validation error: ' + str(e))

            run_record = []
            exec_record = {
                "run_record": run_record,
                "args": sys.argv,
            }

            lex = None
            exc_traceback = None

            # start to run scrapers
            exec_start = dt.datetime.utcnow()

            # scraper order matters
            order = ('legislators', 'committees', 'votes', 'bills',
                     'events', 'speeches')
            _traceback = None
            try:
                for stype in order:
                    if stype in args.types:
                        run_record += _run_scraper(stype, args, metadata)
            except Exception as e:
                _traceback = _, _, exc_traceback = sys.exc_info()
                run_record += [{"exception": e, "type": stype}]
                lex = e

            exec_end = dt.datetime.utcnow()
            exec_record['started'] = exec_start
开发者ID:VersaHQ,项目名称:billy,代码行数:67,代码来源:update.py

示例8: Scraper

class Scraper(scrapelib.Scraper):
    """ Base class for all Scrapers

    Provides several useful methods for retrieving URLs and checking
    arguments against metadata.
    """

    __metaclass__ = ScraperMeta

    latest_only = False

    def __init__(self, metadata, output_dir=None, strict_validation=None, fastmode=False, **kwargs):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this scraper
        :param output_dir: the data directory to use; created when it does
                           not exist yet
        :param strict_validation: exit immediately if validation fails
        :param fastmode: if True, pass ``requests_per_minute=0`` and
                         ``cache_write_only=False`` to the parent class
        :param kwargs: extra options forwarded to the parent constructor;
                       the cache, rate, timeout and retry options set below
                       always replace caller-supplied values
        """

        # configure underlying scrapelib object
        kwargs["cache_obj"] = scrapelib.FileCache(settings.BILLY_CACHE_DIR)
        kwargs["requests_per_minute"] = settings.SCRAPELIB_RPM
        kwargs["timeout"] = settings.SCRAPELIB_TIMEOUT
        kwargs["retry_attempts"] = settings.SCRAPELIB_RETRY_ATTEMPTS
        kwargs["retry_wait_seconds"] = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        if fastmode:
            kwargs["requests_per_minute"] = 0
            kwargs["cache_write_only"] = False

        super(Scraper, self).__init__(**kwargs)

        self.metadata = metadata
        self.output_dir = output_dir
        self.output_names = set()

        # make output_dir
        # os.makedirs is the directory-creating call; os.path has no
        # makedirs attribute, so the old spelling failed with AttributeError
        # whenever the directory was actually missing
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()
        self._schema = {}
        self._load_schemas()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.info = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning
        self.error = self.logger.error
        self.critical = self.logger.critical

    def _load_schemas(self):
        """ Load all schemas into the schema dict.

        Each ``../schemas/<type>.json`` file (relative to this module) is
        parsed into ``self._schema[<type>]`` and then patched:

        * every schema gains a two-character string property named by
          ``settings.LEVEL_FIELD``;
        * the ``session`` enum of the bill and vote schemas is set from
          :meth:`all_sessions`;
        * the ``term`` enum of person roles is set to the term names found
          in ``self.metadata["terms"]``.
        """

        object_types = ("bill", "committee", "person", "vote", "event", "speech")

        for object_type in object_types:
            schema_path = os.path.join(os.path.split(__file__)[0], "../schemas/%s.json" % object_type)
            # the with-block closes the schema file as soon as it is parsed
            # instead of leaving the handle open until garbage collection
            with open(schema_path) as schema_file:
                self._schema[object_type] = json.load(schema_file)
            self._schema[object_type]["properties"][settings.LEVEL_FIELD] = {"maxLength": 2, "minLength": 2, "type": "string"}

        # bills & votes (each schema gets its own freshly built list)
        self._schema["bill"]["properties"]["session"]["enum"] = self.all_sessions()
        self._schema["vote"]["properties"]["session"]["enum"] = self.all_sessions()

        # legislators
        terms = [t["name"] for t in self.metadata["terms"]]
        self._schema["person"]["properties"]["roles"]["items"]["properties"]["term"]["enum"] = terms

    @property
    def object_count(self):
        """ Count of the distinct output filenames held in output_names. """
        distinct_names = self.output_names
        return len(distinct_names)

    def validate_json(self, obj):
        """ Validate obj against the schema registered for its "_type".

        A ValueError from the validator is always logged as a warning; it
        is re-raised only when strict_validation is truthy.
        """
        try:
            run_validation = self.validator.validate
            run_validation(obj, self._schema[obj["_type"]])
        except ValueError as problem:
            self.warning(str(problem))
            if not self.strict_validation:
                return
            raise problem

    def all_sessions(self):
        """ Return one new list with the sessions of every term, in order. """
        return [session
                for term in self.metadata["terms"]
                for session in term["sessions"]]

    def validate_session(self, session, latest_only=False):
        """ Check that a session is present in the metadata dictionary.

        raises :exc:`~billy.scrape.NoDataForPeriod` if session is invalid

        :param session:  string representing session to check
#.........这里部分代码省略.........
开发者ID:rhymeswithcycle,项目名称:billy,代码行数:101,代码来源:__init__.py

示例9: Scraper

class Scraper(scrapelib.Scraper):
    """ Base class for all Scrapers

    Provides several useful methods for retrieving URLs and checking
    arguments against metadata.
    """

    __metaclass__ = ScraperMeta

    latest_only = False

    def __init__(self, metadata, output_dir=None, strict_validation=None,
                 fastmode=False, **kwargs):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this scraper
        :param output_dir: the data directory to use; created when it does
                           not exist yet
        :param strict_validation: exit immediately if validation fails
        :param fastmode: if True, pass ``requests_per_minute=0`` and
                         ``cache_write_only=False`` to the parent class
        :param kwargs: extra options forwarded to the parent constructor;
                       the cache, rate, timeout and retry options set below
                       always replace caller-supplied values
        :raises Exception: if an attribute listed in
                           ``settings.BILLY_LEVEL_FIELDS`` for this
                           scraper's level is missing
        """

        # configure underlying scrapelib object
        kwargs['cache_obj'] = scrapelib.FileCache(settings.BILLY_CACHE_DIR)
        kwargs['requests_per_minute'] = settings.SCRAPELIB_RPM
        kwargs['timeout'] = settings.SCRAPELIB_TIMEOUT
        kwargs['retry_attempts'] = settings.SCRAPELIB_RETRY_ATTEMPTS
        kwargs['retry_wait_seconds'] = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        if fastmode:
            kwargs['requests_per_minute'] = 0
            kwargs['cache_write_only'] = False

        super(Scraper, self).__init__(**kwargs)

        # every field listed for this scraper's level must exist on self
        for f in settings.BILLY_LEVEL_FIELDS[self.level]:
            if not hasattr(self, f):
                raise Exception('%s scrapers must have a %s attribute' % (
                    self.level, f))

        self.metadata = metadata
        self.output_dir = output_dir

        # make output_dir
        # os.makedirs is the directory-creating call; os.path has no
        # makedirs attribute, so the old spelling failed with AttributeError
        # whenever the directory was actually missing
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning

    def validate_json(self, obj):
        """ Validate obj against this scraper's schema.

        The schema comes from _get_schema() the first time it is needed and
        is then kept on the instance. A ValueError from the validator is
        always logged as a warning; it is re-raised only when
        strict_validation is truthy.
        """
        schema_is_cached = hasattr(self, '_schema')
        if not schema_is_cached:
            self._schema = self._get_schema()
        try:
            self.validator.validate(obj, self._schema)
        except ValueError as problem:
            self.warning(str(problem))
            if not self.strict_validation:
                return
            raise problem

    def all_sessions(self):
        """ Return one new list with the sessions of every term, in order. """
        return [session
                for term in self.metadata['terms']
                for session in term['sessions']]

    def validate_session(self, session):
        """ Confirm that some term in the metadata lists the given session.

        raises :exc:`~billy.scrape.NoDataForPeriod` if session is invalid

        :param session:  string representing session to check
        """
        all_terms = self.metadata['terms']
        if any(session in term['sessions'] for term in all_terms):
            return True
        raise NoDataForPeriod(session)

    def validate_term(self, term, latest_only=False):
        """ Check that a term is present in the metadata dictionary.

        raises :exc:`~billy.scrape.NoDataForPeriod` if term is invalid

        :param term:        string representing term to check
        :param latest_only: if True, will raise exception if term is not
                            the current term (default: False)
        """

        if latest_only:
            if term == self.metadata['terms'][-1]['name']:
                return True
            else:
                raise NoDataForPeriod(term)
#.........这里部分代码省略.........
开发者ID:annerajb,项目名称:billy,代码行数:101,代码来源:__init__.py

示例10: Scraper

class Scraper(scrapelib.Scraper):
    """ Base class for all Scrapers

    Provides several useful methods for retrieving URLs and checking
    arguments against metadata.
    """

    __metaclass__ = ScraperMeta

    def __init__(self, metadata, no_cache=False, output_dir=None,
                 strict_validation=None, **kwargs):
        """
        Create a new Scraper instance.

        :param metadata: metadata for this state
        :param no_cache: if True, will ignore any cached downloads
        :param output_dir: the data directory to use
        :param strict_validation: exit immediately if validation fails
        :param kwargs: options forwarded to the parent class constructor;
                       missing ones are filled in from billy settings
        :raises Exception: if the scraper has no ``state`` attribute
        """

        # configure underlying scrapelib object
        # (no_cache overrides any cache_dir the caller passed in)
        if no_cache:
            kwargs['cache_dir'] = None
        elif 'cache_dir' not in kwargs:
            kwargs['cache_dir'] = settings.BILLY_CACHE_DIR

        if 'error_dir' not in kwargs:
            kwargs['error_dir'] = settings.BILLY_ERROR_DIR

        if 'timeout' not in kwargs:
            kwargs['timeout'] = settings.SCRAPELIB_TIMEOUT

        if 'requests_per_minute' not in kwargs:
            kwargs['requests_per_minute'] = None

        if 'retry_attempts' not in kwargs:
            kwargs['retry_attempts'] = settings.SCRAPELIB_RETRY_ATTEMPTS

        if 'retry_wait_seconds' not in kwargs:
            kwargs['retry_wait_seconds'] = settings.SCRAPELIB_RETRY_WAIT_SECONDS

        super(Scraper, self).__init__(**kwargs)

        if not hasattr(self, 'state'):
            raise Exception('Scrapers must have a state attribute')

        self.metadata = metadata
        self.output_dir = output_dir

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()

        self.follow_robots = False

        # logging convenience methods
        self.logger = logging.getLogger("billy")
        self.log = self.logger.info
        self.debug = self.logger.debug
        self.warning = self.logger.warning

    def validate_json(self, obj):
        """ Validate obj against this scraper's schema.

        The schema comes from ``_get_schema()`` the first time it is needed
        and is then kept on the instance. A ValueError from the validator is
        always logged as a warning.

        :param obj: the object to validate
        :raises ValueError: re-raised from the validator when
                            ``strict_validation`` is truthy
        """
        if not hasattr(self, '_schema'):
            self._schema = self._get_schema()
        try:
            self.validator.validate(obj, self._schema)
        # "except X as name" parses on Python 2.6+ and Python 3; the older
        # "except X, name" spelling is a SyntaxError on Python 3
        except ValueError as ve:
            self.warning(str(ve))
            if self.strict_validation:
                raise ve
开发者ID:djangolackey,项目名称:openstates,代码行数:70,代码来源:__init__.py

示例11: Scraper

class Scraper(scrapelib.Scraper):
    """ Base class for all Scrapers

    Provides several useful methods for retrieving URLs and checking
    arguments against metadata.
    """

    __metaclass__ = ScraperMeta

    def __init__(self, metadata, no_cache=False, output_dir=None,
                 strict_validation=None, **kwargs):
        """
        Build a Scraper, filling in any scrapelib options the caller omitted.

        :param metadata: metadata for this state
        :param no_cache: if True, will ignore any cached downloads
        :param output_dir: the data directory to use
        :param strict_validation: exit immediately if validation fails
        :param kwargs: options forwarded to the parent class constructor
        """

        # configure underlying scrapelib object
        # (no_cache overrides any cache_dir the caller passed in)
        if no_cache:
            kwargs['cache_dir'] = None
        elif 'cache_dir' not in kwargs:
            kwargs['cache_dir'] = settings.BILLY_CACHE_DIR

        # option name -> name of the setting used when the option is absent;
        # the setting is only read if the option really is missing
        fallbacks = (
            ('error_dir', 'BILLY_ERROR_DIR'),
            ('timeout', 'SCRAPELIB_TIMEOUT'),
            ('retry_attempts', 'SCRAPELIB_RETRY_ATTEMPTS'),
            ('retry_wait_seconds', 'SCRAPELIB_RETRY_WAIT_SECONDS'),
        )
        for option, setting_name in fallbacks:
            if option not in kwargs:
                kwargs[option] = getattr(settings, setting_name)

        if 'requests_per_minute' not in kwargs:
            kwargs['requests_per_minute'] = None

        super(Scraper, self).__init__(**kwargs)

        has_state = hasattr(self, 'state')
        if not has_state:
            raise Exception('Scrapers must have a state attribute')

        self.metadata = metadata
        self.output_dir = output_dir

        # validation
        self.strict_validation = strict_validation
        self.validator = DatetimeValidator()

        self.follow_robots = False

        # logging convenience methods
        billy_logger = logging.getLogger("billy")
        self.logger = billy_logger
        self.log = billy_logger.info
        self.debug = billy_logger.debug
        self.warning = billy_logger.warning

    def validate_json(self, obj):
        """ Validate obj against this scraper's schema.

        The schema comes from _get_schema() the first time it is needed and
        is then kept on the instance. A ValueError from the validator is
        always logged as a warning; it is re-raised only when
        strict_validation is truthy.
        """
        schema_is_cached = hasattr(self, '_schema')
        if not schema_is_cached:
            self._schema = self._get_schema()
        try:
            self.validator.validate(obj, self._schema)
        except ValueError as problem:
            self.warning(str(problem))
            if not self.strict_validation:
                return
            raise problem

    def all_sessions(self):
        """ Return one new list with the sessions of every term, in order. """
        return [session
                for term in self.metadata['terms']
                for session in term['sessions']]

    def validate_session(self, session):
        """ Confirm that some term in the metadata lists the given session.

        raises :exc:`~billy.scrape.NoDataForPeriod` if session is invalid

        :param session:  string representing session to check
        """
        all_terms = self.metadata['terms']
        if any(session in term['sessions'] for term in all_terms):
            return True
        raise NoDataForPeriod(session)

    def validate_term(self, term, latest_only=False):
        """ Check that a term is present in the metadata dictionary.

        raises :exc:`~billy.scrape.NoDataForPeriod` if term is invalid

        :param term:        string representing term to check
        :param latest_only: if True, will raise exception if term is not
                            the current term (default: False)
        """

        if latest_only:
#.........这里部分代码省略.........
开发者ID:AlokKothari,项目名称:openstates,代码行数:101,代码来源:__init__.py

示例12: main


#.........这里部分代码省略.........
            if 'speeches' in metadata['feature_flags']:
                args.types.append('speeches')

        plan = """billy-update abbr=%s
    actions=%s
    types=%s
    sessions=%s
    terms=%s""" % (args.module, ','.join(args.actions), ','.join(args.types),
                   ','.join(args.sessions), ','.join(args.terms))
        _log.info(plan)
        scrape_data = {}

        if args.billid is False:
            _log.debug("No billid filter.")
        else:
            _log.debug("Search for billid: %s" % args.billid)

        if 'scrape' in args.actions:
            _clear_scraped_data(args.output_dir)

            # validate then write metadata
            if hasattr(module, 'session_list'):
                session_list = module.session_list()
            else:
                session_list = []
            check_sessions(metadata, session_list)

            _log.debug("Session List %s" % session_list)
            try:
                schema_path = os.path.join(
                    os.path.split(__file__)[0],
                    '../schemas/metadata.json')
                schema = json.load(open(schema_path))
                validator = DatetimeValidator()
                validator.validate(metadata, schema)
            except ValueError as e:
                _log.warning(
                    'metadata validation error: ' + str(e))

            with open(os.path.join(args.output_dir, 'metadata.json'),
                      'w') as f:
                json.dump(metadata, f, cls=JSONDateEncoder)

            run_record = []
            exec_record = {
                "run_record": run_record,
                "args": sys.argv,
                "state": abbrev
            }

            lex = None
            exc_traceback = None

            # start to run scrapers
            exec_start = dt.datetime.utcnow()

            # scraper order matters
            if args.billid is False:
                order = (
                    'legislators',
                    'committees',
                    'votes',
                    'bills',
                    'events',
                    'speeches')
            else:
开发者ID:jmdupont,项目名称:billy,代码行数:67,代码来源:update.py


注:本文中的billy.scrape.validator.DatetimeValidator类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。