当前位置: 首页>>代码示例>>Python>>正文


Python detection.AutoDetectLanguage类代码示例

本文整理汇总了Python中dateparser.languages.detection.AutoDetectLanguage的典型用法代码示例。如果您正苦于以下问题:Python AutoDetectLanguage类的具体用法?Python AutoDetectLanguage怎么用?Python AutoDetectLanguage使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了AutoDetectLanguage类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_should_accept_dates_in_different_languages

    def test_should_accept_dates_in_different_languages(self):
        # One detector instance, created with re-detection allowed, is fed
        # consecutive date strings and must yield the expected calendar date
        # for each of them.
        expectations = (
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
            (u'13 Setembro, 2014', datetime(2014, 9, 13)),
        )
        detector = AutoDetectLanguage(None, allow_redetection=True)

        for raw_text, expected in expectations:
            result = detector.parse(raw_text, None)
            # Only the date part is compared, not the time of day.
            self.assertEqual(expected.date(), result.date())
开发者ID:mevigour,项目名称:dateparser,代码行数:11,代码来源:test_date_parser.py

示例2: __init__

    def __init__(self, languages=None, allow_redetect_language=False):
        """Choose the language detector used by this parser.

        :param languages:
            A list, tuple or set of keys of the map returned by
            ``default_language_loader.get_language_map()``, or ``None``.
            An empty collection is treated like ``None``.
        :param allow_redetect_language:
            When true, an ``AutoDetectLanguage`` detector created with
            ``allow_redetection=True`` is used, even if languages are given.

        :raises ValueError: if a requested language is not in the language map.
        :raises TypeError: if ``languages`` is neither ``None`` nor a list,
            tuple or set.
        """
        # ``collections.Set`` was removed in Python 3.10; the ABC has lived in
        # ``collections.abc`` since Python 3.3.  Fall back for Python 2.
        try:
            from collections.abc import Set as AbstractSet
        except ImportError:  # Python 2
            from collections import Set as AbstractSet

        if isinstance(languages, (list, tuple, AbstractSet)):
            available_language_map = default_language_loader.get_language_map()

            if all(language in available_language_map for language in languages):
                # Swap the requested keys for the mapped language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map)
                raise ValueError("Unknown language(s) %r" % ', '.join(unsupported_languages))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
                                                        allow_redetection=True)
        elif languages:
            # Explicit languages without re-detection: use exactly those.
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)
开发者ID:mevigour,项目名称:dateparser,代码行数:19,代码来源:date.py

示例3: AutoDetectLanguageTest

class AutoDetectLanguageTest(unittest.TestCase):
    """Checks for ``AutoDetectLanguage``; the parsing-oriented tests are skipped."""

    def setUp(self):
        self.parser = AutoDetectLanguage()

    def test_detect_language(self):
        # First string must match both 'es' and 'pt' (in any order).
        shortnames = [language.shortname
                      for language in self.parser.iterate_applicable_languages('11 abril 2010')]
        self.assertItemsEqual(['es', 'pt'], shortnames)

        # Second string must match 'es' only.
        shortnames = [language.shortname
                      for language in self.parser.iterate_applicable_languages('11 junio 2010')]
        self.assertItemsEqual(['es'], shortnames)

    @unittest.skip('This test should only be testing detecting languages, not parsing them. Although tests '
                   'for parsing this dates should be created separately to not reduce the coverage')
    def test_should_reduce_possible_languages_and_reject_different(self):
        spanish_cases = (
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
        )
        for raw_text, expected in spanish_cases:
            result = self.parser.parse(raw_text, None)
            self.assertEqual(expected.date(), result.date())

        # After the Spanish dates, a Portuguese one is expected to be rejected.
        portuguese_date = u'13 Setembro, 2014'
        with self.assertRaisesRegexp(ValueError, 'Invalid date'):
            self.parser.parse(portuguese_date, None)

    @unittest.skip('This test should only be testing detecting languages, not parsing them. Although tests '
                   'for parsing this dates should be created separately to not reduce the coverage')
    def test_should_accept_dates_in_different_languages(self):
        mixed_cases = (
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
            (u'13 Setembro, 2014', datetime(2014, 9, 13)),
        )
        # A dedicated detector with re-detection allowed, not the setUp one.
        redetecting_parser = AutoDetectLanguage(None, allow_redetection=True)

        for raw_text, expected in mixed_cases:
            result = redetecting_parser.parse(raw_text, None)
            self.assertEqual(expected.date(), result.date())
开发者ID:mevigour,项目名称:dateparser,代码行数:40,代码来源:test_date_parser.py

示例4: DateDataParser

class DateDataParser(object):
    """Parse date strings with either explicitly given or auto-detected languages."""

    def __init__(self, languages=None, allow_redetect_language=False):
        """Choose the language detector used by this parser.

        :param languages:
            A list, tuple or set of keys of the map returned by
            ``default_language_loader.get_language_map()``, or ``None``.
            An empty collection is treated like ``None``.
        :param allow_redetect_language:
            When true, an ``AutoDetectLanguage`` detector created with
            ``allow_redetection=True`` is used, even if languages are given.

        :raises ValueError: if a requested language is not in the language map.
        :raises TypeError: if ``languages`` is neither ``None`` nor a list,
            tuple or set.
        """
        # ``collections.Set`` was removed in Python 3.10; the ABC has lived in
        # ``collections.abc`` since Python 3.3.  Fall back for Python 2.
        try:
            from collections.abc import Set as AbstractSet
        except ImportError:  # Python 2
            from collections import Set as AbstractSet

        if isinstance(languages, (list, tuple, AbstractSet)):
            available_language_map = default_language_loader.get_language_map()

            if all(language in available_language_map for language in languages):
                # Swap the requested keys for the mapped language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map)
                raise ValueError("Unknown language(s) %r" % ', '.join(unsupported_languages))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
                                                        allow_redetection=True)
        elif languages:
            # Explicit languages without re-detection: use exactly those.
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """Return a dictionary with a date object and a period.

        Period values can be 'day' (default), 'week', 'month' or 'year'.

        The period aims to solve the following issue: a forum may display
        "2 weeks ago" in its thread list (the thread itself carries the exact
        date), so the engine translates "2 weeks ago" to a certain date.
        The next thread summary displays "3 weeks ago", which is translated
        to another date seven days before the first one.  A valid date string
        falling between both dates won't be scraped because it is not an
        exact date match.  The period field helps to build better date range
        detection.

        :param date_string:
            Text to parse.  Surrounding whitespace is stripped and the result
            is passed through ``sanitize_date`` before parsing.
        :param date_formats:
            Forwarded unchanged to ``_DateLanguageParser.parse``.
        :return:
            The first truthy result of ``_DateLanguageParser.parse`` over the
            applicable languages, or ``{'date_obj': None, 'period': 'day'}``
            when no language yields one.

        TODO: Timezone issues

        """
        date_string = sanitize_date(date_string.strip())

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}
开发者ID:mevigour,项目名称:dateparser,代码行数:48,代码来源:date.py

示例5: AutoDetectLanguageTest

class AutoDetectLanguageTest(BaseTestCase):
    def setUp(self):
        super(AutoDetectLanguageTest, self).setUp()

        # Placeholders until a test installs real values; ``parser`` is
        # replaced by given_parser().
        self.parser = self.detected_languages = NotImplemented

        # Fixed, ordered subset of languages so test outcomes stay predictable.
        # New entries may be appended, but existing ones must not be removed
        # or reordered.
        self.known_languages = ['en', 'fr', 'es', 'pt', 'ru', 'tr', 'cs']

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_languages=['es', 'pt']),
        param(date_strings=["11 junio 2010"], expected_languages=['es']),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_languages=['es']),
    ])
    def test_detect_languages(self, date_strings, expected_languages):
        # Given a detector over the known language subset (re-detection off),
        # when every applicable language is collected for the date strings
        # (default modify=False), then the expected languages are reported.
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_language='es'),
        param(date_strings=["11 junio 2010"], expected_language='es'),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_language='es'),
    ])
    def test_exclude_ineligible_languages_with_modify(self, date_strings, expected_language):
        # Only one language is detected here, with modify=True.
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=True)
        self.then_detected_languages_are([expected_language])
        # The parser is expected to keep only the tail of known_languages that
        # starts at the detected language, i.e. the earlier entries are gone.
        self.then_parser_languages_are(self.known_languages[self.known_languages.index(expected_language):])

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_language='es'),
        param(date_strings=["11 junio 2010"], expected_language='es'),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_language='es'),
    ])
    def test_do_not_exclude_ineligible_languages_without_modify(self, date_strings, expected_language):
        # Same inputs as the modify=True variant, but with modify=False the
        # parser's language list is expected to remain the full known subset.
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=False)
        self.then_detected_languages_are([expected_language])
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_languages=['es', 'pt']),
        param(date_strings=["11 junio 2010"], expected_languages=['es']),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_languages=['es']),
        param(date_strings=["13 Srpen, 2014"], expected_languages=['cs']),
    ])
    def test_do_not_exclude_ineligible_languages_when_all_ineligible(self, date_strings, expected_languages):
        # Collects every applicable language with modify=True; afterwards the
        # parser's language list is expected to still be the full known subset.
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings, modify=True)
        self.then_detected_languages_are(expected_languages)
        self.then_parser_languages_are(self.known_languages)

    # NOTE(review): the second case's date string contains literal single
    # quotes inside the double quotes ("'11 Ağustos, 2014'"), unlike the same
    # date used in the re-detection test -- confirm whether that is intended.
    @parameterized.expand([
        param(language='es', date_strings=["13 Setembro, 2014"]),
        param(language='cs', date_strings=["'11 Ağustos, 2014'"]),
    ])
    def test_reject_dates_in_other_languages_without_redetection(self, language, date_strings):
        # The parser (re-detection off) is narrowed to a single language, then
        # given a date string for which no language is expected to be detected.
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are([language])
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are([])

    @parameterized.expand([
        param(detected_languages=['es'], date_strings=['13 Juillet, 2014'], expected_languages=['fr']),
        param(detected_languages=['es'], date_strings=['11 Ağustos, 2014'], expected_languages=['tr']),
    ])
    def test_accept_dates_in_other_languages_with_redetection_enabled(
        self, detected_languages, date_strings, expected_languages
    ):
        # With allow_redetection=True, a parser narrowed to `detected_languages`
        # is still expected to report a different language for the date string.
        self.given_parser(languages=self.known_languages, allow_redetection=True)
        self.given_parser_languages_are(detected_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    def test_accept_numeric_dates_without_redetection(self):
        # A purely numeric date is expected to stay applicable to the single
        # language the parser was narrowed to, with re-detection off.
        narrowed_to = ['es']
        numeric_dates = ['13/08/2014']
        still_detected = ['es']

        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are(narrowed_to)
        self.when_all_languages_are_detected(numeric_dates)
        self.then_detected_languages_are(still_detected)

    def given_parser(self, languages=None, allow_redetection=False):
        """Install a fresh ``AutoDetectLanguage`` as ``self.parser``.

        ``languages`` holds keys of the default language map; they are
        replaced by the mapped language objects before being handed to the
        detector.  ``None`` is passed through untouched.
        """
        resolved = languages
        if resolved is not None:
            lookup = default_language_loader.get_language_map()
            resolved = []
            for code in languages:
                resolved.append(lookup[code])
        self.parser = AutoDetectLanguage(resolved, allow_redetection=allow_redetection)

    def given_parser_languages_are(self, languages):
        """Overwrite the current parser's ``languages`` with the mapped objects
        for the given keys of the default language map."""
        lookup = default_language_loader.get_language_map()
        narrowed = []
        for code in languages:
            narrowed.append(lookup[code])
        self.parser.languages = narrowed

    def when_all_languages_are_detected(self, date_strings, modify=False):
        assert not isinstance(date_strings, six.string_types)
        for date_string in date_strings:
            if settings.NORMALIZE:
                date_string = normalize_unicode(date_string)
            detected_languages = list(self.parser.iterate_applicable_languages(date_string, modify=modify, settings=settings))
#.........这里部分代码省略.........
开发者ID:brechmos,项目名称:dateparser,代码行数:101,代码来源:test_date_parser.py

示例6: given_parser

 def given_parser(self, languages=None, allow_redetection=False):
     """Install a fresh ``AutoDetectLanguage`` as ``self.parser``.

     Keys in ``languages`` are looked up in the default language map first;
     ``None`` is forwarded to the detector as-is.
     """
     if languages is None:
         language_objects = None
     else:
         lookup = default_language_loader.get_language_map()
         language_objects = []
         for code in languages:
             language_objects.append(lookup[code])
     self.parser = AutoDetectLanguage(language_objects, allow_redetection=allow_redetection)
开发者ID:brechmos,项目名称:dateparser,代码行数:6,代码来源:test_date_parser.py

示例7: DateDataParser

class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
            A list of two letters language codes, e.g. ['en', 'es'].
            If languages are given, it will not attempt to detect the language.
    :type languages: list

    :param allow_redetect_language:
            Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :param settings:
           Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :return: A parser instance

    :raises:
            ValueError - Unknown Language, TypeError - Languages argument must be a list
    """
    language_loader = None

    @apply_settings
    def __init__(self, languages=None, allow_redetect_language=False, settings=None):
        """Store the settings and choose the language detector.

        Given languages are validated against the map from the language
        loader.  When none are given (or the collection is empty) the
        detector works over every language in that map.

        :raises ValueError: if a requested language is not in the language map.
        :raises TypeError: if ``languages`` is neither ``None`` nor a list,
            tuple or set.
        """
        # ``collections.Set`` was removed in Python 3.10; the ABC has lived in
        # ``collections.abc`` since Python 3.3.  Fall back for Python 2.
        try:
            from collections.abc import Set as AbstractSet
        except ImportError:  # Python 2
            from collections import Set as AbstractSet

        self._settings = settings
        available_language_map = self._get_language_loader().get_language_map()

        if isinstance(languages, (list, tuple, AbstractSet)):

            if all(language in available_language_map for language in languages):
                # Swap the requested keys for the mapped language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map)
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            self.language_detector = AutoDetectLanguage(
                languages if languages else list(available_language_map.values()),
                allow_redetection=True)
        elif languages:
            # Explicit languages without re-detection: use exactly those.
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(
                list(available_language_map.values()), allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages and timezones.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

        :raises: ValueError - Unknown Language

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``:

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        Dates with time zone indications or UTC offsets are returned in UTC time unless
        specified using `Settings`_.

            >>> DateDataParser().get_date_data(u'23 March 2000, 1:21 PM CET')
            {'date_obj': datetime.datetime(2000, 3, 23, 14, 21), 'period': 'day'}

        """
        try:
            date_string = date_string.strip()
        except AttributeError:
            raise TypeError('Input type must be str or unicode')
        if self._settings.NORMALIZE:
            date_string = normalize_unicode(date_string)
#.........这里部分代码省略.........
开发者ID:mrgrigorii,项目名称:dateparser,代码行数:101,代码来源:date.py

示例8: DateDataParser

class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
            A list of two-letter language codes, e.g. ['en', 'es'].
            If languages are given, it will not attempt to detect the language.
    :type languages: list

    :param allow_redetect_language:
            Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :return: A parser instance

    :raises:
            ValueError - Unknown Language, TypeError - Languages argument must be a list
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was removed in Python 3.10; the ABC has lived in
        # ``collections.abc`` since Python 3.3.  Fall back for Python 2.
        try:
            from collections.abc import Set as AbstractSet
        except ImportError:  # Python 2
            from collections import Set as AbstractSet

        if isinstance(languages, (list, tuple, AbstractSet)):
            available_language_map = default_language_loader.get_language_map()

            if all(language in available_language_map for language in languages):
                # Swap the requested codes for the mapped language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map)
                raise ValueError("Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            # An empty collection is treated like "no languages given".
            self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
                                                        allow_redetection=True)
        elif languages:
            # Explicit languages without re-detection: use exactly those.
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}.
            When no applicable language yields a result, ``date_obj`` is ``None`` and
            *period* is ``'day'``.

        :raises: ValueError - Unknown Language

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``.

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        TODO: Timezone issues

        """
        date_string = sanitize_date(date_string.strip())

        applicable_languages = self.language_detector.iterate_applicable_languages(
            date_string, modify=True)
        for language in applicable_languages:
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}
开发者ID:MojoJolo,项目名称:dateparser,代码行数:89,代码来源:date.py

示例9: setUp

 def setUp(self):
     # Detector built with its default arguments, exposed as ``self.parser``.
     detector = AutoDetectLanguage()
     self.parser = detector
开发者ID:mevigour,项目名称:dateparser,代码行数:2,代码来源:test_date_parser.py


注:本文中的dateparser.languages.detection.AutoDetectLanguage类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。