本文整理汇总了 Python 中 dateparser.languages.detection.AutoDetectLanguage.iterate_applicable_languages 方法的典型用法代码示例。如果您正苦于以下问题：Python AutoDetectLanguage.iterate_applicable_languages 方法的具体用法？Python AutoDetectLanguage.iterate_applicable_languages 怎么用？Python AutoDetectLanguage.iterate_applicable_languages 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dateparser.languages.detection.AutoDetectLanguage 的用法示例。
在下文中一共展示了AutoDetectLanguage.iterate_applicable_languages方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: AutoDetectLanguageTest
# Required import: from dateparser.languages.detection import AutoDetectLanguage [as alias]
# Note: iterate_applicable_languages is a method of AutoDetectLanguage, so it cannot be imported on its own; call it on an AutoDetectLanguage instance.
class AutoDetectLanguageTest(unittest.TestCase):
    """Unit tests for language detection performed by ``AutoDetectLanguage``."""

    # Shared skip reason for the two parsing-oriented tests below.
    _PARSING_TEST_SKIP_REASON = (
        'This test should only be testing detecting languages, not parsing them. Although tests '
        'for parsing this dates should be created separately to not reduce the coverage'
    )

    def setUp(self):
        self.parser = AutoDetectLanguage()

    def _applicable_shortnames(self, date_string):
        # Collect the ``shortname`` attribute of every language the detector
        # yields for ``date_string``.
        shortname_of = attrgetter('shortname')
        return [shortname_of(language)
                for language in self.parser.iterate_applicable_languages(date_string)]

    def test_detect_language(self):
        # assertItemsEqual ignores ordering, so only the set of detected
        # languages (with multiplicity) matters here.
        self.assertItemsEqual(['es', 'pt'], self._applicable_shortnames('11 abril 2010'))
        self.assertItemsEqual(['es'], self._applicable_shortnames('11 junio 2010'))

    @unittest.skip(_PARSING_TEST_SKIP_REASON)
    def test_should_reduce_possible_languages_and_reject_different(self):
        spanish_cases = (
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
        )
        for text, expected in spanish_cases:
            result = self.parser.parse(text, None)
            self.assertEqual(expected.date(), result.date())

        # After the Spanish inputs, a Portuguese month name must be rejected.
        with self.assertRaisesRegexp(ValueError, 'Invalid date'):
            self.parser.parse(u'13 Setembro, 2014', None)

    @unittest.skip(_PARSING_TEST_SKIP_REASON)
    def test_should_accept_dates_in_different_languages(self):
        mixed_language_cases = (
            (u'13 Ago, 2014', datetime(2014, 8, 13)),
            (u'13 Septiembre, 2014', datetime(2014, 9, 13)),
            (u'13 Setembro, 2014', datetime(2014, 9, 13)),
        )
        # A dedicated detector with re-detection enabled is used instead of
        # the one built in setUp.
        redetecting_parser = AutoDetectLanguage(None, allow_redetection=True)
        for text, expected in mixed_language_cases:
            result = redetecting_parser.parse(text, None)
            self.assertEqual(expected.date(), result.date())
示例2: DateDataParser
# Required import: from dateparser.languages.detection import AutoDetectLanguage [as alias]
# Note: iterate_applicable_languages is a method of AutoDetectLanguage, so it cannot be imported on its own; call it on an AutoDetectLanguage instance.
class DateDataParser(object):
    """Parse date strings using a language detector chosen at construction time.

    :param languages: optional list, tuple or set of keys of
        ``default_language_loader.get_language_map()``.
    :param allow_redetect_language: when true, an ``AutoDetectLanguage``
        detector is created with ``allow_redetection=True``.
    :raises ValueError: if any given language is missing from the language map.
    :raises TypeError: if ``languages`` is neither ``None`` nor a list, tuple or set.
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was removed in Python 3.10; take the ABC from
        # ``collections.abc`` and fall back to the old location on Python 2.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:
            from collections import Set as abstract_set

        if isinstance(languages, (list, tuple, abstract_set)):
            available_language_map = default_language_loader.get_language_map()
            if all(language in available_language_map for language in languages):
                # Swap the keys for the corresponding language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                raise ValueError("Unknown language(s) %r" % ', '.join(unsupported_languages))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            # An empty collection is treated like "no restriction".
            self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
                                                        allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """Return a dictionary with a date object and a period.

        Period values can be 'day' (default), 'week', 'month' or 'year'.

        The period aims to solve the following issue: a forum may display
        "2 weeks ago" in the thread list (the thread itself shows the right
        date), so the engine translates "2 weeks ago" to a certain date. The
        next thread summary displays "3 weeks ago", which is translated to
        another date seven days before the first one. A valid date_string
        between both dates would not be scraped because it is not an exact
        date match. The period field helps to build better date range
        detection.

        :param date_string: the text to parse; it is stripped and passed
            through ``sanitize_date`` before parsing.
        :param date_formats: forwarded unchanged to ``_DateLanguageParser.parse``.
        :return: the first truthy parse result, otherwise
            ``{'date_obj': None, 'period': 'day'}``.

        TODO: Timezone issues
        """
        date_string = sanitize_date(date_string.strip())

        for language in self.language_detector.iterate_applicable_languages(
                date_string, modify=True):
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}
示例3: AutoDetectLanguageTest
# Required import: from dateparser.languages.detection import AutoDetectLanguage [as alias]
# Note: iterate_applicable_languages is a method of AutoDetectLanguage, so it cannot be imported on its own; call it on an AutoDetectLanguage instance.
class AutoDetectLanguageTest(BaseTestCase):
    """Given/when/then tests for ``AutoDetectLanguage.iterate_applicable_languages``."""

    def setUp(self):
        super(AutoDetectLanguageTest, self).setUp()
        # Placeholders: ``parser`` is replaced by given_parser();
        # ``detected_languages`` is presumably filled by the when_* helpers
        # (not fully visible in this excerpt).
        self.detected_languages = NotImplemented
        self.parser = NotImplemented
        # Just a known subset so we can rely on test outcomes.
        # Feel free to add, but not exclude or change order.
        self.known_languages = ['en', 'fr', 'es', 'pt', 'ru', 'tr', 'cs']

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_languages=['es', 'pt']),
        param(date_strings=["11 junio 2010"], expected_languages=['es']),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_languages=['es']),
    ])
    def test_detect_languages(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_language='es'),
        param(date_strings=["11 junio 2010"], expected_language='es'),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_language='es'),
    ])
    def test_exclude_ineligible_languages_with_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=True)
        self.then_detected_languages_are([expected_language])
        # Only the known languages from the detected one onwards are
        # expected to remain on the parser.
        first_kept = self.known_languages.index(expected_language)
        self.then_parser_languages_are(self.known_languages[first_kept:])

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_language='es'),
        param(date_strings=["11 junio 2010"], expected_language='es'),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_language='es'),
    ])
    def test_do_not_exclude_ineligible_languages_without_modify(self, date_strings, expected_language):
        self.given_parser(languages=self.known_languages)
        self.when_one_language_is_detected(date_strings, modify=False)
        self.then_detected_languages_are([expected_language])
        # Without modify the parser's language list must be left untouched.
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(date_strings=["11 abril 2010"], expected_languages=['es', 'pt']),
        param(date_strings=["11 junio 2010"], expected_languages=['es']),
        param(date_strings=["13 Ago, 2014", "13 Septiembre, 2014"], expected_languages=['es']),
        param(date_strings=["13 Srpen, 2014"], expected_languages=['cs']),
    ])
    def test_do_not_exclude_ineligible_languages_when_all_ineligible(self, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages)
        self.when_all_languages_are_detected(date_strings, modify=True)
        self.then_detected_languages_are(expected_languages)
        self.then_parser_languages_are(self.known_languages)

    @parameterized.expand([
        param(language='es', date_strings=["13 Setembro, 2014"]),
        param(language='cs', date_strings=["'11 Ağustos, 2014'"]),
    ])
    def test_reject_dates_in_other_languages_without_redetection(self, language, date_strings):
        self.given_parser(languages=self.known_languages)
        # Pin the parser to a single language that does not match the input.
        self.given_parser_languages_are([language])
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are([])

    @parameterized.expand([
        param(detected_languages=['es'], date_strings=['13 Juillet, 2014'], expected_languages=['fr']),
        param(detected_languages=['es'], date_strings=['11 Ağustos, 2014'], expected_languages=['tr']),
    ])
    def test_accept_dates_in_other_languages_with_redetection_enabled(
            self, detected_languages, date_strings, expected_languages):
        self.given_parser(languages=self.known_languages, allow_redetection=True)
        self.given_parser_languages_are(detected_languages)
        self.when_all_languages_are_detected(date_strings)
        self.then_detected_languages_are(expected_languages)

    def test_accept_numeric_dates_without_redetection(self):
        self.given_parser(languages=self.known_languages)
        self.given_parser_languages_are(['es'])
        self.when_all_languages_are_detected(['13/08/2014'])
        self.then_detected_languages_are(['es'])

    def given_parser(self, languages=None, allow_redetection=False):
        # Build the detector under test; language keys are resolved to
        # language objects through the default loader's language map.
        resolved = languages
        if resolved is not None:
            lookup = default_language_loader.get_language_map()
            resolved = [lookup[code] for code in resolved]
        self.parser = AutoDetectLanguage(resolved, allow_redetection=allow_redetection)

    def given_parser_languages_are(self, languages):
        # Overwrite the detector's current language list directly.
        lookup = default_language_loader.get_language_map()
        self.parser.languages = [lookup[code] for code in languages]
def when_all_languages_are_detected(self, date_strings, modify=False):
assert not isinstance(date_strings, six.string_types)
for date_string in date_strings:
if settings.NORMALIZE:
date_string = normalize_unicode(date_string)
detected_languages = list(self.parser.iterate_applicable_languages(date_string, modify=modify, settings=settings))
#.........这里部分代码省略.........
示例4: DateDataParser
# Required import: from dateparser.languages.detection import AutoDetectLanguage [as alias]
# Note: iterate_applicable_languages is a method of AutoDetectLanguage, so it cannot be imported on its own; call it on an AutoDetectLanguage instance.
#.........这里部分代码省略.........
"""
language_loader = None
@apply_settings
def __init__(self, languages=None, allow_redetect_language=False, settings=None):
    """Validate ``languages`` and select the language detector.

    :param languages: optional list, tuple or set of keys of the language
        map returned by ``self._get_language_loader().get_language_map()``.
    :param allow_redetect_language: when true, an ``AutoDetectLanguage``
        detector is created with ``allow_redetection=True``.
    :param settings: stored as ``self._settings``.
        NOTE(review): presumably supplied by ``apply_settings`` when
        omitted -- confirm against that decorator.
    :raises ValueError: if any given language is missing from the language map.
    :raises TypeError: if ``languages`` is neither ``None`` nor a list, tuple or set.
    """
    # ``collections.Set`` was removed in Python 3.10; take the ABC from
    # ``collections.abc`` and fall back to the old location on Python 2.
    try:
        from collections.abc import Set as abstract_set
    except ImportError:
        from collections import Set as abstract_set

    self._settings = settings
    available_language_map = self._get_language_loader().get_language_map()

    if isinstance(languages, (list, tuple, abstract_set)):
        if all(language in available_language_map for language in languages):
            # Swap the keys for the corresponding language objects.
            languages = [available_language_map[language] for language in languages]
        else:
            unsupported_languages = set(languages) - set(available_language_map.keys())
            raise ValueError(
                "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
    elif languages is not None:
        raise TypeError("languages argument must be a list (%r given)" % type(languages))

    if allow_redetect_language:
        # With no (or empty) languages, every available language is a candidate.
        self.language_detector = AutoDetectLanguage(
            languages if languages else list(available_language_map.values()),
            allow_redetection=True)
    elif languages:
        self.language_detector = ExactLanguages(languages=languages)
    else:
        self.language_detector = AutoDetectLanguage(
            list(available_language_map.values()), allow_redetection=False)
def get_date_data(self, date_string, date_formats=None):
    """
    Parse string representing date and/or time in recognizable localized formats.
    Supports parsing multiple languages and timezones.

    :param date_string:
        A string representing date and/or time in a recognizably valid format.
    :type date_string: str|unicode
    :param date_formats:
        A list of format strings using directives as given
        `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
        The parser applies formats one by one, taking into account the detected languages.
    :type date_formats: list

    :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
        {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

    :raises: ValueError - Unknown Language; TypeError - ``date_string`` has no ``strip`` method

    .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

    *Period* represents the granularity of date parsed from the given string.

    In the example below, since no day information is present, the day is assumed to be current
    day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
    Hence, the level of precision is ``month``:

        >>> DateDataParser().get_date_data(u'March 2015')
        {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

    Similarly, for date strings with no day and month information present, level of precision
    is ``year`` and day ``16`` and month ``6`` are from *current_date*.

        >>> DateDataParser().get_date_data(u'2014')
        {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

    Dates with time zone indications or UTC offsets are returned in UTC time unless
    specified using `Settings`_.

        >>> DateDataParser().get_date_data(u'23 March 2000, 1:21 PM CET')
        {'date_obj': datetime.datetime(2000, 3, 23, 14, 21), 'period': 'day'}

    """
    # Anything without a ``strip`` method is reported as a type error.
    try:
        date_string = date_string.strip()
    except AttributeError:
        raise TypeError('Input type must be str or unicode')

    if self._settings.NORMALIZE:
        date_string = normalize_unicode(date_string)
    date_string = sanitize_date(date_string)

    for language in self.language_detector.iterate_applicable_languages(
            date_string, modify=True, settings=self._settings):
        parsed_date = _DateLanguageParser.parse(
            language, date_string, date_formats, settings=self._settings)
        if parsed_date:
            return parsed_date

    # No applicable language produced a result.
    return {'date_obj': None, 'period': 'day'}
def get_date_tuple(self, *args, **kwargs):
    """Return the result of ``get_date_data`` as a ``DateData`` namedtuple.

    All arguments are forwarded unchanged to ``get_date_data``; the keys of
    the returned dict become the fields ``date_obj`` and ``period``.
    """
    fields = self.get_date_data(*args, **kwargs)
    record_type = collections.namedtuple('DateData', 'date_obj period')
    return record_type(**fields)
@classmethod
def _get_language_loader(cls):
    """Return the class-level ``LanguageDataLoader``, creating it on first use."""
    loader = cls.language_loader
    if not loader:
        # Store the new loader on the class so later calls reuse it.
        loader = cls.language_loader = LanguageDataLoader()
    return loader
示例5: DateDataParser
# Required import: from dateparser.languages.detection import AutoDetectLanguage [as alias]
# Note: iterate_applicable_languages is a method of AutoDetectLanguage, so it cannot be imported on its own; call it on an AutoDetectLanguage instance.
class DateDataParser(object):
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of two letters language codes, e.g. ['en', 'es'].
        If languages are given, it will not attempt to detect the language.
    :type languages: list
    :param allow_redetect_language:
        Enables/disables language re-detection.
    :type allow_redetect_language: bool

    :return: A parser instance

    :raises:
        ValueError - Unknown Language, TypeError - Languages argument must be a list
    """

    def __init__(self, languages=None, allow_redetect_language=False):
        # ``collections.Set`` was removed in Python 3.10; take the ABC from
        # ``collections.abc`` and fall back to the old location on Python 2.
        try:
            from collections.abc import Set as abstract_set
        except ImportError:
            from collections import Set as abstract_set

        if isinstance(languages, (list, tuple, abstract_set)):
            available_language_map = default_language_loader.get_language_map()
            if all(language in available_language_map for language in languages):
                # Swap the codes for the corresponding language objects.
                languages = [available_language_map[language] for language in languages]
            else:
                unsupported_languages = set(languages) - set(available_language_map.keys())
                raise ValueError(
                    "Unknown language(s): %s" % ', '.join(map(repr, unsupported_languages)))
        elif languages is not None:
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if allow_redetect_language:
            # An empty collection is treated like "no restriction".
            self.language_detector = AutoDetectLanguage(languages=languages if languages else None,
                                                        allow_redetection=True)
        elif languages:
            self.language_detector = ExactLanguages(languages=languages)
        else:
            self.language_detector = AutoDetectLanguage(languages=None, allow_redetection=False)

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str|unicode
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a dict mapping keys to :mod:`datetime.datetime` object and *period*. For example:
            {'date_obj': datetime.datetime(2015, 6, 1, 0, 0), 'period': u'day'}

        :raises: ValueError - Unknown Language

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``.

            >>> DateDataParser().get_date_data(u'March 2015')
            {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'}

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data(u'2014')
            {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'}

        TODO: Timezone issues

        """
        date_string = sanitize_date(date_string.strip())

        for language in self.language_detector.iterate_applicable_languages(
                date_string, modify=True):
            parsed_date = _DateLanguageParser.parse(language, date_string, date_formats)
            if parsed_date:
                return parsed_date

        # No applicable language produced a result.
        return {'date_obj': None, 'period': 'day'}