本文整理汇总了Python中babelfish.Language.fromguessit方法的典型用法代码示例。如果您正苦于以下问题:Python Language.fromguessit方法的具体用法?Python Language.fromguessit怎么用?Python Language.fromguessit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类babelfish.Language的用法示例。
在下文中一共展示了Language.fromguessit方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reverse
# 需要导入模块: from babelfish import Language [as 别名]
# 或者: from babelfish.Language import fromguessit [as 别名]
def reverse(self, name):
    """Resolve a guessit language name into language components.

    Resolution is attempted in this order:

    1. a "language + country" form, recognised by either of the two
       ``GuessitConverter`` country regexps; the match is done on ``name``
       exactly as given, before it is lower-cased;
    2. the ``guessit_exceptions`` table of this converter, whose stored
       value is returned as-is;
    3. the generic babelfish constructors, tried one after the other.

    :param name: language name or code to resolve
    :return: ``(alpha3, country, script)`` tuple. In the first case the
        country is an alpha2 code or ``None``; in the last case it is the
        ``country`` attribute of the language that was built.
    :raises babelfish.LanguageReverseError: when nothing recognises ``name``
    """
    country_match = GuessitConverter._with_country_regexp.match(name)
    if not country_match:
        country_match = GuessitConverter._with_country_regexp2.match(name)

    name = u(name.lower())

    if country_match:
        language = Language.fromguessit(country_match.group(1).strip())
        language.country = babelfish.Country.fromguessit(country_match.group(2).strip())
        alpha3 = language.alpha3
        country_code = language.country.alpha2 if language.country else None
        script = language.script or None
        return (alpha3, country_code, script)

    # The exceptions table is consulted before the generic converters so
    # that it can override whatever they would otherwise return.
    try:
        return self.guessit_exceptions[name]
    except KeyError:
        pass

    fallbacks = (
        babelfish.Language,
        babelfish.Language.fromalpha3b,
        babelfish.Language.fromalpha2,
        babelfish.Language.fromname,
        babelfish.Language.fromopensubtitles,
    )
    for build in fallbacks:
        try:
            found = build(name)
            return found.alpha3, found.country, found.script
        except (ValueError, babelfish.LanguageReverseError):
            # This constructor does not know the name; try the next one.
            continue

    raise babelfish.LanguageReverseError(name)
示例2: find_possible_languages
# 需要导入模块: from babelfish import Language [as 别名]
# 或者: from babelfish.Language import fromguessit [as 别名]
def find_possible_languages(string):
    """Find the words of ``string`` that can be read as a language.

    Each word returned by ``find_words`` is lower-cased, stripped of any
    subtitle prefix/suffix and of any language prefix, and the remainder is
    handed to ``Language.fromguessit``. Words whose remainder is listed in
    ``LNG_COMMON_WORDS`` are skipped, and so are words that babelfish
    rejects with a ``babelfish.Error``.

    :param string: text to scan
    :return: list of tuple (property, Language, lang_word, word) where
        ``property`` is ``'subtitleLanguage'`` when a subtitle prefix or
        suffix was removed and ``'language'`` otherwise, ``lang_word`` is
        the stripped lower-case word and ``word`` the original word
    """
    words = find_words(string)

    valid_words = []
    for word in words:
        lang_word = word.lower()
        key = 'language'

        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitleLanguage'

        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                # Remove the suffix from the END of the word. Slicing with
                # [:len(suffix)] would keep the first len(suffix) characters
                # instead ('frsub' -> 'frs'). The explicit end index also
                # stays correct for an empty suffix, unlike [:-len(suffix)].
                lang_word = lang_word[:len(lang_word) - len(suffix)]
                key = 'subtitleLanguage'

        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]

        if lang_word not in LNG_COMMON_WORDS:
            try:
                lang = Language.fromguessit(lang_word)
                # Keep language with alpha2 equivalent. Others are probably
                # uncommon languages.
                if lang == 'mul' or hasattr(lang, 'alpha2'):
                    valid_words.append((key, lang, lang_word, word))
            except babelfish.Error:
                # Not a language babelfish recognises: ignore the word.
                pass

    return valid_words
示例3: guess_language
# 需要导入模块: from babelfish import Language [as 别名]
# 或者: from babelfish.Language import fromguessit [as 别名]
def guess_language(text):  # pragma: no cover
    """Guess the language in which a body of text is written.

    Detection is delegated to the external ``guess_language`` module. When
    an ``ImportError`` is raised (typically because that module is not
    installed), two error messages are logged and ``UNDETERMINED`` is
    returned instead.

    :param text: text whose language should be detected
    :return: the language built by ``Language.fromguessit`` from the
        detector's answer, or ``UNDETERMINED`` on ``ImportError``
    """
    try:
        from guess_language import guessLanguage as detect

        detected = detect(text)
        return Language.fromguessit(detected)
    except ImportError:
        for message in (
            'Cannot detect the language of the given text body, missing dependency: guess-language',
            'Please install it from PyPI, by doing eg: pip install guess-language',
        ):
            log.error(message)
        return UNDETERMINED
示例4: search_language
# 需要导入模块: from babelfish import Language [as 别名]
# 或者: from babelfish.Language import fromguessit [as 别名]
def search_language(string, lang_filter=None):
    """Return a guess for the first acceptable language found in ``string``.

    Candidates come from ``find_possible_languages``; the first one that
    passes the optional filter is returned as a ``Guess`` holding the
    language, its span in ``string`` and a confidence. ``None`` is returned
    when no candidate is acceptable.

    You can restrict the accepted languages with the ``lang_filter``
    argument, as in lang_filter = [ 'fr', 'eng', 'spanish' ]; every entry is
    converted with ``Language.fromguessit``.

    >>> search_language('movie [en].avi')['language']
    <Language [en]>

    >>> search_language('the zen fat cat and the gay mad men got a new fan', lang_filter = ['en', 'fr', 'es'])

    """
    allowed = lang_filter
    if allowed:
        allowed = {Language.fromguessit(code) for code in allowed}

    # Confidence by length of the matched word: 2 letters -> alpha2 code,
    # 3 letters -> alpha3 code.
    confidence_by_length = {2: 0.8, 3: 0.9}

    for prop, language, lang, word in find_possible_languages(string):
        if allowed and language not in allowed:
            continue

        confidence = confidence_by_length.get(len(lang))
        if confidence is None:
            if prop == 'subtitleLanguage':
                # Subtitle prefix found with language
                confidence = 0.6
            else:
                # Full language names may also be ordinary words, so they
                # get the low-confidence route.
                confidence = 0.3

        start = string.find(word)
        return Guess({prop: language}, confidence=confidence, input=string,
                     span=(start, start + len(word)))

    return None
示例5: find_possible_languages
# 需要导入模块: from babelfish import Language [as 别名]
# 或者: from babelfish.Language import fromguessit [as 别名]
def find_possible_languages(string, allowed_languages=None):
    """Find the words of ``string`` that can be read as a language.

    Each word returned by ``find_words`` is lower-cased, stripped of any
    subtitle prefix/suffix and of any language prefix, and the remainder is
    handed to ``Language.fromguessit``. A word is skipped when either the
    remainder or the whole lower-cased word is a common word, or when
    babelfish rejects it with a ``babelfish.Error``.

    :param string: text to scan
    :param allowed_languages: optional collection of lower-case language
        names, alpha2 or alpha3 codes. When given (and non-empty), only
        languages matching one of its entries are kept and
        ``LNG_COMMON_WORDS_STRICT`` is used as the common-word list;
        otherwise ``LNG_COMMON_WORDS`` is used.
    :return: list of tuple (property, Language, lang_word, word) where
        ``property`` is ``'subtitleLanguage'`` when a subtitle prefix or
        suffix was removed and ``'language'`` otherwise, ``lang_word`` is
        the stripped lower-case word and ``word`` the original word
    """
    common_words = LNG_COMMON_WORDS_STRICT if allowed_languages else LNG_COMMON_WORDS

    words = find_words(string)

    valid_words = []
    for word in words:
        lang_word = word.lower()
        key = 'language'

        for prefix in subtitle_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]
                key = 'subtitleLanguage'

        for suffix in subtitle_suffixes:
            if lang_word.endswith(suffix):
                # Remove the suffix from the END of the word. Slicing with
                # [:len(suffix)] would keep the first len(suffix) characters
                # instead ('frsub' -> 'frs'). The explicit end index also
                # stays correct for an empty suffix, unlike [:-len(suffix)].
                lang_word = lang_word[:len(lang_word) - len(suffix)]
                key = 'subtitleLanguage'

        for prefix in lang_prefixes:
            if lang_word.startswith(prefix):
                lang_word = lang_word[len(prefix):]

        if lang_word not in common_words and word.lower() not in common_words:
            try:
                lang = Language.fromguessit(lang_word)
                if allowed_languages:
                    # NOTE(review): if `lang.alpha2` raises a babelfish.Error
                    # for a language without a 2-letter code, the word is
                    # dropped by the handler below before alpha3 is checked
                    # - confirm that this is intended.
                    if (lang.name.lower() in allowed_languages
                            or lang.alpha2.lower() in allowed_languages
                            or lang.alpha3.lower() in allowed_languages):
                        valid_words.append((key, lang, lang_word, word))
                # Keep language with alpha2 equivalent. Others are probably
                # uncommon languages.
                elif lang == 'mul' or hasattr(lang, 'alpha2'):
                    valid_words.append((key, lang, lang_word, word))
            except babelfish.Error:
                # Not a language babelfish recognises: ignore the word.
                pass

    return valid_words