This article collects typical usage examples of the Python function translate.lang.data.normalize_code. If you have been wondering what exactly normalize_code does and how to use it, the curated code examples below should help.
The following shows 15 code examples of the normalize_code function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
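Before the examples, here is a minimal sketch of what normalize_code does, based on the behaviour exercised by the test in Example 15 below: it lowercases a language code and replaces the "_" and "@" separators with "-".

from translate.lang import data

# Behaviour as exercised by the assertions in Example 15 below:
print(data.normalize_code("af_ZA"))     # "af-za"
print(data.normalize_code("xx@Latin"))  # "xx-latin"
# Docstrings in the examples quote codes such as pt_BR, which normalizes the same way:
print(data.normalize_code("pt_BR"))     # "pt-br"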
Example 1: translate_unit
def translate_unit(self, unit_source, source_langs, target_langs):
    """return TM suggestions for unit_source"""
    if isinstance(unit_source, bytes):
        unit_source = unit_source.decode("utf-8")
    if isinstance(source_langs, list):
        source_langs = [data.normalize_code(lang) for lang in source_langs]
        source_langs = ','.join(source_langs)
    else:
        source_langs = data.normalize_code(source_langs)
    if isinstance(target_langs, list):
        target_langs = [data.normalize_code(lang) for lang in target_langs]
        target_langs = ','.join(target_langs)
    else:
        target_langs = data.normalize_code(target_langs)
    minlen = min_levenshtein_length(len(unit_source), self.min_similarity)
    maxlen = max_levenshtein_length(len(unit_source), self.min_similarity,
                                    self.max_length)
    # split source into words, remove punctuation and special
    # chars, keep words that are at least 3 chars long
    unit_words = STRIP_REGEXP.sub(' ', unit_source).split()
    unit_words = list(filter(lambda word: len(word) > 2, unit_words))
    if self.fulltext and len(unit_words) > 3:
        logging.debug("fulltext matching")
        query = """SELECT s.text, t.text, s.context, s.lang, t.lang FROM sources s JOIN targets t ON s.sid = t.sid JOIN fulltext f ON s.sid = f.docid
                   WHERE s.lang IN (?) AND t.lang IN (?) AND s.length BETWEEN ? AND ?
                   AND fulltext MATCH ?"""
        search_str = " OR ".join(unit_words)
        self.cursor.execute(query, (source_langs, target_langs, minlen,
                                    maxlen, search_str))
    else:
        logging.debug("nonfulltext matching")
        query = """SELECT s.text, t.text, s.context, s.lang, t.lang FROM sources s JOIN targets t ON s.sid = t.sid
                   WHERE s.lang IN (?) AND t.lang IN (?)
                   AND s.length >= ? AND s.length <= ?"""
        self.cursor.execute(query, (source_langs, target_langs, minlen,
                                    maxlen))
    results = []
    for row in self.cursor:
        quality = self.comparer.similarity(unit_source, row[0],
                                           self.min_similarity)
        if quality >= self.min_similarity:
            results.append({
                'source': row[0],
                'target': row[1],
                'context': row[2],
                'quality': quality,
            })
    results.sort(key=lambda match: match['quality'], reverse=True)
    results = results[:self.max_candidates]
    logging.debug("results: %s", six.text_type(results))
    return results
Example 2: get_language_supported
def get_language_supported(lang_code, supported):
    normalized = data.normalize_code(data.simplify_to_common(lang_code))
    if normalized in supported:
        return normalized
    # FIXME: horribly slow way of dealing with languages with @ in them
    for lang in supported.keys():
        if normalized == data.normalize_code(lang):
            return lang
    return None
Example 3: gettargetlanguage
def gettargetlanguage(self):
    """Get the target language for this .qph file.

    :return: ISO code e.g. af, fr, pt_BR
    :rtype: String
    """
    return data.normalize_code(self.header.get('language'))
Example 4: get_alt_src_langs
def get_alt_src_langs(request, user, translation_project):
    language = translation_project.language
    project = translation_project.project
    source_language = project.source_language

    langs = user.alt_src_langs.exclude(
        id__in=(language.id, source_language.id)
    ).filter(translationproject__project=project)

    if not user.alt_src_langs.count():
        from pootle_language.models import Language
        accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')

        for accept_lang, unused in parse_accept_lang_header(accept):
            if accept_lang == '*':
                continue

            simplified = data.simplify_to_common(accept_lang)
            normalized = to_locale(data.normalize_code(simplified))
            code = to_locale(accept_lang)
            if (normalized in
                ('en', 'en_US', source_language.code, language.code) or
                code in ('en', 'en_US', source_language.code, language.code)):
                continue

            langs = Language.objects.filter(
                code__in=(normalized, code),
                translationproject__project=project,
            )
            if langs.count():
                break

    return langs
Example 5: get_alt_src_langs
def get_alt_src_langs(request, profile, translation_project):
    language = translation_project.language
    project = translation_project.project
    source_language = project.source_language

    langs = profile.alt_src_langs.exclude(id__in=(language.id, source_language.id)).filter(
        translationproject__project=project
    )

    if not profile.alt_src_langs.count():
        from pootle_language.models import Language
        accept = request.META.get("HTTP_ACCEPT_LANGUAGE", "")

        for accept_lang, unused in parse_accept_lang_header(accept):
            if accept_lang == "*":
                continue

            normalized = to_locale(data.normalize_code(data.simplify_to_common(accept_lang)))
            code = to_locale(accept_lang)
            if normalized in ("en", "en_US", source_language.code, language.code) or code in (
                "en",
                "en_US",
                source_language.code,
                language.code,
            ):
                continue

            langs = Language.objects.filter(code__in=(normalized, code), translationproject__project=project)
            if langs.count():
                break

    return langs
Example 6: get_alt_src_langs
def get_alt_src_langs(request, user, translation_project):
    if request.user.is_anonymous:
        return
    language = translation_project.language
    project = translation_project.project
    source_language = project.source_language
    langs = list(
        user.alt_src_langs.exclude(
            id__in=(language.id, source_language.id)
        ).filter(
            translationproject__project=project))
    if langs:
        return langs
    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
    for accept_lang, __ in parse_accept_lang_header(accept):
        if accept_lang == '*':
            continue
        normalized = to_locale(
            data.normalize_code(
                data.simplify_to_common(accept_lang)))
        code = to_locale(accept_lang)
        is_source_lang = any(
            langcode in ('en', 'en_US', source_language.code, language.code)
            for langcode in [code, normalized])
        if is_source_lang:
            continue
        langs = list(
            Language.objects.filter(
                code__in=(normalized, code),
                translationproject__project=project))
        if langs:
            return langs
Example 7: gettargetlanguage
def gettargetlanguage(self):
    """Get the target language for this .ts file.

    @return: ISO code e.g. af, fr, pt_BR
    @rtype: String
    """
    return data.normalize_code(self.header.get("language"))
Example 8: get_lang_from_http_header
def get_lang_from_http_header(request, supported):
    """If the user's browser sends a list of preferred languages in the
    HTTP_ACCEPT_LANGUAGE header, parse it into a list. Then walk through
    the list, and for each entry, we check whether we have a matching
    pootle translation project. If so, we return it.

    If nothing is found, return None."""
    accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
    for accept_lang, unused in trans_real.parse_accept_lang_header(accept):
        if accept_lang == '*':
            return None
        normalized = data.normalize_code(data.simplify_to_common(accept_lang, supported))
        if normalized in ['en-us', 'en']:
            return None
        if normalized in supported:
            return normalized

        # FIXME: horribly slow way of dealing with languages with @ in them
        for lang in supported.keys():
            if normalized == data.normalize_code(lang):
                return lang
    return None
Example 9: add_dict
def add_dict(self, unit, source_lang, target_lang, commit=True):
    """inserts units represented as dictionaries in database"""
    source_lang = data.normalize_code(source_lang)
    target_lang = data.normalize_code(target_lang)
    try:
        try:
            self.cursor.execute("INSERT INTO sources (text, context, lang, length) VALUES(?, ?, ?, ?)",
                                (unit["source"],
                                 unit["context"],
                                 source_lang,
                                 len(unit["source"])))
            sid = self.cursor.lastrowid
        except dbapi2.IntegrityError:
            # source string already exists in db, run query to find sid
            self.cursor.execute("SELECT sid FROM sources WHERE text=? AND context=? and lang=?",
                                (unit["source"],
                                 unit["context"],
                                 source_lang))
            sid = self.cursor.fetchone()
            (sid,) = sid
        try:
            # FIXME: get time info from translation store
            # FIXME: do we need to store target length?
            self.cursor.execute("INSERT INTO targets (sid, text, lang, time) VALUES (?, ?, ?, ?)",
                                (sid,
                                 unit["target"],
                                 target_lang,
                                 int(time.time())))
        except dbapi2.IntegrityError:
            # target string already exists in db, do nothing
            pass
        if commit:
            self.connection.commit()
    except Exception:
        if commit:
            self.connection.rollback()
        raise
Example 10: getsourcelanguage
def getsourcelanguage(self):
    """Get the source language for this .qph file.

    We don't implement setsourcelanguage as users really shouldn't be
    altering the source language in .qph files, it should be set correctly
    by the extraction tools.

    :return: ISO code e.g. af, fr, pt_BR
    :rtype: String
    """
    lang = data.normalize_code(self.header.get('sourcelanguage', "en"))
    if lang == 'en-us':
        return 'en'
    return lang
Example 11: get_alt_src_langs
def get_alt_src_langs(request, profile, language):
    langs = profile.alt_src_langs.exclude(id=language.id)

    if not langs.count():
        accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
        codes = []
        for accept_lang, unused in parse_accept_lang_header(accept):
            if accept_lang == '*':
                continue
            normalized = to_locale(data.normalize_code(data.simplify_to_common(accept_lang)))
            if normalized in ['en_US', 'en', language.code]:
                continue
            codes.append(normalized)
        if codes:
            from pootle_language.models import Language
            langs = Language.objects.filter(code__in=codes)

    return langs
Example 12: getsourcelanguage
def getsourcelanguage(self):
    """Get the source language for this .ts file.

    The 'sourcelanguage' attribute was only added to the TS format in
    Qt v4.5. We return 'en' if there is no sourcelanguage set.

    We don't implement setsourcelanguage as users really shouldn't be
    altering the source language in .ts files, it should be set correctly
    by the extraction tools.

    :return: ISO code e.g. af, fr, pt_BR
    :rtype: String
    """
    lang = data.normalize_code(self.header.get('sourcelanguage', "en"))
    if lang == 'en-us':
        return 'en'
    return lang
Example 13: create_suggestions
def create_suggestions(self, suggestion):
    # Skip any suggestions where the suggested translation contains parentheses
    if re.match(r"\(.*\)", suggestion["text"]):
        return []

    units = []
    for proj in suggestion["projects"]:
        # Skip fuzzy matches:
        if proj["flags"] != 0:
            continue

        source = proj["orig_phrase"].strip()
        # Skip strings that are too short
        if len(source) < MIN_TERM_LENGTH:
            continue
        # Skip any units containing parentheses
        if re.match(r"\(.*\)", source):
            continue

        unit = TranslationUnit(source)
        target = suggestion["text"].strip()
        # Skip phrases already found:
        old_unit = self.store.findunit(proj["orig_phrase"])
        if old_unit and old_unit.target == target:
            continue

        # We mostly want to work with lowercase strings, but in German (and
        # some languages with a related writing style), this will probably
        # irritate more often than help, since nouns are always written to
        # start with capital letters.
        target_lang_code = self.main_controller.lang_controller.target_lang.code
        if data.normalize_code(target_lang_code) not in ("de", "de-de", "lb", "als", "ksh", "stq", "vmf"):
            # unless the string contains multiple consecutive uppercase
            # characters or uses some type of camel case, we take it to
            # lower case
            if not is_case_sensitive(target):
                target = target.lower()
        unit.target = target
        units.append(unit)
    return units
Example 14: _match_normalized_langcode
def _match_normalized_langcode(self, langcode):
    # materialise the keys so they can be indexed (dict.keys() is a view in Python 3)
    languages_keys = list(self.languages.keys())
    normalized_keys = [data.normalize_code(lang) for lang in languages_keys]
    i = normalized_keys.index(data.normalize_code(langcode))
    return languages_keys[i]
Example 15: test_normalise_code
def test_normalise_code():
    """test the normalisation of language codes"""
    assert data.normalize_code("af_ZA") == "af-za"
    assert data.normalize_code("xx@Latin") == "xx-latin"