本文整理汇总了Python中langdetect.detect_langs方法的典型用法代码示例。如果您正苦于以下问题：Python langdetect.detect_langs方法的具体用法？Python langdetect.detect_langs怎么用？Python langdetect.detect_langs使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块langdetect的用法示例。
在下文中一共展示了langdetect.detect_langs方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_lang
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def get_lang(text):
    """Return the language code detected for ``text``, biased towards Russian.

    Every candidate reported by ``langdetect.detect_langs`` is considered;
    the Russian candidate (``"ru"``) gets 0.2 added to its probability
    before the comparison.  ``"ru"`` is also the fallback when detection
    raises ``LangDetectException`` or yields no candidates.

    :param text: text to analyse.
    :return: the ``lang`` attribute of the winning candidate, or ``"ru"``.
    """
    try:
        candidates = langdetect.detect_langs(text)
    except langdetect.lang_detect_exception.LangDetectException:
        return "ru"

    best = None
    for candidate in candidates:
        if candidate.lang == "ru":
            # Bias the choice towards Russian.
            candidate.prob += 0.2
        # Uses the ordering defined by the candidate objects themselves
        # (presumably by probability -- confirm against langdetect).
        if best is None or best < candidate:
            best = candidate

    return "ru" if best is None else best.lang
示例2: classify
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def classify(text, debug = False):
    """Classify ``text`` by its first detected language.

    Detection is retried on the UTF-8 decoded input when the first attempt
    raises ``UnicodeDecodeError``.

    :param text: text to analyse.
    :param debug: when true, return the raw ``(lang, prob)`` pair without
        applying the confidence threshold.
    :return: ``(lang, prob)`` in debug mode; otherwise the language code
        when its probability is above 0.90, else ``None``.
    """
    try:
        candidates = detect_langs(text)
    except UnicodeDecodeError:
        candidates = detect_langs(text.decode("utf-8"))

    best = candidates[0]
    if debug:
        return (best.lang, best.prob)
    return best.lang if best.prob > 0.90 else None
示例3: process_normalized_lang_map
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def process_normalized_lang_map(text):
    """Map normalized language codes to their summed detection probability.

    The probabilities reported by ``langdetect.detect_langs`` are looked up
    for every code in ``ALL_LANGS``; each code is translated through
    ``COMMON_LANGUAGE_MAP`` (codes without an entry map to themselves) and
    probabilities that land on the same normalized code are added together.
    A ``LangDetectException`` is treated as "nothing detected".

    :param text: text to analyse; a falsy value is analysed as ``""``.
    :return: ``defaultdict`` (default ``0.0``) of normalized code -> probability.
    """
    try:
        detected = langdetect.detect_langs(text or "")
    except langdetect.lang_detect_exception.LangDetectException:
        detected = []

    raw_probs = {}
    for entry in detected:
        raw_probs[entry.lang] = entry.prob

    totals = defaultdict(lambda: 0.0)
    for code in ALL_LANGS:
        normalized = COMMON_LANGUAGE_MAP.get(code, code)
        totals[normalized] += raw_probs.get(code, 0.0)
    return totals
示例4: check_language
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def check_language(self, msg, target=None):
    """Check the language of the message.

    Add the result to the metadata and trigger the rule if the detected
    languages are not in the ok list.

    :param msg: message to inspect; ``msg.text`` is analysed and
      ``msg.plugin_tags["LANGUAGES"]`` receives the space-separated
      language codes whose probability exceeds the configured
      ``textcat_acceptable_prob``.
    :param target: not used by this method.
    :return: True if the message language is unwanted and False
      otherwise
    """
    prob = self["textcat_acceptable_prob"]
    try:
        results = langdetect.detect_langs(msg.text)
    except langdetect.lang_detect_exception.LangDetectException:
        # langdetect raises when the text has no usable features (for
        # example an empty body).  An undetectable language must not
        # trigger the rule, nor crash the check.
        self.ctxt.log.debug("TextCat: unable to detect a language.")
        return False
    self.ctxt.log.debug("TextCat results: %s", results)
    langs = [lang.lang for lang in results if lang.prob > prob]
    if len(langs) > self["textcat_max_languages"]:
        self.ctxt.log.debug("Too many languages.")
        return False
    msg.plugin_tags["LANGUAGES"] = " ".join(langs)
    ok_languages = self["ok_languages"]
    if "all" in ok_languages:
        # All good.
        return False
    # Unwanted as soon as one detected language is outside the ok list.
    return any(lang not in ok_languages for lang in langs)
示例5: languages_with_examples
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def languages_with_examples(self):
    """Return one example post URL for each non-English language found.

    The ``title`` and ``summary`` of every post under
    ``self.altmetric_api_raw["posts"]`` are examined.  A field counts when
    it has more than 7 space-separated words and its top detection has a
    probability above 0.90 and is not ``"en"``.

    :return: dict mapping the language name (as returned by
        ``get_language_from_abbreviation``) to the ``url`` of a matching
        post.  Later matches overwrite earlier ones; only one example per
        language is wanted.  An empty dict is returned when the raw data
        is missing or malformed.
    """
    resp = {}
    try:
        # ``.values()`` exists on both Python 2 and Python 3 dicts.  The
        # former ``.iteritems()`` raised AttributeError on Python 3, which
        # the handler below swallowed, so the result was always empty.
        for posts in self.altmetric_api_raw["posts"].values():
            for post in posts:
                for key in ("title", "summary"):
                    text = post.get(key)
                    if not text:
                        # A missing or empty field only disqualifies this
                        # field, not every remaining post.
                        continue
                    if len(text.split(" ")) <= 7:
                        # Too short to qualify; skip the costly detection.
                        continue
                    try:
                        detections = langdetect.detect_langs(text)
                    except langdetect.lang_detect_exception.LangDetectException:
                        continue
                    if not detections:
                        # No candidate was reported for this text.
                        continue
                    top_detection = detections[0]
                    if top_detection.prob > 0.90 and top_detection.lang != "en":
                        language_name = get_language_from_abbreviation(top_detection.lang)
                        # overwrites. that's ok, we just want one example
                        resp[language_name] = post["url"]
    except (KeyError, AttributeError, TypeError):
        # Best effort: malformed raw data yields whatever was collected.
        pass
    return resp
示例6: run
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Run the App main logic.

    Reads the input text from the playbook, detects its language and
    writes the language code and its probability to the playbook outputs
    ``detectedLanguageCode`` and ``detectedLanguageProbability``.  The
    same values are reported in ``self.exit_message``.
    """
    text = self.tcex.playbook.read(self.args.text)

    # Detect once and reuse the result.  langdetect is non-deterministic
    # unless seeded, so two separate calls could pair the code from one
    # run with the probability from another.
    top_detection = detect_langs(text)[0]
    detected_language_code = top_detection.lang
    detected_language_probability = top_detection.prob

    self.tcex.playbook.create_output('detectedLanguageCode', detected_language_code, 'String')
    self.tcex.playbook.create_output('detectedLanguageProbability', detected_language_probability, 'String')
    self.exit_message = 'Detected the language as {} (with a probability of {})'.format(detected_language_code, detected_language_probability)
示例7: detect_lang
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def detect_lang(main, file):
    """Detect the language of a text file.

    The file is read in full, or only up to the configured number of
    lines, and classified with ``langid``.  A ``zh`` result is refined to
    ``zh_cn``/``zh_tw`` with ``langdetect``; ``no`` and ``sr`` are mapped
    to ``nb`` and ``sr_cyrl``.  The code is finally converted with
    ``wl_conversion.to_iso_639_3``.

    :param main: object whose ``settings_custom['auto_detection']`` holds
        the ``detection_settings`` and ``default_settings`` used here.
    :param file: mapping with the ``'path'`` and ``'encoding'`` of the file.
    :return: ``(lang, success)``; on any error ``lang`` is the configured
        default language and ``success`` is ``False``.
    """
    try:
        detection_settings = main.settings_custom['auto_detection']['detection_settings']

        with open(file['path'], 'r', encoding = file['encoding']) as f:
            if detection_settings['number_lines_no_limit']:
                text = f.read()
            else:
                max_lines = detection_settings['number_lines']
                lines = []

                for i, line in enumerate(f):
                    if i >= max_lines:
                        break

                    lines.append(line)

                # Join once instead of growing a string line by line.
                text = ''.join(lines)

        lang_code_639_1 = langid.classify(text)[0]

        # Chinese (Simplified) & Chinese (Traditional)
        if lang_code_639_1 == 'zh':
            lang_code_639_1 = 'zh_cn'

            # Take the most probable Chinese variant reported by langdetect.
            for candidate in sorted(langdetect.detect_langs(text), key = lambda item: -item.prob):
                if candidate.lang in ['zh-cn', 'zh-tw']:
                    lang_code_639_1 = candidate.lang.replace('-', '_')

                    break
        # Norwegian Bokmål
        elif lang_code_639_1 == 'no':
            lang_code_639_1 = 'nb'
        # Serbian (Cyrillic)
        elif lang_code_639_1 == 'sr':
            lang_code_639_1 = 'sr_cyrl'

        lang = wl_conversion.to_iso_639_3(main, lang_code_639_1)
        success = True
    # Deliberately broad: any failure falls back to the default language.
    # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
    # SystemExit propagate.
    except Exception:
        lang = main.settings_custom['auto_detection']['default_settings']['default_lang']
        success = False

    return lang, success
示例8: run
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Report shared-preferences files whose content looks like plain text.

    The application is started, every file found under the
    ``shared_prefs`` directory of each of its data paths is read, and a
    file is reported when the top language detection for its content has
    a probability above ``self.min_percentage`` percent.

    :return: ``{"print": ...}`` when the application is not installed,
        otherwise ``{"<name>_result": result}`` where ``result`` carries
        the title, severity, report flag and details.
    """
    result = {
        "title": "Application Does Not Encrypt Shared Preferences",
        "details": "",
        "severity": "Medium",
        "report": False
    }

    if not self.device.installed(self.identifier):
        return {"print": "Application not installed"}

    # Imported locally: this block only relies on ``detect_langs`` being
    # available at module level.
    from langdetect.lang_detect_exception import LangDetectException

    Log.info("Starting the application")
    self.device.start(self.identifier)
    sleep(5)

    Log.info("Finding files in application's data")
    listed_files = []
    for file_path in self.device.data_paths(self.identifier):
        listed_files += self.device.find_files(
            "{}/shared_prefs".format(file_path))

    # Convert the percentage to a probability.  The former
    # ``float("0.{}".format(p))`` turned 5 into 0.5 and 100 into 0.1.
    min_probability = float(self.min_percentage) / 100.0

    Log.info("Analysing application's data")
    report_files = []
    for filename in listed_files:
        if not filename:
            continue

        file_content = self.device.file_content(filename)
        try:
            candidates = detect_langs(file_content)
        except LangDetectException:
            # No language features at all (e.g. empty content): such a
            # file is not readable text, so it is not reported.
            Log.debug("{} language could not be detected".format(filename))
            continue
        if not candidates:
            continue

        lang = candidates[0]
        Log.debug("{} language {}: {}".format(filename,
            lang.lang, lang.prob))

        if lang.prob > min_probability:
            report_files += [filename]

    if report_files:
        result.update({
            "report": True,
            "details": "* Unencrypted Files:\n * {}".format("\n * ".join(
                report_files))
        })

    return {
        "{}_result".format(self.name()): result
    }
示例9: run
# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Report keychain items of the application that look like plain text.

    The keychain identifier is taken from the binary's entitlements
    (``keychain-access-groups``), falling back to ``self.identifier``.
    Keychain entries whose entitlement group, account or service contains
    that identifier are collected, and an entry is reported when the top
    language detection for its data has a probability above
    ``self.min_percentage`` percent.

    :return: ``{"print": ...}`` when no entitlements could be obtained,
        otherwise ``{"<name>_result": result}`` where ``result`` carries
        the title, severity, report flag and details.
    """
    result = {
        "title": "Application Saves Unencrypted Data In Keychain",
        "details": "",
        "severity": "Low",
        "report": False
    }

    Log.info("Getting keychain's IDs")
    ent_module = EModule()
    ent_module.binary = self.binary
    ent_result = ent_module.run()

    # The last key ending in "_entitlements" wins.
    entitlements = None
    for key in ent_result:
        if key.endswith("_entitlements"):
            entitlements = ent_result[key]

    if not entitlements:
        return {"print": "Couldn't get entitlements from the bianry."}

    keychain_id = self.identifier
    if "keychain-access-groups" in entitlements:
        keychain_id = entitlements["keychain-access-groups"]

    keychain_module = KeychainModule()
    keychain_module.device = self.device
    keychain_module.output = None
    keychain_result = keychain_module.run()
    keychain_data = keychain_result["keychain_data"]

    # Keep the entries that mention the keychain identifier in any of the
    # identifying fields (checked in this order, empty fields skipped).
    data = []
    for key in keychain_data:
        if any(key[field] and keychain_id in key[field]
               for field in ("entitlement_group", "account", "service")):
            data += [str(key['keychain_data'])]

    # Imported locally: this block only relies on ``detect_langs`` being
    # available at module level.
    from langdetect.lang_detect_exception import LangDetectException

    # Convert the percentage to a probability.  The former
    # ``float("0.{}".format(p))`` turned 5 into 0.5 and 100 into 0.1.
    min_probability = float(self.min_percentage) / 100.0

    report_data = []
    for item in data:
        try:
            candidates = detect_langs(item)
        except LangDetectException:
            # No language features at all: not readable text, so the
            # item is not reported.
            continue
        if candidates and candidates[0].prob > min_probability:
            report_data += [item]

    if report_data:
        result.update({
            "report": True,
            "details": "The following data was found:\n* {}".format(
                "\n* ".join(report_data))
        })

    return {
        "{}_result".format(self.name()): result
    }