当前位置: 首页>>代码示例>>Python>>正文


Python langdetect.detect_langs方法代码示例

本文整理汇总了 Python 中 langdetect.detect_langs 方法的典型用法代码示例。如果您正苦于以下问题:Python langdetect.detect_langs 方法的具体用法?Python langdetect.detect_langs 怎么用?Python langdetect.detect_langs 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 langdetect 的用法示例。


在下文中一共展示了 langdetect.detect_langs 方法的 9 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: get_lang

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def get_lang(text):
    """Return the language code chosen for *text*, with a bias towards Russian.

    Every candidate returned by ``langdetect.detect_langs`` is considered;
    the candidate whose code is ``"ru"`` has 0.2 added to its ``prob``
    before candidates are compared with ``<``.  ``"ru"`` is returned when
    detection raises ``LangDetectException`` or yields no candidates.
    """
    try:
        candidates = langdetect.detect_langs(text)
    except langdetect.lang_detect_exception.LangDetectException:
        # Detection failed: fall back to the default language.
        return "ru"

    best = None
    for candidate in candidates:
        if candidate.lang == "ru":
            # Bias the ranking in favour of Russian.
            candidate.prob += 0.2

        if best is None or best < candidate:
            best = candidate

    return "ru" if best is None else best.lang
开发者ID:ekonda,项目名称:sketal,代码行数:22,代码来源:outsource_sayer.py

示例2: classify

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def classify(text, debug = False):
    """Classify the language of *text* using the first ``detect_langs`` result.

    If detection raises ``UnicodeDecodeError`` the text is decoded as UTF-8
    and detection is retried.

    With ``debug`` set, return the ``(lang, prob)`` pair unconditionally.
    Otherwise return the language code only when its probability is above
    0.90, and ``None`` when it is not.
    """
    try:
        top = detect_langs(text)[0]
    except UnicodeDecodeError:
        top = detect_langs(text.decode("utf-8"))[0]

    if debug:
        return (top.lang, top.prob)

    return top.lang if top.prob > 0.90 else None
开发者ID:dirtyfilthy,项目名称:freshonions-torscraper,代码行数:18,代码来源:detect_language.py

示例3: process_normalized_lang_map

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def process_normalized_lang_map(text):
    """Return detection probabilities for *text* keyed by normalized language.

    A falsy *text* is detected as the empty string, and a
    ``LangDetectException`` is treated as "no languages detected".  Each code
    in ``ALL_LANGS`` is mapped through ``COMMON_LANGUAGE_MAP`` (unmapped
    codes are kept as they are) and the probabilities of codes sharing a
    normalized name are summed.  The result is a ``defaultdict`` whose
    missing keys read as ``0.0``.
    """
    try:
        detections = langdetect.detect_langs(text or "")
    except langdetect.lang_detect_exception.LangDetectException:
        detections = []

    detected_probs = {}
    for detection in detections:
        detected_probs[detection.lang] = detection.prob

    normalized_lang_map = defaultdict(lambda: 0.0)
    for code in ALL_LANGS:
        normalized_code = COMMON_LANGUAGE_MAP.get(code, code)
        normalized_lang_map[normalized_code] += detected_probs.get(code, 0.0)

    return normalized_lang_map
开发者ID:wikimedia,项目名称:editquality,代码行数:15,代码来源:translatewiki.py

示例4: check_language

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def check_language(self, msg, target=None):
    """Check the language of the message.

    Languages detected with a probability above the
    ``textcat_acceptable_prob`` setting are stored, space separated, in
    ``msg.plugin_tags["LANGUAGES"]``.  Nothing is stored and the rule does
    not trigger when more languages than ``textcat_max_languages`` were
    found.

    :return True if any detected language is missing from the
    ``ok_languages`` setting (unless that setting contains "all") and
    False otherwise
    """
    min_prob = self["textcat_acceptable_prob"]
    detections = langdetect.detect_langs(msg.text)
    self.ctxt.log.debug("TextCat results: %s", detections)

    langs = []
    for detection in detections:
        if detection.prob > min_prob:
            langs.append(detection.lang)

    if len(langs) > self["textcat_max_languages"]:
        self.ctxt.log.debug("Too many languages.")
        return False

    msg.plugin_tags["LANGUAGES"] = " ".join(langs)
    ok_languages = self["ok_languages"]
    if "all" in ok_languages:
        # Every language is acceptable.
        return False

    return any(lang not in ok_languages for lang in langs)
开发者ID:SpamExperts,项目名称:OrangeAssassin,代码行数:28,代码来源:textcat.py

示例5: languages_with_examples

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def languages_with_examples(self):
    """Map each confidently detected non-English language to an example URL.

    Iterates the post lists stored under ``self.altmetric_api_raw["posts"]``
    and inspects each post's ``"title"`` and ``"summary"``.  A text counts
    when it has more than 7 space-separated words and the first
    ``detect_langs`` result has a probability above 0.90 and a language
    other than ``"en"``.

    Returns:
        dict mapping the name returned by ``get_language_from_abbreviation``
        to the ``"url"`` of one matching post (later matches overwrite
        earlier ones).  Any ``KeyError``/``AttributeError``/``TypeError``
        raised while walking the data stops the scan and returns what was
        collected so far.
    """
    resp = {}

    try:
        # .values() rather than the Python 2-only .iteritems(): on Python 3
        # the missing method raised AttributeError, which the handler below
        # swallowed, so the result was always empty.
        for posts in self.altmetric_api_raw["posts"].values():
            for post in posts:
                for key in ["title", "summary"]:
                    text = post[key]

                    # Short snippets are never accepted, so skip the
                    # comparatively expensive detection for them.
                    if len(text.split(" ")) <= 7:
                        continue

                    try:
                        top_detection = langdetect.detect_langs(text)[0]
                    except langdetect.lang_detect_exception.LangDetectException:
                        continue

                    if top_detection.prob > 0.90 and top_detection.lang != "en":
                        language_name = get_language_from_abbreviation(top_detection.lang)

                        # overwrites.  that's ok, we just want one example
                        resp[language_name] = post["url"]

    except (KeyError, AttributeError, TypeError):
        # Best effort: malformed or missing data ends the scan.
        pass

    return resp
开发者ID:ourresearch,项目名称:impactstory-tng,代码行数:28,代码来源:product.py

示例6: run

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Run the App main logic.

    Reads the text named by ``self.args.text`` from the playbook, detects
    its language, and writes the first detection's language code and
    probability to the ``detectedLanguageCode`` and
    ``detectedLanguageProbability`` outputs.  The same pair is summarized
    in ``self.exit_message``.
    """
    text = self.tcex.playbook.read(self.args.text)

    # Detect exactly once: langdetect is non-deterministic unless seeded, so
    # two separate calls could report a code from one run together with a
    # probability from another.
    top_detection = detect_langs(text)[0]
    detected_language_code = top_detection.lang
    detected_language_probability = top_detection.prob

    self.tcex.playbook.create_output('detectedLanguageCode', detected_language_code, 'String')
    self.tcex.playbook.create_output('detectedLanguageProbability', detected_language_probability, 'String')
    self.exit_message = 'Detected the language as {} (with a probability of {})'.format(detected_language_code, detected_language_probability)
开发者ID:ThreatConnect-Inc,项目名称:threatconnect-playbooks,代码行数:15,代码来源:app.py

示例7: detect_lang

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def detect_lang(main, file):
    """Detect the language of a text file.

    Args:
        main: object whose ``settings_custom['auto_detection']`` holds the
            ``detection_settings`` (``number_lines_no_limit``,
            ``number_lines``) and ``default_settings`` (``default_lang``).
        file: mapping with the ``'path'`` and ``'encoding'`` of the file.

    Returns:
        ``(lang, success)``.  On success ``lang`` is the value returned by
        ``wl_conversion.to_iso_639_3`` for the detected code.  On any
        ``Exception`` ``lang`` is the configured default language and
        ``success`` is ``False``.
    """
    try:
        detection_settings = main.settings_custom['auto_detection']['detection_settings']

        with open(file['path'], 'r', encoding = file['encoding']) as f:
            if detection_settings['number_lines_no_limit']:
                text = f.read()
            else:
                # Only the first `number_lines` lines are sampled.
                max_lines = detection_settings['number_lines']
                sampled_lines = []

                for i, line in enumerate(f):
                    if i >= max_lines:
                        break

                    sampled_lines.append(line)

                text = ''.join(sampled_lines)

        lang_code_639_1 = langid.classify(text)[0]

        # Chinese (Simplified) & Chinese (Traditional)
        if lang_code_639_1 == 'zh':
            # Simplified is the default; langdetect's most probable Chinese
            # variant, if any, overrides it.
            lang_code_639_1 = 'zh_cn'

            for lang in sorted(langdetect.detect_langs(text), key = lambda item: -item.prob):
                if lang.lang in ['zh-cn', 'zh-tw']:
                    lang_code_639_1 = lang.lang.replace('-', '_')

                    break
        # Norwegian Bokmål
        elif lang_code_639_1 == 'no':
            lang_code_639_1 = 'nb'

        # Serbian (Cyrillic)
        elif lang_code_639_1 == 'sr':
            lang_code_639_1 = 'sr_cyrl'

        lang = wl_conversion.to_iso_639_3(main, lang_code_639_1)

        success = True
    except Exception:
        # Best-effort fallback for I/O, decoding and detection failures.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate.
        lang = main.settings_custom['auto_detection']['default_settings']['default_lang']

        success = False

    return lang, success
开发者ID:BLKSerene,项目名称:Wordless,代码行数:45,代码来源:wl_detection.py

示例8: run

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Report shared-preferences files whose content reads as natural language.

    Starts the application, lists the files under each ``shared_prefs``
    directory of its data paths, and runs language detection on every
    file's content.  A file is reported as unencrypted when the first
    detection's probability exceeds ``self.min_percentage`` percent.

    Returns:
        ``{"print": ...}`` when the application is not installed, otherwise
        ``{"<name>_result": result}`` with ``result["report"]`` set to True
        and the file list in ``result["details"]`` when any file matched.
    """
    # Function-scope import: only the langdetect package itself is known to
    # be imported by this file.
    from langdetect.lang_detect_exception import LangDetectException

    result = {
        "title": "Application Does Not Encrypt Shared Preferences",
        "details": "",
        "severity": "Medium",
        "report": False
    }

    if not self.device.installed(self.identifier):
        return {"print": "Application not installed"}

    Log.info("Starting the application")
    self.device.start(self.identifier)
    # NOTE(review): presumably gives the app time to write its preferences
    # before they are read - confirm.
    sleep(5)

    Log.info("Finding files in application's data")
    target_paths = ["{}/shared_prefs".format(file_path) for file_path in
        self.device.data_paths(self.identifier)]

    listed_files = []
    for data_path in target_paths:
        listed_files += self.device.find_files(data_path)

    Log.info("Analysing application's data")

    # Convert the percentage once.  The former "0.{}" string formatting
    # turned 5 into 0.5 and 100 into 0.1 instead of 0.05 and 1.0.
    min_probability = float(self.min_percentage) / 100.0

    report_files = []
    for filename in listed_files:
        if not filename:
            continue

        file_content = self.device.file_content(filename)

        try:
            lang = detect_langs(file_content)[0]
        except LangDetectException:
            # No detectable language in this file: it is not reported, and
            # the scan of the remaining files continues.
            Log.debug("{} language could not be detected".format(filename))
            continue

        Log.debug("{} language {}: {}".format(filename,
            lang.lang, lang.prob))

        if lang.prob > min_probability:
            report_files.append(filename)

    if report_files:
        result.update({
            "report": True,
            "details": "* Unencrypted Files:\n * {}".format("\n * ".join(
                report_files))
        })

    return {
        "{}_result".format(self.name()): result
    }
开发者ID:nettitude,项目名称:scrounger,代码行数:49,代码来源:encrypted_shared_preferences.py

示例9: run

# 需要导入模块: import langdetect [as 别名]
# 或者: from langdetect import detect_langs [as 别名]
def run(self):
    """Report keychain entries of the application that read as natural language.

    Obtains the binary's entitlements through ``EModule`` and the keychain
    dump through ``KeychainModule``.  Entries are kept when their
    ``entitlement_group``, ``account`` or ``service`` contains the keychain
    id (the ``keychain-access-groups`` entitlement when present, otherwise
    ``self.identifier``).  An entry is reported when the first detection's
    probability for its data exceeds ``self.min_percentage`` percent.

    Returns:
        ``{"print": ...}`` when no entitlements were found, otherwise
        ``{"<name>_result": result}`` with ``result["report"]`` set to True
        and the matching data in ``result["details"]`` when any entry matched.
    """
    # Function-scope import: only the langdetect package itself is known to
    # be imported by this file.
    from langdetect.lang_detect_exception import LangDetectException

    result = {
        "title": "Application Saves Unencrypted Data In Keychain",
        "details": "",
        "severity": "Low",
        "report": False
    }

    Log.info("Getting keychain's IDs")

    ent_module = EModule()
    ent_module.binary = self.binary
    ent_result, entitlements = ent_module.run(), None
    for key in ent_result:
        # The last key ending in "_entitlements" wins.
        if key.endswith("_entitlements"):
            entitlements = ent_result[key]

    if not entitlements:
        return {"print": "Couldn't get entitlements from the bianry."}

    keychain_id = self.identifier
    if "keychain-access-groups" in entitlements:
        keychain_id = entitlements["keychain-access-groups"]

    keychain_module = KeychainModule()
    keychain_module.device = self.device
    keychain_module.output = None
    keychain_result = keychain_module.run()
    keychain_data = keychain_result["keychain_data"]

    data = []
    for key in keychain_data:
        if any(key[field] and keychain_id in key[field]
               for field in ("entitlement_group", "account", "service")):
            data.append(str(key['keychain_data']))

    # Convert the percentage once.  The former "0.{}" string formatting
    # turned 5 into 0.5 and 100 into 0.1 instead of 0.05 and 1.0.
    min_probability = float(self.min_percentage) / 100.0

    report_data = []
    for item in data:
        try:
            lang = detect_langs(item)[0]
        except LangDetectException:
            # No detectable language (e.g. an empty value): the entry is not
            # reported, and the remaining entries are still checked.
            continue

        if lang.prob > min_probability:
            report_data.append(item)

    if report_data:
        result.update({
            "report": True,
            "details": "The following data was found:\n* {}".format(
                "\n* ".join(report_data))
        })

    return {
        "{}_result".format(self.name()): result
    }
开发者ID:nettitude,项目名称:scrounger,代码行数:56,代码来源:unencrypted_keychain_data.py


注:本文中的langdetect.detect_langs方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。