当前位置: 首页>>代码示例>>Java>>正文


Java Language类代码示例

本文整理汇总了Java中com.cybozu.labs.langdetect.Language的典型用法代码示例。如果您正苦于以下问题:Java Language类的具体用法?Java Language怎么用?Java Language使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


Language类属于com.cybozu.labs.langdetect包,在下文中一共展示了Language类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: detectLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Runs language detection over {@code content} and converts every candidate reported by the
 * detector into a {@code DetectedLanguage}.
 *
 * @param content text to analyse; when {@code StringUtils.isEmpty(content)} is true no
 *                detection is attempted
 * @return the detected candidates in the order the detector returned them; an empty list when
 *         the input is empty or a {@code LangDetectException} is raised
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    List<DetectedLanguage> results = Lists.newArrayList();
    if (!StringUtils.isEmpty(content)) {
        try {
            Detector langDetector = DetectorFactory.create();
            langDetector.append(content);
            ArrayList<Language> candidates = langDetector.getProbabilities();
            for (Language candidate : candidates) {
                results.add(new DetectedLanguage(candidate.lang, candidate.prob));
            }
        } catch (LangDetectException ignored) {
            // Deliberately best-effort: a detection failure yields whatever is in the
            // result list (nothing), and the caller sees an empty list.
        }
    }
    return results;
}
 
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:18,代码来源:MultiLangDetectLanguageIdentifierUpdateProcessor.java

示例2: sortProbability

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Builds the list of language candidates whose probability exceeds {@code PROB_THRESHOLD},
 * ordered by descending probability.
 *
 * <p>Each qualifying entry is inserted in front of the first already-collected candidate with
 * a strictly smaller probability, so candidates with equal probability keep the order of
 * their indices in {@code prob}.
 *
 * @param prob probabilities indexed in parallel with {@code langlist}
 * @return language candidates ordered by probability, highest first
 */
private List<Language> sortProbability(double[] prob) {
    final List<Language> ranked = new ArrayList<Language>();
    for (int langIndex = 0; langIndex < prob.length; langIndex++) {
        final double probability = prob[langIndex];
        if (!(probability > PROB_THRESHOLD)) {
            continue;
        }
        // Walk past every candidate that is not strictly less probable than this one.
        int insertAt = 0;
        while (insertAt < ranked.size() && !(ranked.get(insertAt).prob < probability)) {
            insertAt++;
        }
        ranked.add(insertAt, new Language(langlist.get(langIndex), probability));
    }
    return ranked;
}
 
开发者ID:malcolmgreaves,项目名称:language-detection,代码行数:19,代码来源:Detector.java

示例3: detectLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of {@code content}, keeping only those listed in
 * {@code autoDetectQueryLocales} (or all of them when that collection is empty).
 *
 * @param content text to analyse; must not be {@code null}
 * @return the accepted candidates in detector order, or an empty list when the input is blank
 *         or detection raises a {@code LangDetectException}
 */
private List<DetectedLanguage> detectLanguage(String content) {
	// Blank input is answered without consulting the detector
	// (the original note: "to be consistent with the tika impl?").
	if (content.trim().isEmpty()) {
		log.debug("No input text to detect language from, returning empty list");
		return Collections.emptyList();
	}

	final ArrayList<Language> candidates;
	try {
		Detector langDetector = DetectorFactory.create();
		langDetector.append(content);
		candidates = langDetector.getProbabilities();
	} catch (LangDetectException e) {
		log.debug("Could not determine language, returning empty list: ", e);
		return Collections.emptyList();
	}

	ArrayList<DetectedLanguage> accepted = new ArrayList<>();
	for (Language candidate : candidates) {
		// Skip a candidate only when a locale filter is configured and does not list it.
		if (autoDetectQueryLocales.size() != 0 && !autoDetectQueryLocales.contains(candidate.lang)) {
			continue;
		}
		accepted.add(new DetectedLanguage(candidate.lang, candidate.prob));
	}
	return accepted;
}
 
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:25,代码来源:AbstractQParser.java

示例4: detect

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of {@code text} and reports the first candidate returned by the
 * detector together with its probability.
 *
 * @param text the text to analyse
 * @return a result built from the first candidate, or {@code UNKNOWN} when the detector cannot
 *         be created or returns no candidates
 */
@Override
public DetectionResult detect(String text) {
  final Detector langDetector;
  try {
    langDetector = DetectorFactory.create();
  } catch (LangDetectException e) {
    // TODO(skreft): log the reason
    return UNKNOWN;
  }

  langDetector.append(text);
  final List<Language> candidates = langDetector.getProbabilities();
  if (candidates.isEmpty()) {
    return UNKNOWN;
  }

  final Language top = candidates.get(0);
  return new DetectionResult(top.lang, top.prob);
}
 
开发者ID:deezer,项目名称:weslang,代码行数:20,代码来源:DetectionServiceImplLanguageDetection.java

示例5: detectLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of {@code content} and converts each one into a
 * {@code DetectedLanguage}.
 *
 * @param content text to analyse; must not be {@code null}
 * @return the candidates in detector order, or an empty list when the input is blank or
 *         detection raises a {@code LangDetectException}
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
  // Blank input is answered without consulting the detector
  // (the original note: "to be consistent with the tika impl?").
  if (content.trim().isEmpty()) {
    log.debug("No input text to detect language from, returning empty list");
    return Collections.emptyList();
  }

  final ArrayList<Language> candidates;
  try {
    Detector langDetector = DetectorFactory.create();
    langDetector.append(content);
    candidates = langDetector.getProbabilities();
  } catch (LangDetectException e) {
    log.debug("Could not determine language, returning empty list: ", e);
    return Collections.emptyList();
  }

  ArrayList<DetectedLanguage> converted = new ArrayList<DetectedLanguage>();
  for (Language candidate : candidates) {
    converted.add(new DetectedLanguage(candidate.lang, candidate.prob));
  }
  return converted;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:LangDetectLanguageIdentifierUpdateProcessor.java

示例6: identifyLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Identifies the language of the visible text of an HTML document.
 *
 * <p>The HTML is parsed with Jsoup and only its extracted text is passed to the detector. The
 * detected language is returned only when the first candidate's probability is above
 * {@code PROBABILITY_THRESHOLD}.
 *
 * @param html the HTML source to analyse
 * @return the detected language code, or {@code UNKNOWN_LANGUAGE} when the extracted text is
 *         empty, detection fails, no candidate is returned, or the first candidate is not
 *         confident enough
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@Override
public String identifyLanguage(String html)
        throws IOException
{
    // extracting plain html text
    Document doc = Jsoup.parse(html);
    String text = doc.text();

    // we might have removed everything -> no lang
    if (text.isEmpty()) {
        return UNKNOWN_LANGUAGE;
    }

    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String detectedLang = detector.detect();

        ArrayList<Language> detectedProbabilities = detector.getProbabilities();

        // The candidate list may be empty (presumably when no language clears the library's
        // own probability cut-off - confirm against the library version in use); get(0)
        // would then throw IndexOutOfBoundsException instead of reporting "unknown".
        if (detectedProbabilities.isEmpty()) {
            return UNKNOWN_LANGUAGE;
        }

        if (detectedProbabilities.get(0).prob > PROBABILITY_THRESHOLD) {
            return detectedLang;
        }
        else {
            return UNKNOWN_LANGUAGE;
        }
    }
    catch (LangDetectException e) {
        return UNKNOWN_LANGUAGE;
    }
}
 
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:32,代码来源:CybozuLanguageIdentifier.java

示例7: detectLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of a Solr document by feeding the String values of every
 * configured input field to a single detector.
 *
 * <p>Each value is truncated to {@code maxFieldValueChars} characters before being appended,
 * and a single space is appended after every value. Non-String values are skipped with a
 * warning.
 *
 * @param doc the document whose {@code inputFields} are analysed
 * @return the candidates in detector order, or an empty list when a
 *         {@code LangDetectException} is raised
 */
@Override
protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
  try {
    Detector langDetector = DetectorFactory.create();
    langDetector.setMaxTextLength(maxTotalChars);

    for (String fieldName : inputFields) {
      log.debug("Appending field " + fieldName);
      if (!doc.containsKey(fieldName)) {
        continue;
      }
      Collection<Object> values = doc.getFieldValues(fieldName);
      if (values == null) {
        continue;
      }
      for (Object value : values) {
        if (!(value instanceof String)) {
          log.warn("Field " + fieldName + " not a String value, not including in detection");
          continue;
        }
        String textValue = (String) value;
        boolean overLimit = textValue.length() > maxFieldValueChars;
        langDetector.append(overLimit ? textValue.substring(0, maxFieldValueChars) : textValue);
        langDetector.append(" ");
      }
    }

    ArrayList<Language> candidates = langDetector.getProbabilities();
    ArrayList<DetectedLanguage> converted = new ArrayList<>();
    for (Language candidate : candidates) {
      converted.add(new DetectedLanguage(candidate.lang, candidate.prob));
    }
    return converted;
  } catch (LangDetectException e) {
    log.debug("Could not determine language, returning empty list: ", e);
    return Collections.emptyList();
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:39,代码来源:LangDetectLanguageIdentifierUpdateProcessor.java

示例8: isEnglish

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Checks whether English is among the languages detected for the given text.
 *
 * <p>Any candidate returned by the detector with the code {@code "en"} counts, regardless of
 * its position in the list.
 *
 * @param content the text to analyse; {@code null} or empty yields {@code false}
 * @return true if {@code "en"} is one of the detected candidates, false otherwise (including
 *         when any exception is raised during detection, which is logged as a warning)
 */
public Boolean isEnglish(String content) {
    try {
        if (content == null || content.isEmpty()) {
            return false;
        }

        Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        ArrayList<Language> candidates = langDetector.getProbabilities();

        // An empty candidate list simply never enters the loop.
        boolean englishFound = false;
        for (Language candidate : candidates) {
            if (candidate.lang.equals("en")) {
                englishFound = true;
                break;
            }
        }
        return englishFound;
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        return false;
    }
}
 
开发者ID:ViDA-NYU,项目名称:ache,代码行数:33,代码来源:LangDetection.java

示例9: detect

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of the target text and returns the name of the first candidate from
 * {@link #getProbabilities()}.
 *
 * @return the name of the first candidate, or {@code UNKNOWN_LANG} when there are no
 *         candidates
 * @throws LangDetectException code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public String detect() throws LangDetectException {
    final List<Language> ranked = getProbabilities();
    return ranked.isEmpty() ? UNKNOWN_LANG : ranked.get(0).lang;
}
 
开发者ID:malcolmgreaves,项目名称:language-detection,代码行数:14,代码来源:Detector.java

示例10: getProbabilities

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Returns the language candidates for the text appended so far.
 *
 * <p>{@code detectBlock()} is invoked first when {@code langprob} has not been set yet; the
 * result is whatever {@code sortProbability(langprob)} produces.
 *
 * @return the list of possible languages as built by {@code sortProbability}
 * @throws LangDetectException code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public List<Language> getProbabilities() throws LangDetectException {
    final boolean notComputedYet = (langprob == null);
    if (notComputedYet) {
        detectBlock();
    }
    final List<Language> candidates = sortProbability(langprob);
    return candidates;
}
 
开发者ID:malcolmgreaves,项目名称:language-detection,代码行数:13,代码来源:Detector.java

示例11: classifyText

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Classifies the language of {@code text} using the first candidate returned by the detector.
 *
 * @param text the text to classify
 * @return a classification built from the first candidate's language and probability; a
 *         classification of {@code "blank"} with value 0 when {@code text} is {@code null} or
 *         empty; {@code null} when detection fails or the detector returns no candidates
 */
public LanguageClassification classifyText(String text) {
	if (text == null || "".equals(text)) {
		return new LanguageClassification("blank",0);
	}
	try {
		Detector detector = DetectorFactory.create();
		detector.append(text);
		java.util.List<Language> candidates = detector.getProbabilities();
		// An empty candidate list used to surface as IndexOutOfBoundsException from get(0);
		// treat it like any other detection failure so callers only have to handle null.
		if (candidates.isEmpty()) {
			return null;
		}
		Language lang = candidates.get(0);
		return new LanguageClassification(lang.lang,lang.prob);
	} catch (LangDetectException e) {
		// Detection failure is reported to the caller as null.
		return null;
	}
}
 
开发者ID:computermacgyver,项目名称:twitter-mapred,代码行数:16,代码来源:LanguageClassificationService.java

示例12: detect

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of the target text and returns the name of the first candidate from
 * {@code getProbabilities()}.
 *
 * <p>This variant declares no checked exception.
 *
 * @return the name of the first candidate, or {@code UNKNOWN_LANG} when there are no
 *         candidates
 */
public String detect() {
    final List<Language> ranked = getProbabilities();
    return ranked.isEmpty() ? UNKNOWN_LANG : ranked.get(0).lang;
}
 
开发者ID:deezer,项目名称:weslang,代码行数:15,代码来源:Detector.java

示例13: sortProbability

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Collects the languages whose probability exceeds {@code PROB_THRESHOLD} and sorts them with
 * {@code languageComparator}.
 *
 * @param prob probabilities indexed in parallel with {@code langlist}
 * @return language candidates in the order imposed by {@code languageComparator}
 *         (presumably descending probability - confirm against the comparator)
 */
private List<Language> sortProbability(double[] prob) {
    final List<Language> candidates = new ArrayList<Language>(prob.length);
    int langIndex = 0;
    for (double probability : prob) {
        if (probability > PROB_THRESHOLD) {
            candidates.add(new Language(langlist.get(langIndex), probability));
        }
        langIndex++;
    }
    Collections.sort(candidates, languageComparator);
    return candidates;
}
 
开发者ID:deezer,项目名称:weslang,代码行数:16,代码来源:Detector.java

示例14: detectLanguage

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Performs language detection on the given text.
 *
 * <p>The text is lower-cased before being handed to the detector (the "issue#47 correction").
 * The result carries the candidate chosen by {@code extractLangWithHighestProbability}, the
 * original (not lower-cased) text, and a flag that is true when the detector returned more
 * than one candidate.
 *
 * @param text to test; must not be {@code null}
 * @return the detected language, or {@code null} when a {@code LangDetectException} is raised
 *         (the exception is logged as a warning)
 */
public LanguageDetectionResult detectLanguage(String text) {
    try {
        // NOTE(review): 0.15 is presumably the detector's smoothing parameter - confirm
        // against the DetectorFactory documentation.
        Detector detector = DetectorFactory.create(0.15);
        // issue#47 correction
        // Lower-case with Locale.ROOT: the no-arg toLowerCase() follows the JVM default
        // locale (e.g. "I" becomes dotless "ı" under a Turkish locale), which would make
        // detection results depend on the host configuration rather than on the text.
        detector.append(text.toLowerCase(java.util.Locale.ROOT));
        ArrayList<Language> languages = detector.getProbabilities();
        Language detectedLanguage =  
                extractLangWithHighestProbability(languages);
        return new LanguageDetectionResult(detectedLanguage, text, languages.size()>1);
    } catch (LangDetectException ex) {
        LOGGER.warn(ex);
    }
    return null;
}
 
开发者ID:Tanaguru,项目名称:Tanaguru,代码行数:21,代码来源:LanguageDetector.java

示例15: extractLangWithHighestProbability

import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Selects, from the candidates returned by the detector, the one with the highest
 * probability.
 *
 * <p>The running maximum starts at -1 and is replaced only by a strictly greater probability,
 * so the earliest candidate wins a tie.
 *
 * @param languages the candidates to scan
 * @return the language with the highest probability, or {@code null} when the list is empty
 *         or no candidate has a probability greater than -1
 */
private Language extractLangWithHighestProbability(ArrayList<Language> languages) {
    Language winner = null;
    double highestSoFar = -1;
    for (int idx = 0; idx < languages.size(); idx++) {
        final Language candidate = languages.get(idx);
        if (candidate.prob > highestSoFar) {
            winner = candidate;
            highestSoFar = candidate.prob;
        }
    }
    return winner;
}
 
开发者ID:Tanaguru,项目名称:Tanaguru,代码行数:19,代码来源:LanguageDetector.java


注:本文中的com.cybozu.labs.langdetect.Language类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。