本文整理汇总了Java中com.cybozu.labs.langdetect.Language类的典型用法代码示例。如果您正苦于以下问题:Java Language类的具体用法?Java Language怎么用?Java Language使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Language类属于com.cybozu.labs.langdetect包,在下文中一共展示了Language类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: detectLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Runs language detection over {@code content} and converts every candidate
 * reported by the detector into a {@link DetectedLanguage}.
 *
 * @param content text to analyse; when {@code StringUtils.isEmpty} reports it as
 *                empty, no detection is attempted
 * @return a list holding one entry per detector candidate (language code and
 *         probability); empty when the input is empty or detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    final List<DetectedLanguage> results = Lists.newArrayList();
    if (!StringUtils.isEmpty(content)) {
        try {
            final Detector langDetector = DetectorFactory.create();
            langDetector.append(content);
            final ArrayList<Language> candidates = langDetector.getProbabilities();
            for (Language candidate : candidates) {
                results.add(new DetectedLanguage(candidate.lang, candidate.prob));
            }
        } catch (LangDetectException ignored) {
            // Best effort: a detection failure is not reported to the caller;
            // whatever has been collected so far is returned instead.
        }
    }
    return results;
}
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:18,代码来源:MultiLangDetectLanguageIdentifierUpdateProcessor.java
示例2: sortProbability
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Builds the list of language candidates ordered by descending probability.
 * Only entries whose probability is greater than {@code PROB_THRESHOLD} are kept;
 * the language name for index {@code j} is taken from {@code langlist.get(j)}.
 *
 * @param prob probability per language, indexed like {@code langlist}
 * @return language candidates ordered by probability, highest first
 */
private List<Language> sortProbability(double[] prob) {
    final List<Language> ranked = new ArrayList<Language>();
    for (int langIndex = 0; langIndex < prob.length; ++langIndex) {
        final double candidateProb = prob[langIndex];
        if (!(candidateProb > PROB_THRESHOLD)) {
            continue;
        }
        // Find the first entry with a strictly smaller probability; inserting in
        // front of it keeps earlier entries ahead of later ones on ties.
        int insertAt = 0;
        while (insertAt < ranked.size() && !(ranked.get(insertAt).prob < candidateProb)) {
            ++insertAt;
        }
        ranked.add(insertAt, new Language(langlist.get(langIndex), candidateProb));
    }
    return ranked;
}
示例3: detectLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of {@code content}, keeping only those listed
 * in {@code autoDetectQueryLocales} (or all of them when that collection is empty).
 *
 * @param content text to analyse
 * @return the retained candidates as {@link DetectedLanguage} entries; an empty
 *         list when the trimmed input is empty or detection fails
 */
private List<DetectedLanguage> detectLanguage(String content) {
    final boolean nothingToDetect = content.trim().isEmpty(); // to be consistent with the tika impl?
    if (nothingToDetect) {
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }

    final ArrayList<DetectedLanguage> accepted = new ArrayList<>();
    try {
        final Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        final ArrayList<Language> candidates = langDetector.getProbabilities();
        for (Language candidate : candidates) {
            // Skip candidates outside the configured locales; an empty
            // configuration means "accept everything".
            if (autoDetectQueryLocales.size() != 0 && !autoDetectQueryLocales.contains(candidate.lang)) {
                continue;
            }
            accepted.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
    return accepted;
}
示例4: detect
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of {@code text} and reports the first candidate returned
 * by the detector.
 *
 * @param text text to analyse
 * @return the first candidate's language code and probability, or {@code UNKNOWN}
 *         when the detector cannot be created or reports no candidates
 */
@Override
public DetectionResult detect(String text) {
    Detector langDetector;
    try {
        langDetector = DetectorFactory.create();
    } catch (LangDetectException e) {
        // TODO(skreft): log the reason
        return UNKNOWN;
    }

    langDetector.append(text);
    List<Language> candidates = langDetector.getProbabilities();
    if (candidates.isEmpty()) {
        return UNKNOWN;
    }
    Language top = candidates.get(0);
    return new DetectionResult(top.lang, top.prob);
}
示例5: detectLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of {@code content}.
 *
 * @param content text to analyse
 * @return one {@link DetectedLanguage} per detector candidate; an empty list when
 *         the trimmed input is empty or detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    final boolean nothingToDetect = content.trim().isEmpty(); // to be consistent with the tika impl?
    if (nothingToDetect) {
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }

    final ArrayList<DetectedLanguage> converted = new ArrayList<DetectedLanguage>();
    try {
        final Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        final ArrayList<Language> candidates = langDetector.getProbabilities();
        for (Language candidate : candidates) {
            converted.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
    return converted;
}
示例6: identifyLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Identifies the language of the visible text of an HTML document.
 *
 * <p>The markup is parsed with Jsoup and only the extracted text is fed to the
 * detector. The detected language is returned only if the first candidate's
 * probability is greater than {@code PROBABILITY_THRESHOLD}.
 *
 * @param html HTML source to analyse
 * @return the detected language, or {@code UNKNOWN_LANGUAGE} when the document
 *         contains no text, the detector reports no candidates, the first
 *         candidate is not above the threshold, or detection fails
 * @throws IOException declared by the signature; nothing in this body throws it
 */
@Override
public String identifyLanguage(String html)
        throws IOException
{
    // extracting plain html text
    Document doc = Jsoup.parse(html);
    String text = doc.text();
    // we might have removed everything -> no lang
    if (text.isEmpty()) {
        return UNKNOWN_LANGUAGE;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String detectedLang = detector.detect();
        ArrayList<Language> detectedProbabilities = detector.getProbabilities();
        // The candidate list can be empty; indexing it unconditionally would
        // throw IndexOutOfBoundsException instead of reporting "unknown".
        if (detectedProbabilities.isEmpty()) {
            return UNKNOWN_LANGUAGE;
        }
        if (detectedProbabilities.get(0).prob > PROBABILITY_THRESHOLD) {
            return detectedLang;
        }
        return UNKNOWN_LANGUAGE;
    }
    catch (LangDetectException e) {
        return UNKNOWN_LANGUAGE;
    }
}
示例7: detectLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the candidate languages of a document by feeding the String values of
 * the configured {@code inputFields} to a single detector.
 *
 * <p>The detector's maximum text length is set to {@code maxTotalChars}; each
 * individual value is cut to at most {@code maxFieldValueChars} characters and
 * followed by a single space. Non-String values are skipped with a warning.
 *
 * @param doc document whose fields are inspected
 * @return one {@link DetectedLanguage} per detector candidate, or an empty list
 *         when detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
    try {
        Detector detector = DetectorFactory.create();
        detector.setMaxTextLength(maxTotalChars);

        for (String fieldName : inputFields) {
            log.debug("Appending field " + fieldName);
            if (!doc.containsKey(fieldName)) {
                continue;
            }
            Collection<Object> values = doc.getFieldValues(fieldName);
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                if (!(value instanceof String)) {
                    log.warn("Field " + fieldName + " not a String value, not including in detection");
                    continue;
                }
                String asText = (String) value;
                // Cap each value so that one long field cannot dominate the sample.
                String clipped = asText.length() > maxFieldValueChars
                        ? asText.substring(0, maxFieldValueChars)
                        : asText;
                detector.append(clipped);
                detector.append(" ");
            }
        }

        ArrayList<Language> candidates = detector.getProbabilities();
        ArrayList<DetectedLanguage> converted = new ArrayList<>();
        for (Language candidate : candidates) {
            converted.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
        return converted;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
示例8: isEnglish
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Checks whether English is among the languages detected for the given text.
 *
 * <p>The result is {@code true} as soon as any candidate reported by the detector
 * has the language code {@code "en"}; it does not have to be the first candidate.
 *
 * @param content text to analyse; {@code null} or empty yields {@code false}
 * @return true if the detector lists English as a candidate, false otherwise
 *         (including when any exception occurs, which is logged as a warning)
 */
public Boolean isEnglish(String content) {
    try {
        if (content == null || content.isEmpty()) {
            return false;
        }

        Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        ArrayList<Language> candidates = langDetector.getProbabilities();

        // An empty candidate list simply falls through to "false".
        for (Language candidate : candidates) {
            if (candidate.lang.equals("en")) {
                return true;
            }
        }
        return false;
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        return false;
    }
}
示例9: detect
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of the target text and returns the name of the first
 * entry of {@link #getProbabilities()}.
 *
 * @return the language name of the first candidate, or {@code UNKNOWN_LANG} when
 *         there are no candidates
 * @throws LangDetectException code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public String detect() throws LangDetectException {
    final List<Language> candidates = getProbabilities();
    if (candidates.isEmpty()) {
        return UNKNOWN_LANG;
    }
    return candidates.get(0).lang;
}
示例10: getProbabilities
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Returns the language candidates produced by {@code sortProbability(langprob)}.
 * If {@code langprob} is still {@code null}, {@code detectBlock()} is run first.
 *
 * @return list of possible languages (those whose probabilities are over
 *         PROB_THRESHOLD, ordered by descending probability)
 * @throws LangDetectException code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public List<Language> getProbabilities() throws LangDetectException {
    final boolean notYetDetected = (langprob == null);
    if (notYetDetected) {
        detectBlock();
    }
    // langprob is read only after the optional detectBlock() call above.
    final List<Language> ranked = sortProbability(langprob);
    return ranked;
}
示例11: classifyText
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Classifies the language of {@code text} using the first candidate reported by
 * the detector.
 *
 * @param text text to analyse
 * @return a classification carrying the first candidate's language code and
 *         probability; a {@code ("blank", 0)} classification when {@code text} is
 *         {@code null} or empty; {@code null} when detection fails or the
 *         detector reports no candidates
 */
public LanguageClassification classifyText(String text) {
    if (text == null || "".equals(text)) {
        return new LanguageClassification("blank",0);
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        java.util.List<Language> candidates = detector.getProbabilities();
        // The candidate list can be empty; treat that like a failed detection
        // instead of letting get(0) throw IndexOutOfBoundsException.
        if (candidates.isEmpty()) {
            return null;
        }
        Language lang = candidates.get(0);
        return new LanguageClassification(lang.lang,lang.prob);
    } catch (LangDetectException e) {
        // Detection failure is reported to the caller as null.
        return null;
    }
}
示例12: detect
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Detects the language of the target text and returns the name of the first
 * entry of {@code getProbabilities()}.
 *
 * <p>NOTE(review): earlier documentation mentioned a LangDetectException with
 * ErrorCode.CantDetectError (no valid features in text), but this signature
 * declares no checked exception - confirm how that failure is reported.
 *
 * @return the language name of the first candidate, or {@code UNKNOWN_LANG} when
 *         there are no candidates
 */
public String detect() {
    final List<Language> candidates = getProbabilities();
    return candidates.isEmpty() ? UNKNOWN_LANG : candidates.get(0).lang;
}
示例13: sortProbability
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Collects the languages whose probability is greater than {@code PROB_THRESHOLD}
 * and sorts them with {@code languageComparator}.
 *
 * @param prob probability per language, indexed like {@code langlist}
 * @return language candidates in the order defined by {@code languageComparator}
 *         (presumably descending probability - confirm against the comparator)
 */
private List<Language> sortProbability(double[] prob) {
    final int languageCount = prob.length;
    final List<Language> candidates = new ArrayList<Language>(languageCount);
    for (int langIndex = 0; langIndex < languageCount; ++langIndex) {
        final double p = prob[langIndex];
        if (!(p > PROB_THRESHOLD)) {
            continue;
        }
        candidates.add(new Language(langlist.get(langIndex), p));
    }
    Collections.sort(candidates, languageComparator);
    return candidates;
}
示例14: detectLanguage
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Performs the detection on the lower-cased form of {@code text}.
 *
 * <p>The detector is created with {@code DetectorFactory.create(0.15)}
 * (presumably the smoothing parameter - confirm against the library). The
 * candidate chosen by {@code extractLangWithHighestProbability} is wrapped in a
 * {@link LanguageDetectionResult} together with the original text and a flag
 * telling whether more than one candidate was reported.
 *
 * @param text text to test
 * @return the detection result, or {@code null} when a LangDetectException occurs
 *         (the exception is logged as a warning)
 */
public LanguageDetectionResult detectLanguage(String text) {
    try {
        Detector detector = DetectorFactory.create(0.15);
        // issue#47 correction
        // Locale.ROOT keeps the lower-casing independent of the JVM default
        // locale (e.g. under a Turkish locale "I" would otherwise become a dotless i).
        detector.append(text.toLowerCase(java.util.Locale.ROOT));
        ArrayList<Language> languages = detector.getProbabilities();
        Language detectedLanguage =
                extractLangWithHighestProbability(languages);
        return new LanguageDetectionResult(detectedLanguage, text, languages.size()>1);
    } catch (LangDetectException ex) {
        LOGGER.warn(ex);
    }
    return null;
}
示例15: extractLangWithHighestProbability
import com.cybozu.labs.langdetect.Language; //导入依赖的package包/类
/**
 * Scans the candidates and keeps the one with the greatest probability. On equal
 * probabilities the candidate that appears first in the list wins.
 *
 * @param languages candidates to choose from
 * @return the language with the highest probability, or {@code null} when the
 *         list is empty or no candidate has a probability greater than -1
 */
private Language extractLangWithHighestProbability(ArrayList<Language> languages) {
    Language winner = null;
    double topProbability = -1;
    for (int idx = 0; idx < languages.size(); idx++) {
        final Language candidate = languages.get(idx);
        // Strict comparison: a later candidate must beat the current best.
        if (!(candidate.prob > topProbability)) {
            continue;
        }
        topProbability = candidate.prob;
        winner = candidate;
    }
    return winner;
}