本文整理汇总了 Java 中 com.cybozu.labs.langdetect.Detector.detect 方法的典型用法代码示例。如果您正苦于以下问题：Java Detector.detect 方法的具体用法？Java Detector.detect 怎么用？Java Detector.detect 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 com.cybozu.labs.langdetect.Detector 的用法示例。
在下文中一共展示了 Detector.detect 方法的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: detect
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of the given text on a best-effort basis.
 *
 * @param text the text to analyse
 * @return the language reported by the detector, or {@code null} when
 *         {@code ConstellioStringUtils.isEmpty(text)} is true or detection fails
 */
public String detect(String text) {
    if (ConstellioStringUtils.isEmpty(text)) {
        return null;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        return detector.detect();
    } catch (Throwable t) {
        // Detection is best-effort, so the failure is only logged; the throwable is passed
        // along so the cause (and its stack trace) is not lost.
        // NOTE(review): assumes LOGGER offers a warn(message, Throwable) overload - confirm.
        LOGGER.warn("Problem while trying to detect lang for text (0,100): " + StringUtils.substring(text, 0, 100), t);
        return null;
    }
}
示例2: execute
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of one document field and stores the result in another field.
 *
 * <p>The value of {@code field} is read as a {@code String}, handed to a fresh detector whose
 * maximum text length is {@code maxLength.bytesAsInt()}, and the detected language is written
 * to {@code targetField}.
 *
 * @param ingestDocument the document to read from and write to
 * @throws Exception if reading the field, detection, or writing the result fails
 */
@Override
public void execute(IngestDocument ingestDocument) throws Exception {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.setMaxTextLength(maxLength.bytesAsInt());
    languageDetector.append(ingestDocument.getFieldValue(field, String.class));
    ingestDocument.setFieldValue(targetField, languageDetector.detect());
}
示例3: identifyLanguage
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Identifies the language of the text contained in an HTML document.
 *
 * @param html the HTML markup to analyse
 * @return the detected language when the best candidate's probability exceeds
 *         {@code PROBABILITY_THRESHOLD}; otherwise {@code UNKNOWN_LANGUAGE} (also returned
 *         for empty text, when there are no candidates, and when detection fails)
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@Override
public String identifyLanguage(String html)
        throws IOException
{
    // Detection runs on the extracted text, not on the markup.
    Document doc = Jsoup.parse(html);
    String text = doc.text();
    // Stripping the markup may leave nothing to analyse -> no language.
    if (text.isEmpty()) {
        return UNKNOWN_LANGUAGE;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String detectedLang = detector.detect();
        ArrayList<Language> detectedProbabilities = detector.getProbabilities();
        // The candidate list may be empty (langdetect drops low-probability candidates);
        // guard before get(0) so this yields UNKNOWN_LANGUAGE instead of an
        // IndexOutOfBoundsException.
        if (detectedProbabilities.isEmpty()) {
            return UNKNOWN_LANGUAGE;
        }
        if (detectedProbabilities.get(0).prob > PROBABILITY_THRESHOLD) {
            return detectedLang;
        }
        return UNKNOWN_LANGUAGE;
    }
    catch (LangDetectException e) {
        // Detection failure is treated as "language unknown".
        return UNKNOWN_LANGUAGE;
    }
}
示例4: analyze
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of a single string.
 *
 * @param string the text to analyse; may be {@code null}
 * @return the language reported by the detector, or {@code "unknown"} when the input is
 *         {@code null} or the detector throws a {@code LangDetectException}
 */
public String analyze(String string) {
    // There is nothing to detect on a null input; answer with the same fallback used for
    // detection failures rather than letting the detector fail on null.
    if (string == null) {
        return "unknown";
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(string);
        return detector.detect();
    } catch (LangDetectException e) {
        return "unknown";
    }
}
示例5: process
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Sets the language of a post that does not have one yet.
 *
 * <p>The title is used for detection when it is non-null, otherwise the description. If both
 * are null, or the post already has a language, the post is left untouched. A detection
 * failure is logged at info level and the language stays unset.
 *
 * @param item the post to inspect and, if possible, update
 */
@Override
public void process(Post item) {
    if (item.getLanguage() != null) {
        return;
    }

    final String title = item.getTitle();
    final String description = item.getDescription();
    final String text = (title != null) ? title : description;
    if (text == null) {
        return;
    }

    try {
        final Detector languageDetector = DetectorFactory.create();
        languageDetector.append(text);
        item.setLanguage(languageDetector.detect());
    } catch (LangDetectException e) {
        Logger.getLogger(LanguageDetector.class).info("No features in text: " + text);
    }
}
示例6: filter
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Adds a {@code lang} field to the document. The value is taken from the parse metadata
 * ({@code Metadata.LANGUAGE}) when present; otherwise it is detected from the title and the
 * parsed text, limited to {@code textsize_upper_limit}.
 *
 * @throws IndexingException if the filter has not been initialised, if initialisation
 *         failed, or if language detection fails
 */
public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
        CrawlDatum datum, Inlinks inlinks) throws IndexingException {
    if (conf == null) {
        throw new IndexingException("Not Yet Initialization.");
    }
    if (cause != null) {
        throw new IndexingException("Initialization Failed.", cause);
    }
    String lang = parse.getData().getParseMeta().get(Metadata.LANGUAGE);
    if (lang == null) {
        String title = parse.getData().getTitle();
        String body = parse.getText();
        // StringBuilder.append(null) inserts the literal "null", which would pollute the
        // text handed to the detector, so missing parts are skipped.
        StringBuilder text = new StringBuilder();
        if (title != null) {
            text.append(title);
        }
        text.append(" ");
        if (body != null) {
            text.append(body);
        }
        try {
            Detector detector = DetectorFactory.create();
            detector.setMaxTextLength(textsize_upper_limit);
            detector.append(text.toString());
            lang = detector.detect();
        } catch (LangDetectException e) {
            throw new IndexingException("Detection failed.", e);
        }
    }
    if (lang == null) {
        lang = "unknown";
    }
    doc.add("lang", lang);
    return doc;
}
示例7: langDetection
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of a text and maps it to a {@code Locale}.
 *
 * @param text   the text to analyse
 * @param length the maximum text length passed to the detector
 * @return {@code null} when {@code StringUtils.isEmpty(text)} is true; otherwise the result
 *         of {@code Lang.findLocaleDescription} for the detected language
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public static final Locale langDetection(String text, int length) throws LangDetectException {
    if (StringUtils.isEmpty(text)) {
        return null;
    }
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.setMaxTextLength(length);
    languageDetector.append(text);
    return Lang.findLocaleDescription(languageDetector.detect());
}
示例8: landDetect
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of the given text, swallowing detection failures.
 *
 * @param text the text to analyse
 * @return the language reported by the detector, or {@code null} when a
 *         {@code LangDetectException} is thrown
 */
public static String landDetect(String text) {
    try {
        final Detector languageDetector = DetectorFactory.create();
        languageDetector.append(text);
        return languageDetector.detect();
    } catch (LangDetectException e) {
        return null;
    }
}
示例9: detect
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of the given text with a freshly created detector.
 *
 * @param text the text to analyse
 * @return the language reported by the detector
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public String detect(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}
示例10: detect
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Runs language detection on a single piece of text.
 *
 * @param text the text to analyse
 * @return the language reported by the detector
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public String detect(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}
示例11: detect
import com.cybozu.labs.langdetect.Detector; //导入方法依赖的package包/类
/**
 * Detects the language of the given text using a detector created with the argument
 * {@code 0.5}.
 *
 * @param text the text to analyse
 * @return the language reported by the detector
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public static String detect(String text) throws LangDetectException {
    // NOTE(review): 0.5 is presumably langdetect's alpha smoothing parameter - confirm.
    final Detector languageDetector = DetectorFactory.create(0.5);
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}