本文整理汇总了Java中com.cybozu.labs.langdetect.Detector类的典型用法代码示例。如果您正苦于以下问题：Java Detector类的具体用法？Java Detector怎么用？Java Detector使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Detector类属于com.cybozu.labs.langdetect包，在下文中一共展示了Detector类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getDetectorForLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Gets a detector whose language priors are biased towards a given language.
 *
 * <p>The requested language is given a prior of 0.8, English is given 0.5 when it is
 * not the requested language, and every other supported language is given 0.1.
 *
 * TODO(P1) Adding priority on the language seems to be relatively useless.
 * To be reviewed.
 *
 * @param language the language code to favour; it is compared with {@code equals}
 *        against each entry returned by {@code getSupportedLanguages()}
 * @return a {@link Detector} customized for that language
 * @throws LangDetectException propagated from the langdetect calls made here
 */
private Detector getDetectorForLanguage(String language) throws LangDetectException {
    Detector detector = DetectorFactory.create();
    // Declared as HashMap (not Map) because that is the type the original passed to setPriorMap.
    HashMap<String, Double> priorityMap = new HashMap<String, Double>();
    for (String supportedLanguage : getSupportedLanguages()) {
        final double prior;
        if (supportedLanguage.equals(language)) {
            prior = 0.8;
        } else if ("en".equals(supportedLanguage)) {
            // Reaching this branch already implies that the requested language is not "en".
            prior = 0.5;
        } else {
            prior = 0.1;
        }
        priorityMap.put(supportedLanguage, prior);
    }
    detector.setPriorMap(priorityMap);
    return detector;
}
示例2: LanguageAnalyzer
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Loads the language profiles found under {@code profiles/} on the class path of
 * {@link Detector} into {@link DetectorFactory}, unless the factory already reports
 * at least one loaded language.
 *
 * <p>The resource is expected to live inside a jar: the URL connection is cast to
 * {@link JarURLConnection}, so any other kind of resource location fails with a
 * {@link ClassCastException}.
 *
 * @throws LangDetectException propagated from {@code DetectorFactory.loadProfile}
 * @throws IOException if the jar or one of its profile entries cannot be read
 */
public LanguageAnalyzer() throws LangDetectException, IOException {
    // Nothing to do when profiles are already registered; avoids re-reading the whole jar.
    if (!DetectorFactory.getLangList().isEmpty()) {
        return;
    }
    // solution for loading detector profiles from jar taken from:
    // http://stackoverflow.com/a/15332031
    String dirname = "profiles/";
    ClassLoader loader = Detector.class.getClassLoader();
    Enumeration<URL> en = loader.getResources(dirname);
    List<String> profiles = new ArrayList<>();
    if (en.hasMoreElements()) {
        URL url = en.nextElement();
        JarURLConnection urlcon = (JarURLConnection) url.openConnection();
        try (JarFile jar = urlcon.getJarFile();) {
            Enumeration<JarEntry> entries = jar.entries();
            while (entries.hasMoreElements()) {
                JarEntry jarEntry = entries.nextElement();
                String entry = jarEntry.getName();
                // Skip the "profiles/" directory entry itself: it carries no profile data.
                if (jarEntry.isDirectory() || !entry.startsWith(dirname)) {
                    continue;
                }
                try (InputStream in = loader.getResourceAsStream(entry);) {
                    // Decode explicitly as UTF-8 rather than with the platform default charset,
                    // so the n-gram text is read the same way on every machine.
                    profiles.add(IOUtils.toString(in, Charset.forName("UTF-8")));
                }
            }
        }
    }
    DetectorFactory.loadProfile(profiles);
}
示例3: detectLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the candidate languages of a piece of text.
 *
 * @param content the text to analyse; when {@code StringUtils.isEmpty} reports it as
 *        empty, no detection is attempted
 * @return a mutable list with one {@code DetectedLanguage} per entry reported by the
 *         detector, or an empty list when the input is empty or detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    List<DetectedLanguage> result = Lists.newArrayList();
    if (!StringUtils.isEmpty(content)) {
        try {
            Detector detector = DetectorFactory.create();
            detector.append(content);
            for (Language candidate : detector.getProbabilities()) {
                result.add(new DetectedLanguage(candidate.lang, candidate.prob));
            }
        } catch (LangDetectException ignored) {
            // Best effort: a detection failure is reported to the caller as "no languages".
        }
    }
    return result;
}
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:18,代码来源:MultiLangDetectLanguageIdentifierUpdateProcessor.java
示例4: detectLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the candidate languages of a piece of text, optionally restricted to the
 * locales listed in {@code autoDetectQueryLocales}.
 *
 * @param content the text to analyse; {@code null} and whitespace-only input are
 *        treated as "no text"
 * @return the detected languages that pass the locale filter (all of them when the
 *         filter is empty), or an empty list when there is no text or detection fails
 */
private List<DetectedLanguage> detectLanguage(String content) {
    // Null is handled like blank input instead of failing with a NullPointerException.
    if (content == null || content.trim().isEmpty()) { // to be consistent with the tika impl?
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        ArrayList<Language> langlist = detector.getProbabilities();
        ArrayList<DetectedLanguage> solrLangList = new ArrayList<>();
        // An empty filter means "accept every detected language".
        boolean acceptAll = autoDetectQueryLocales.isEmpty();
        for (Language l : langlist) {
            if (acceptAll || autoDetectQueryLocales.contains(l.lang)) {
                solrLangList.add(new DetectedLanguage(l.lang, l.prob));
            }
        }
        return solrLangList;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
示例5: detect
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the language of the given text.
 *
 * @param text the text to analyse
 * @return a {@code DetectionResult} built from the first entry of the detector's
 *         probability list, or {@code UNKNOWN} when no detector can be created or
 *         the list is empty
 */
@Override
public DetectionResult detect(String text) {
    final Detector langDetector;
    try {
        langDetector = DetectorFactory.create();
    } catch (LangDetectException e) {
        // TODO(skreft): log the reason
        return UNKNOWN;
    }
    langDetector.append(text);
    List<Language> ranked = langDetector.getProbabilities();
    if (ranked.isEmpty()) {
        return UNKNOWN;
    }
    Language top = ranked.get(0);
    return new DetectionResult(top.lang, top.prob);
}
示例6: detectLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the candidate languages of a piece of text.
 *
 * @param content the text to analyse; {@code null} and whitespace-only input are
 *        treated as "no text"
 * @return one {@code DetectedLanguage} per entry reported by the detector, or an
 *         empty list when there is no text or detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    // Null is handled like blank input instead of failing with a NullPointerException.
    if (content == null || content.trim().isEmpty()) { // to be consistent with the tika impl?
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        ArrayList<Language> langlist = detector.getProbabilities();
        ArrayList<DetectedLanguage> solrLangList = new ArrayList<DetectedLanguage>(langlist.size());
        for (Language l : langlist) {
            solrLangList.add(new DetectedLanguage(l.lang, l.prob));
        }
        return solrLangList;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
示例7: detect
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the language of the given text.
 *
 * @param text the text to analyse
 * @return the value returned by the detector's {@code detect()} call, or {@code null}
 *         when {@code ConstellioStringUtils.isEmpty} reports the text as empty or
 *         detection fails for any reason
 */
public String detect(String text) {
    if (ConstellioStringUtils.isEmpty(text)) {
        return null;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        return detector.detect();
    } catch (Throwable t) {
        // Deliberately broad, as in the original: any failure means "language unknown".
        // The throwable is handed to the logger so the cause is no longer lost.
        LOGGER.warn("Problem while trying to detect lang for text (0,100): " + StringUtils.substring(text, 0, 100), t);
        return null;
    }
}
示例8: execute
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the language of the configured source field and stores the result in the
 * configured target field of the same document.
 *
 * @param ingestDocument the document to read {@code field} from and write
 *        {@code targetField} to
 * @throws Exception propagated from the detector or from the document accessors
 */
@Override
public void execute(IngestDocument ingestDocument) throws Exception {
    Detector langDetector = DetectorFactory.create();
    // Cap the amount of text the detector considers, using the configured maximum length.
    langDetector.setMaxTextLength(maxLength.bytesAsInt());
    langDetector.append(ingestDocument.getFieldValue(field, String.class));
    ingestDocument.setFieldValue(targetField, langDetector.detect());
}
示例9: identifyLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Identifies the language of the visible text of an HTML document.
 *
 * <p>The markup is stripped with Jsoup first. The first entry of the detector's
 * probability list is treated as the best candidate and is only reported when its
 * probability exceeds {@code PROBABILITY_THRESHOLD}.
 *
 * @param html the HTML source to analyse
 * @return the language of the best candidate, or {@code UNKNOWN_LANGUAGE} when the
 *         document has no text, the detector reports no candidate, the best candidate
 *         is not confident enough, or detection fails
 * @throws IOException declared by the overridden method; not thrown by the code here
 */
@Override
public String identifyLanguage(String html)
    throws IOException
{
    // extracting plain html text
    String text = Jsoup.parse(html).text();
    // we might have removed everything -> no lang
    if (text.isEmpty()) {
        return UNKNOWN_LANGUAGE;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        ArrayList<Language> detectedProbabilities = detector.getProbabilities();
        // An empty candidate list would otherwise make get(0) throw IndexOutOfBoundsException.
        if (detectedProbabilities.isEmpty()) {
            return UNKNOWN_LANGUAGE;
        }
        // Language and probability now come from the same entry, so the separate detect()
        // call is no longer needed.
        Language best = detectedProbabilities.get(0);
        return best.prob > PROBABILITY_THRESHOLD ? best.lang : UNKNOWN_LANGUAGE;
    }
    catch (LangDetectException e) {
        return UNKNOWN_LANGUAGE;
    }
}
示例10: analyze
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the language of a single string.
 *
 * @param string the text to analyse
 * @return the value returned by the detector's {@code detect()} call, or the literal
 *         {@code "unknown"} when the input is {@code null} or detection fails
 */
public String analyze(String string) {
    // Null input cannot be analysed; report it the same way as a failed detection.
    if (string == null) {
        return "unknown";
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(string);
        return detector.detect();
    } catch (LangDetectException e) {
        return "unknown";
    }
}
示例11: process
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Fills in the language of a post that does not have one yet.
 *
 * <p>The title is used for detection when it is non-null, otherwise the description.
 * A post with neither is left untouched, as is a post whose language is already set.
 *
 * @param item the post to update in place
 */
@Override
public void process(Post item) {
    if (item.getLanguage() != null) {
        return;
    }
    // detect lang
    String title = item.getTitle();
    String description = item.getDescription();
    String text = (title != null) ? title : description;
    if (text == null) {
        return;
    }
    try {
        Detector langDetector = DetectorFactory.create();
        langDetector.append(text);
        item.setLanguage(langDetector.detect());
    } catch (LangDetectException e) {
        Logger.getLogger(LanguageDetector.class).info("No features in text: " + text);
    }
}
示例12: detectLanguage
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Detects the candidate languages of a document from the String values of the
 * configured {@code inputFields}.
 *
 * <p>Each String value is cut to at most {@code maxFieldValueChars} characters and
 * followed by a single space before being handed to the detector, which is itself
 * configured with {@code maxTotalChars} as its maximum text length. Non-String values
 * are skipped with a warning.
 *
 * @param doc the document whose fields are analysed
 * @return one {@code DetectedLanguage} per entry reported by the detector, or an
 *         empty list when detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(SolrInputDocument doc) {
    try {
        Detector langDetector = DetectorFactory.create();
        langDetector.setMaxTextLength(maxTotalChars);
        for (String fieldName : inputFields) {
            log.debug("Appending field " + fieldName);
            if (!doc.containsKey(fieldName)) {
                continue;
            }
            Collection<Object> values = doc.getFieldValues(fieldName);
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                if (!(value instanceof String)) {
                    log.warn("Field " + fieldName + " not a String value, not including in detection");
                    continue;
                }
                String str = (String) value;
                boolean tooLong = str.length() > maxFieldValueChars;
                langDetector.append(tooLong ? str.substring(0, maxFieldValueChars) : str);
                // Separator between consecutive field values.
                langDetector.append(" ");
            }
        }
        ArrayList<DetectedLanguage> detected = new ArrayList<>();
        for (Language candidate : langDetector.getProbabilities()) {
            detected.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
        return detected;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
示例13: isEnglish
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Checks whether English is among the languages the detector reports for a text.
 *
 * @param content the text to analyse
 * @return {@code true} if any entry of the detector's probability list has the
 *         language code {@code "en"}; {@code false} if there is none, if the text is
 *         {@code null} or empty, or if detection fails
 */
public Boolean isEnglish(String content) {
    if (content == null || content.isEmpty()) {
        return false;
    }
    try {
        Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        // An empty candidate list simply falls through the loop to "false".
        for (Language candidate : langDetector.getProbabilities()) {
            if ("en".equals(candidate.lang)) {
                return true;
            }
        }
        return false;
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        return false;
    }
}
示例14: Detector
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Creates a detector that takes its word/language probability map, language list and
 * random seed from the given factory, and starts with an empty text buffer.
 * Detector instance can be constructed via {@link DetectorFactory#create()}.
 *
 * @param factory {@link DetectorFactory} instance (only DetectorFactory inside)
 */
public Detector(final DetectorFactory factory) {
    this.text = new StringBuilder();
    this.langlist = factory.getLangList();
    this.wordLangProbMap = factory.getWordLangProbMap();
    // Seeding through the constructor is equivalent to new Random() followed by setSeed().
    this.rand = new Random(factory.getSeed());
}
示例15: classifyText
import com.cybozu.labs.langdetect.Detector; //导入依赖的package包/类
/**
 * Classifies the language of a text.
 *
 * @param text the text to analyse
 * @return a classification built from the first entry of the detector's probability
 *         list; a classification with language {@code "blank"} and probability 0 when
 *         the text is {@code null} or empty; {@code null} when detection fails or the
 *         detector reports no candidate
 */
public LanguageClassification classifyText(String text) {
    if (text == null || text.isEmpty()) {
        return new LanguageClassification("blank", 0);
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        // Fully qualified so that this block needs no additional import.
        java.util.List<Language> probabilities = detector.getProbabilities();
        // An empty candidate list would otherwise make get(0) throw IndexOutOfBoundsException;
        // it is reported like any other failed detection.
        if (probabilities.isEmpty()) {
            return null;
        }
        Language lang = probabilities.get(0);
        return new LanguageClassification(lang.lang, lang.prob);
    } catch (LangDetectException e) {
        return null;
    }
}