当前位置: 首页>>代码示例>>Java>>正文


Java LanguageProfile类代码示例

本文整理汇总了Java中com.optimaize.langdetect.profiles.LanguageProfile的典型用法代码示例。如果您正苦于以下问题:Java LanguageProfile类的具体用法?Java LanguageProfile怎么用?Java LanguageProfile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


LanguageProfile类属于com.optimaize.langdetect.profiles包,在下文中一共展示了LanguageProfile类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getLanguageDetector

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Returns the shared language detector, building it on first use from the
 * profiles of every {@code Language} enum constant.
 *
 * <p>The field is initialised without any synchronisation. If the profiles
 * cannot be read, the error is logged and the field is left unset, so this
 * method may return {@code null}; the next call will retry.
 *
 * @return the cached detector, or {@code null} if loading the profiles failed
 */
private static LanguageDetector getLanguageDetector(){
    if (languageDetector == null){
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Lower-case with Locale.ROOT: language codes must not depend on the
                // JVM default locale (e.g. "I" becomes a dotless i under a Turkish locale).
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                            .withProfiles(languageProfiles).build();
        }
        catch (IOException e) {
            LoggerFactory.getLogger(Translator.class).error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
 
开发者ID:Kaysoro,项目名称:KaellyBot,代码行数:18,代码来源:Translator.java

示例2: main

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Command-line entry point: builds a language profile from a plain-text file
 * and writes it into the current working directory.
 *
 * <p>Expects exactly three arguments: the language code, the path of the text
 * file, and the minimal n-gram frequency (parsed as an int). With any other
 * argument count a usage line is printed and the JVM exits with status 1.
 *
 * @param args {@code <languageCode> <plainTextFile> <minimalFrequency>}
 * @throws IOException if the input file cannot be read or the profile cannot be written
 */
public static void main(String[] args) throws IOException {
  if (args.length != 3) {
    System.out.println("Usage: " + LanguageDetectionTrainer.class.getName() + " <languageCode> <plainTextFile> <minimalFrequency>");
    System.exit(1);
  }
  String langCode = args[0];
  String fileName = args[1];
  int minimalFrequency = Integer.parseInt(args[2]);
  String text;
  // Close the reader even when reading fails; it used to be left open.
  // NOTE(review): FileReader decodes with the platform default charset - confirm
  // whether the training files are expected to be UTF-8.
  try (FileReader reader = new FileReader(fileName)) {
    text = IOUtils.toString(reader);
  }
  TextObjectFactory textObjectFactory = CommonTextObjectFactories.forIndexingCleanText();
  TextObject inputText = textObjectFactory.create().append(text);
  LanguageProfile languageProfile = new LanguageProfileBuilder(langCode)
          .ngramExtractor(NgramExtractors.standard())
          .minimalFrequency(minimalFrequency)
          .addText(inputText)
          .build();
  File outputDir = new File(System.getProperty("user.dir"));  // current dir
  new LanguageProfileWriter().writeToDirectory(languageProfile, outputDir);
  System.out.println("Language profile written to " + new File(outputDir, langCode).getAbsolutePath());
}
 
开发者ID:languagetool-org,项目名称:languagetool,代码行数:21,代码来源:LanguageDetectionTrainer.java

示例3: initLanguageDetector

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Initialises the {@code languageDetector} and {@code textObjectFactory} fields.
 * The detector is built from all built-in language profiles with the standard
 * n-gram extractor; the factory is the "detecting on large text" preset.
 *
 * @throws IOException if the built-in profiles cannot be read
 */
private void initLanguageDetector() throws IOException {
    List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();

    LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .withProfiles(builtInProfiles);
    languageDetector = detectorBuilder.build();

    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
 
开发者ID:victorward,项目名称:recruitervision,代码行数:8,代码来源:ParsingServiceImpl.java

示例4: doInitialize

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Prepares the language detector (all built-in profiles, standard n-gram
 * extractor) and the large-text {@code textObjectFactory}.
 *
 * @param aContext the UIMA context (not read by this method)
 * @throws ResourceInitializationException wrapping any {@link IOException} raised during set-up
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
	try {
		List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();

		LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder
				.create(NgramExtractors.standard())
				.withProfiles(builtInProfiles);
		languageDetector = detectorBuilder.build();

		textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
	} catch (IOException ioe) {
		// Surface I/O problems through the initialisation exception declared by this method.
		throw new ResourceInitializationException(ioe);
	}
}
 
开发者ID:dstl,项目名称:baleen,代码行数:14,代码来源:DocumentLanguage.java

示例5: testLanguageDetectorErrorRate

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Runs the optimaize detector over every line returned by
 * {@code EuroParlUtils.readLines()} and prints the percentage of lines whose
 * top-ranked language differs from the expected one.
 *
 * <p>Each line is split on the first tab: the part before it is the expected
 * language code, the part after it is the text to classify.
 */
@Test
public void testLanguageDetectorErrorRate() throws IOException {
    // Only the target languages are loaded into the detector.
    List<LanguageProfile> profiles = new LanguageProfileReader().read(Arrays.asList(TARGET_LANGUAGES_FOR_YALDER));

    com.optimaize.langdetect.LanguageDetector detector = LanguageDetectorBuilder
                    .create(NgramExtractors.standard())
                    .withProfiles(profiles)
                    .build();

    // Factory preset for short, clean text.
    TextObjectFactory factory = CommonTextObjectFactories.forDetectingShortCleanText();

    SummaryStatistics summary = new SummaryStatistics();

    int hits = 0;
    int misses = 0;
    for (String line : EuroParlUtils.readLines()) {
        String[] fields = line.split("\t", 2);
        String expectedLanguage = fields[0];
        TextObject text = factory.forText(fields[1]);
        List<DetectedLanguage> ranked = detector.getProbabilities(text);

        // A hit requires at least one candidate and the best one matching the label.
        boolean correct = !ranked.isEmpty()
                && ranked.get(0).getLocale().getLanguage().equals(expectedLanguage);
        if (correct) {
            hits++;
        } else {
            misses++;
        }
    }

    int total = misses + hits;
    double missPercentage = 100.0 * (double) misses / (double) total;
    summary.addValue(missPercentage);

    String totalLine = String.format("Total miss ratio = %.2f%%", missPercentage);
    System.out.println(totalLine);

    String statsLine = String.format("Min = %.2f%%,  max =  %.2f%%, mean =  %.2f%%, std deviation = %f",
                    summary.getMin(), summary.getMax(), summary.getMean(), summary.getStandardDeviation());
    System.out.println(statsLine);
}
 
开发者ID:kkrugler,项目名称:yalder,代码行数:40,代码来源:OtherDetectorsTest.java

示例6: withProfile

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Registers a single language profile with this builder.
 *
 * <p>The profile is only recorded after both checks pass, so a rejected
 * profile leaves the builder unchanged.
 *
 * @param languageProfile the profile to add
 * @return this builder, for chaining
 * @throws IllegalStateException if a profile for the same language was added already (must be a userland bug).
 * @throws IllegalArgumentException if the profile lacks one of the gram lengths produced by the configured NgramExtractor.
 */
public LanguageDetectorBuilder withProfile(LanguageProfile languageProfile) throws IllegalStateException {
    boolean alreadyAdded = langsAdded.contains(languageProfile.getLocale());
    if (alreadyAdded) {
        String message = "A language profile for language "+languageProfile.getLocale()+" was added already!";
        throw new IllegalStateException(message);
    }

    for (Integer requiredLength : ngramExtractor.getGramLengths()) {
        if (languageProfile.getGramLengths().contains(requiredLength)) {
            continue;
        }
        String message = "The NgramExtractor is set to handle "+requiredLength+"-grams but the given language profile for "+languageProfile.getLocale()+" does not support this!";
        throw new IllegalArgumentException(message);
    }

    langsAdded.add(languageProfile.getLocale());
    languageProfiles.add(languageProfile);
    return this;
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:17,代码来源:LanguageDetectorBuilder.java

示例7: withProfiles

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Adds every profile of the given sequence, in iteration order, by delegating
 * to {@code withProfile} for each element. Profiles added before a failing
 * element stay registered.
 *
 * @param languageProfiles the profiles to add
 * @return this builder, for chaining
 * @throws IllegalStateException if a profile for the same language was added already (must be a userland bug).
 */
public LanguageDetectorBuilder withProfiles(Iterable<LanguageProfile> languageProfiles) throws IllegalStateException {
    java.util.Iterator<LanguageProfile> remaining = languageProfiles.iterator();
    while (remaining.hasNext()) {
        withProfile(remaining.next());
    }
    return this;
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:10,代码来源:LanguageDetectorBuilder.java

示例8: removeLanguageProfile

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Drops every registered LanguageProfile whose locale language equals the
 * given string, e.g. after {@link #loadAllBuiltInLanguageProfiles()}.
 *
 * @param isoString the ISO string of the LanguageProfile to be removed.
 * @return this validator, for chaining
 */
public LanguageProfileValidator removeLanguageProfile(final String isoString) {
    final Predicate<LanguageProfile> hasGivenLanguage = new Predicate<LanguageProfile>() {
        @Override
        public boolean apply(LanguageProfile candidate) {
            String candidateLanguage = candidate.getLocale().getLanguage();
            return candidateLanguage.equals(isoString);
        }
    };
    Iterables.removeIf(this.languageProfiles, hasGivenLanguage);
    return this;
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:14,代码来源:LanguageProfileValidator.java

示例9: create

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds the n-gram frequency table for a set of language profiles.
 *
 * <p>For every n-gram of the requested lengths the table holds one array with
 * a slot per profile (in iteration order). A slot contains the n-gram's
 * frequency in that profile divided by the profile's number of gram
 * occurrences for that n-gram length; slots of profiles that do not contain
 * the n-gram keep the array default of 0.
 *
 * @param languageProfiles the profiles to combine
 * @param gramLengths for example [1,2,3]
 * @throws java.lang.IllegalArgumentException if languageProfiles or gramLengths is empty, or if one of the
 *         languageProfiles does not have the grams of the required sizes.
 */
@NotNull
public static NgramFrequencyData create(@NotNull Collection<LanguageProfile> languageProfiles, @NotNull Collection<Integer> gramLengths) throws IllegalArgumentException {
    if (languageProfiles.isEmpty()) {
        throw new IllegalArgumentException("No languageProfiles provided!");
    }
    if (gramLengths.isEmpty()) {
        throw new IllegalArgumentException("No gramLengths provided!");
    }

    final int languageCount = languageProfiles.size();
    Map<String, double[]> probabilitiesByGram = new HashMap<>();
    List<LdLocale> locales = new ArrayList<>();

    // Position of the current profile inside each probability array.
    int column = 0;
    for (LanguageProfile profile : languageProfiles) {
        locales.add(profile.getLocale());

        for (Integer gramLength : gramLengths) {
            if (!profile.getGramLengths().contains(gramLength)) {
                throw new IllegalArgumentException("The language profile for "+profile.getLocale()+" does not contain "+gramLength+"-grams!");
            }
            for (Map.Entry<String, Integer> entry : profile.iterateGrams(gramLength)) {
                String gram = entry.getKey();
                Integer occurrences = entry.getValue();

                double[] probabilities = probabilitiesByGram.get(gram);
                if (probabilities == null) {
                    // First time this n-gram is seen in any profile.
                    probabilities = new double[languageCount];
                    probabilitiesByGram.put(gram, probabilities);
                }
                probabilities[column] = occurrences.doubleValue() / profile.getNumGramOccurrences(gram.length());
            }
        }
        column++;
    }

    return new NgramFrequencyData(probabilitiesByGram, locales);
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:39,代码来源:NgramFrequencyData.java

示例10: makeDetector

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds a detector from every language profile found in the directory named
 * by the required "directory" parameter.
 *
 * <p>Also reads the "alpha" parameter (falling back to {@code DEFAULT_ALPHA})
 * and the optional "seed" parameter, and passes both to the builder.
 *
 * @throws IOException if the profiles cannot be read from the directory
 */
private LanguageDetector makeDetector() throws IOException {
    final double alphaParam = getParamDouble("alpha", DEFAULT_ALPHA);
    final File profilesDir = new File(requireParamString("directory") + "/");
    final Optional<Long> seedParam = Optional.fromNullable(getParamLongOrNull("seed"));

    final List<LanguageProfile> profiles = new LanguageProfileReader().readAll(profilesDir);

    LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alphaParam)
            .seed(seedParam)
            .shortTextAlgorithm(50)
            .withProfiles(profiles);
    return detectorBuilder.build();
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:18,代码来源:CommandLineInterface.java

示例11: DataLanguageDetectorImplTest

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Creates the two detectors used by the tests, both backed by all built-in
 * language profiles and the standard n-gram extractor. They differ only in
 * the value passed to {@code shortTextAlgorithm}: 100 for
 * {@code shortDetector}, 0 for {@code longDetector}.
 *
 * @throws IOException if the built-in profiles cannot be read
 */
public DataLanguageDetectorImplTest() throws IOException {
    // Read the built-in profiles once and hand the same list to both builders;
    // previously the whole profile set was loaded a second time for longDetector.
    List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(100)
            .withProfiles(languageProfiles)
            .build();

    longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(0)
            .withProfiles(languageProfiles)
            .build();
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:14,代码来源:DataLanguageDetectorImplTest.java

示例12: makeNewDetector

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds a detector for "en", "fr", "nl" and "de" from the old-format
 * profiles stored under {@code /languages/} on the classpath, converting each
 * one with {@code OldLangProfileConverter}.
 *
 * @return a freshly built detector
 * @throws IOException if a profile resource cannot be read or closed
 */
private LanguageDetector makeNewDetector() throws IOException {
    LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard())
        .shortTextAlgorithm(50)
        .prefixFactor(1.5)
        .suffixFactor(2.0);

    LangProfileReader langProfileReader = new LangProfileReader();
    for (String language : ImmutableList.of("en", "fr", "nl", "de")) {
        LangProfile langProfile;
        // Close the classpath stream in this method instead of relying on the reader to do it.
        // A null stream (missing resource) is passed through unchanged, exactly as before.
        try (java.io.InputStream profileStream = LanguageDetectorImplTest.class.getResourceAsStream("/languages/" + language)) {
            langProfile = langProfileReader.read(profileStream);
        }
        LanguageProfile languageProfile = OldLangProfileConverter.convert(langProfile);
        builder.withProfile(languageProfile);
    }

    return builder.build();
}
 
开发者ID:optimaize,项目名称:language-detector,代码行数:16,代码来源:LanguageDetectorImplTest.java

示例13: LanguageIdentifier

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
public LanguageIdentifier() {
  try {
    List<LanguageProfile> profiles = loadProfiles(getLanguageCodes());
    languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .minimalConfidence(MINIMAL_CONFIDENCE)
            .withProfiles(profiles)
            .build();
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
  } catch (IOException e) {
    throw new RuntimeException("Could not set up language identifier", e);
  }
}
 
开发者ID:languagetool-org,项目名称:languagetool,代码行数:13,代码来源:LanguageIdentifier.java

示例14: loadProfiles

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Loads the profiles for the given language codes and appends the profile of
 * every entry in {@code externalLangCodes} whose resource
 * {@code /<code>/<code>.profile} exists.
 *
 * @param langCodes codes handed to the profile reader
 * @return the profiles for {@code langCodes} followed by the available external ones
 * @throws IOException if a profile cannot be read
 */
private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
  List<LanguageProfile> loaded = new LanguageProfileReader().read(langCodes);
  for (String code : externalLangCodes) {
    String resourcePath = "/" + code + "/" + code + ".profile";
    if (!JLanguageTool.getDataBroker().resourceExists(resourcePath)) {
      continue;  // not all languages are always available
    }
    try (InputStream in = JLanguageTool.getDataBroker().getFromResourceDirAsStream(resourcePath)) {
      LanguageProfile external = new LanguageProfileReader().read(in);
      loaded.add(external);
    }
  }
  return loaded;
}
 
开发者ID:languagetool-org,项目名称:languagetool,代码行数:14,代码来源:LanguageIdentifier.java

示例15: checkIfNonEnglish

import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Tries to determine the language of a post.
 *
 * <p>The text is obtained via {@code stripBody} and {@code stripTags} and all
 * punctuation runs are removed. The optimaize detector is consulted first; if
 * it yields a language, that language code is returned. Otherwise:
 * <ul>
 *   <li>text longer than 50 characters is handed to the Tika detector, whose
 *       result is returned unless it is the empty string;</li>
 *   <li>text shorter than 50 characters yields {@code null};</li>
 *   <li>text of exactly 50 characters yields {@code "Gibberish"} when
 *       {@code checkIfNoCodeBlock(post)} is true.</li>
 * </ul>
 *
 * @param post the post to inspect
 * @return a language code, {@code "Gibberish"}, or {@code null} when nothing
 *         could be determined or an {@link IOException} occurred
 */
public static String checkIfNonEnglish(Post post){
    String dataToCheck = stripTags(stripBody(post)).replaceAll("\\p{Punct}+", "");
    try {
        // NOTE(review): profiles and detector are rebuilt on every call; consider caching them.
        List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();
        com.optimaize.langdetect.LanguageDetector optimaizeDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .build();
        TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        TextObject textObject = textObjectFactory.forText(dataToCheck);
        Optional<LdLocale> lang = optimaizeDetector.detect(textObject);
        if (lang.isPresent()) {
            return lang.get().getLanguage();
        }

        if (dataToCheck.length() > 50) {
            org.apache.tika.language.detect.LanguageDetector tikaDetector = new OptimaizeLangDetector().loadModels();
            String tikaLang;
            // try-with-resources closes the writer even if append/getLanguage throws.
            try (LanguageWriter writer = new LanguageWriter(tikaDetector)) {
                writer.append(dataToCheck);
                LanguageResult result = writer.getLanguage();
                tikaLang = result.getLanguage();
            }
            return tikaLang.isEmpty() ? null : tikaLang;
        }
        if (dataToCheck.length() < 50) {
            return null;
        }

        // NOTE(review): only text of exactly 50 characters reaches this point, so
        // "Gibberish" is returned in that single case - confirm whether one of the
        // comparisons above was meant to be inclusive. Behaviour kept as-is.
        if (checkIfNoCodeBlock(post)) {
            return "Gibberish";
        }
        return null;
    }
    catch (IOException e){
        // Detection is best-effort: report the failure and fall through to null.
        e.printStackTrace();
    }
    return null;
}
 
开发者ID:SOBotics,项目名称:Natty,代码行数:50,代码来源:CheckUtils.java


注:本文中的com.optimaize.langdetect.profiles.LanguageProfile类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。