当前位置: 首页>>代码示例>>Java>>正文


Java NGramDistance类代码示例

本文整理汇总了Java中org.apache.lucene.search.spell.NGramDistance的典型用法代码示例。如果您正苦于以下问题:Java NGramDistance类的具体用法?Java NGramDistance怎么用?Java NGramDistance使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


NGramDistance类属于org.apache.lucene.search.spell包,在下文中一共展示了NGramDistance类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: resolveDistance

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
/**
 * Resolves the name of a string distance option to its {@link StringDistance} implementation.
 *
 * <p>The name is lower-cased with {@link Locale#US} before it is compared, so matching is
 * case-insensitive and every literal compared against below must itself be lower case.
 *
 * @param distanceVal the option name, e.g. {@code "internal"}, {@code "damerau_levenshtein"},
 *                    {@code "levenstein"}, {@code "jarowinkler"} or {@code "ngram"}
 * @return the string distance implementation selected by {@code distanceVal}
 * @throws IllegalArgumentException if {@code distanceVal} is not one of the recognised names
 */
private static StringDistance resolveDistance(String distanceVal) {
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "dameraulevenshtein".equals(distanceVal)) {
        // The camelCase alias "damerauLevenshtein" has already been lower-cased above, so it
        // must be matched in its lower-case form; comparing against the camelCase literal
        // could never succeed.
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        // TODO Jaro and Winkler are 2 people - so apply same naming logic
        // as damerau_levenshtein
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:DirectCandidateGeneratorBuilder.java

示例2: getEntityLabelSimMatrix

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
/**
 * Builds a square, symmetric matrix of pairwise label comparisons for all entities of the
 * given documents.
 *
 * <p>The entity labels of all documents are flattened into one array (document order, then
 * entity order). Cell {@code (i, j)} holds the value returned by
 * {@code NGramDistance(3).getDistance(labels[i], labels[j])}; the diagonal is set to 1.
 *
 * @param documents the documents whose entity labels are compared
 * @return a matrix with one row and one column per entity
 */
public static Matrix getEntityLabelSimMatrix(TokenizedDocument[] documents) {
    // First pass: count the entities so the label array can be sized exactly.
    int total = 0;
    for (TokenizedDocument document : documents) {
        total += document.entities.length;
    }

    // Second pass: flatten the labels of all documents into a single array.
    String[] labels = new String[total];
    int next = 0;
    for (TokenizedDocument document : documents) {
        for (int e = 0; e < document.entities.length; ++e) {
            labels[next++] = document.entities[e].label;
        }
    }

    Matrix stringSimMatrix = new Basic2DMatrix(total, total);
    NGramDistance nGramDistance = new NGramDistance(3);
    for (int row = 0; row < labels.length; ++row) {
        stringSimMatrix.set(row, row, 1);
        // Only the upper triangle is computed; each value is mirrored into the lower one.
        for (int col = row + 1; col < labels.length; ++col) {
            double similarity = nGramDistance.getDistance(labels[row], labels[col]);
            stringSimMatrix.set(row, col, similarity);
            stringSimMatrix.set(col, row, similarity);
        }
    }
    return stringSimMatrix;
}
 
开发者ID:dice-group,项目名称:CoreferenceResolution,代码行数:27,代码来源:EntityLabelSimMatrixCreator.java

示例3: CandidateUtil

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
/**
 * Creates a candidate utility configured from a properties resource on the classpath.
 *
 * <p>The resource is looked up through the current thread's context class loader. The
 * properties {@code nodeType}, {@code ngramDistance}, {@code context}, {@code popularity},
 * {@code acronym}, {@code commonEntities} and {@code algorithm} are read from it.
 *
 * @param file name of the properties resource; it is also passed on to the {@code TripleIndex}
 * @throws IOException if the resource cannot be found or cannot be read
 */
public CandidateUtil(final String file) throws IOException {
  final ClassLoader loader = Thread.currentThread().getContextClassLoader();
  final Properties prop = new Properties();

  // try-with-resources closes the stream; previously it was never closed.
  try (InputStream is = loader.getResourceAsStream(file)) {
    if (is == null) {
      // getResourceAsStream signals a missing resource with null; fail with a clear
      // message instead of a NullPointerException inside Properties.load.
      throw new IOException("Properties resource not found on classpath: " + file);
    }
    prop.load(is);
  }

  nodeType = prop.getProperty("nodeType");
  nGramDistance = new NGramDistance(Integer.valueOf(prop.getProperty("ngramDistance")));
  index = new TripleIndex(file);
  context = Boolean.valueOf(prop.getProperty("context"));
  if (context) { // in case the index by context exist
    index2 = new TripleIndexContext();
  }
  corporationAffixCleaner = new CorporationAffixCleaner();
  domainWhiteLister = new DomainWhiteLister(index);
  popularity = Boolean.valueOf(prop.getProperty("popularity"));
  acronym = Boolean.valueOf(prop.getProperty("acronym"));
  commonEntities = Boolean.valueOf(prop.getProperty("commonEntities"));
  algorithm = prop.getProperty("algorithm");
}
 
开发者ID:dice-group,项目名称:FOX,代码行数:22,代码来源:CandidateUtil.java

示例4: CandidateUtil

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
public CandidateUtil() throws IOException {
	Properties prop = new Properties();
	InputStream input = CandidateUtil.class.getResourceAsStream("/config/agdistis.properties");
	prop.load(input);

	String envNodeType = System.getenv("AGDISTIS_NODE_TYPE");
	this.nodeType = envNodeType != null ? envNodeType : prop.getProperty("nodeType");
	String envNgramDistance = System.getenv("AGDISTIS_NGRAM_DISTANCE");
	this.nGramDistance = new NGramDistance(
			Integer.valueOf(envNgramDistance != null ? envNgramDistance : prop.getProperty("ngramDistance")));
	this.index = new TripleIndex();
	String envContext = System.getenv("AGDISTIS_CONTEXT");
	this.context = Boolean.valueOf(envContext != null ? envContext : prop.getProperty("context"));
	if (context == true) { // in case the index by context exist
		this.index2 = new TripleIndexContext();
	}
	this.corporationAffixCleaner = new CorporationAffixCleaner();
	this.domainWhiteLister = new DomainWhiteLister(index);
	String envPopularity = System.getenv("AGDISTIS_POPULARITY");
	this.popularity = Boolean.valueOf(envPopularity != null ? envPopularity : prop.getProperty("popularity"));
	String envAcronym = System.getenv("AGDISTIS_ACRONYM");
	this.acronym = Boolean.valueOf(envAcronym != null ? envAcronym : prop.getProperty("acronym"));
	String envCommonEntities = System.getenv("AGDISTIS_COMMON_ENTITIES");
	this.commonEntities = Boolean
			.valueOf(envCommonEntities != null ? envCommonEntities : prop.getProperty("commonEntities"));
	String envAlgorithm = System.getenv("AGDISTIS_ALGORITHM");
	this.algorithm = envAlgorithm != null ? envAlgorithm : prop.getProperty("algorithm");
}
 
开发者ID:dice-group,项目名称:AGDISTIS,代码行数:29,代码来源:CandidateUtil.java

示例5: testSurfaceFormsDistance

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
/**
 * Checks that the 3-gram distance between the rdfs:label of a resource and each of its
 * skos:altLabel surface forms is non-negative.
 */
@Test
public void testSurfaceFormsDistance() {
	String candidateURL = "http://dbpedia.org/resource/Barack_Obama";
	List<Triple> label = index.search(candidateURL, "http://www.w3.org/2000/01/rdf-schema#label", null);
	List<Triple> surfaceForms = index.search(candidateURL, "http://www.w3.org/2004/02/skos/core#altLabel", null);
	log.debug(" * " + surfaceForms.size());
	NGramDistance n = new NGramDistance(3);
	for (Triple t : surfaceForms) {
		// The label is looked up inside the loop so that an empty surface-form list never
		// touches label.get(0), exactly as before.
		String labelText = label.get(0).getObject();
		String surfaceForm = t.getObject();
		// Compute the distance once and reuse it for both the log line and the assertion.
		float distance = n.getDistance(labelText, surfaceForm);
		log.debug(labelText + " " + surfaceForm + " : " + distance);
		assertTrue(distance >= 0);
	}
}
 
开发者ID:dice-group,项目名称:AGDISTIS,代码行数:15,代码来源:TripleIndexTest.java

示例6: toLucene

import org.apache.lucene.search.spell.NGramDistance; //导入依赖的package包/类
/**
 * Creates the Lucene string distance for this option.
 *
 * @return a new {@code NGramDistance} built with its no-argument constructor
 */
@Override
public StringDistance toLucene() {
    final StringDistance distance = new NGramDistance();
    return distance;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:5,代码来源:TermSuggestionBuilder.java


注:本文中的org.apache.lucene.search.spell.NGramDistance类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。