当前位置: 首页>>代码示例>>Java>>正文


Java SnowballStemmer.stem方法代码示例

本文整理汇总了Java中info.ephyra.nlp.SnowballStemmer.stem方法的典型用法代码示例。如果您正苦于以下问题:Java SnowballStemmer.stem方法的具体用法?Java SnowballStemmer.stem怎么用?Java SnowballStemmer.stem使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类info.ephyra.nlp.SnowballStemmer的用法示例。


在下文中一共展示了SnowballStemmer.stem方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: match

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
	 * Checks whether every token of the first phrase also occurs in the second
	 * phrase. Both phrases are passed through <code>unparse</code>, every
	 * <code>'s</code> and every remaining apostrophe is removed, and the
	 * result is split on single spaces. Tokens are stemmed and the stems are
	 * compared ignoring case.
	 * 
	 * @param npq parsed string whose tokens must all be found
	 * @param npt parsed string that is searched for those tokens
	 * @return <code>true</code> iff each stemmed token of <code>npq</code>
	 *         equals (ignoring case) some stemmed token of <code>npt</code>
	 */
	private static boolean match(String npq, String npt) {
		String q = unparse(npq).replace("'s", "").replace("'", "");
		String t = unparse(npt).replace("'s", "").replace("'", "");
		
		String[] queryTokens = q.split(" ");
		// split() drops trailing empty strings, so the array can be empty;
		// with nothing to look for the phrases trivially match
		if (queryTokens.length == 0) {
			return true;
		}
		
		// stem the tokens of the second phrase once up front instead of
		// re-stemming all of them for every token of the first phrase
		String[] targetStems = t.split(" ");
		for (int i = 0; i < targetStems.length; i++) {
			targetStems[i] = SnowballStemmer.stem(targetStems[i]);
		}
		
		for (String queryToken : queryTokens) {
			String queryStem = SnowballStemmer.stem(queryToken);
			
			boolean exists = false;
			for (String targetStem : targetStems) {
				if (queryStem.equalsIgnoreCase(targetStem)) {
					exists = true;
					break;
				}
			}
			
			// a single token without a counterpart is enough to fail
			if (!exists) {
				return false;
			}
		}
		
		return true;
	}
 
开发者ID:claritylab,项目名称:lucida,代码行数:33,代码来源:CorefResolver.java

示例2: fuzzyContainsToken

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
 * Performs a fuzzy lookup of a token. The token is trimmed, lower-cased and
 * stemmed before the lookup. It is considered contained in the dictionary
 * if the stemmed token itself is in the dictionary, or if there is a token
 * T in the dictionary for which <code>getLevenshteinDistance()</code>
 * yields a value <code>&lt;= maxDistance</code>.
 * 
 * @param token the token to look up
 * @param maxDistance the maximum Levenshtein edit distance for fuzzy
 *            comparison; with <code>0</code> only the exact lookup is done
 * @return <code>true</code> iff the dictionary contains the token or a
 *         token within the given distance of it
 */
public boolean fuzzyContainsToken(String token, int maxDistance) {
	String stem = SnowballStemmer.stem(token.trim().toLowerCase());
	
	// an exact hit is sufficient for any distance threshold
	if (this.tokens.contains(stem)) {
		return true;
	}
	
	// a threshold of 0 allows exact hits only, so there is nothing to scan
	if (maxDistance == 0) {
		return false;
	}
	
	// compare against each dictionary token until one is close enough
	for (Iterator<String> entries = this.tokens.iterator(); entries.hasNext(); ) {
		if (getLevenshteinDistance(stem, entries.next(), maxDistance, true, 1, 1) <= maxDistance) {
			return true;
		}
	}
	
	return false;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:23,代码来源:HashDictionary.java

示例3: normalize

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
 * Computes a normalized form of a string: the string is lower-cased, split
 * into tokens by <code>NETagger.tokenize()</code>, each token is replaced
 * by its stem, and the stems are handed to <code>concatWithSpaces()</code>.
 * Strings that differ only in case or inflection are intended to yield the
 * same normalization.
 * 
 * @param s the string
 * @return normalized string
 */
// TODO use noun and verb stemming (also for equals...Norm() methods)
public static String normalize(String s) {
	// lower-case first, then break the string into tokens
	String[] words = NETagger.tokenize(s.toLowerCase());
	
	// overwrite each token with its stem
	int idx = 0;
	while (idx < words.length) {
		words[idx] = SnowballStemmer.stem(words[idx]);
		idx++;
	}
	
	return concatWithSpaces(words);
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:21,代码来源:StringUtils.java

示例4: getTermCounters

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
 * Counts stemmed terms in the search result summaries retrieved for the
 * given targets. For each target one web search restricted to English
 * pages is issued, asking for <code>MAX_RESULTS_PERQUERY</code> results.
 * Every non-null summary is tokenized, and each token is lower-cased and
 * stemmed. Stems longer than one character are counted in the returned map.
 * <p>
 * A failing search is retried after a pause of one second. Once
 * <code>RETRIES</code> retries have failed, an error is printed and the
 * JVM is terminated via <code>System.exit(1)</code>.
 * 
 * @param targets the search strings to retrieve summaries for
 * @return map from stemmed term to its counter, accumulated over all targets
 * @see info.ephyra.answerselection.filters.WebTermImportanceFilter#getTermCounters(java.lang.String[])
 */
@Override
public HashMap<String, TermCounter> getTermCounters(String[] targets) {
	HashMap<String, TermCounter> termCounters = new HashMap<String, TermCounter>();
	for (String target : targets) {
		
		//	get snippets from yahoo
		SearchClient client = new SearchClient(YAHOO_ID);
		
		// create request
		WebSearchRequest request = new WebSearchRequest(target);
		request.setLanguage("en");  // search for English pages only
		request.setStart(BigInteger.valueOf(0));
		request.setResults(MAX_RESULTS_PERQUERY);
		
		// perform search, retrying on failure
		WebSearchResult[] searchResults = null;
		int retries = 0;
		while (searchResults == null) {
			try {
				searchResults = client.webSearch(request).listResults();
			} catch (Exception e) {
				MsgPrinter.printSearchError(e);  // print search error message
				
				if (retries == RETRIES) {
					MsgPrinter.printErrorMsg("\nSearch failed.");
					System.exit(1);
				}
				retries++;
				
				try {
					YahooKM.sleep(1000);
				} catch (InterruptedException ie) {
					// keep the interrupt visible to callers instead of
					// silently discarding it
					Thread.currentThread().interrupt();
				}
			}
		}
		
		//	parse yahoo snippets
		for (int i = 0; i < searchResults.length; i++) {
			
			String summary = searchResults[i].getSummary();
			if (summary == null) {
				continue;
			}
			
			//	tokenize the summary
			String[] sentence = NETagger.tokenize(summary);
			
			//	count every stemmed token longer than one character
			for (int s = 0; s < sentence.length; s++) {
				String term = SnowballStemmer.stem(sentence[s].toLowerCase());
				if (term.length() > 1) {
					TermCounter counter = termCounters.get(term);
					if (counter == null) {
						counter = new TermCounter();
						termCounters.put(term, counter);
					}
					counter.increment();
				}
			}
		}
		
	}
	return termCounters;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:63,代码来源:YahooTermImportanceFilter.java

示例5: getGoogleTermCounters

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
 * Counts stemmed terms in Google result snippets for a target. Results are
 * fetched in pages of <code>MAX_RESULTS_PERQUERY</code>, starting at
 * offsets 0, <code>MAX_RESULTS_PERQUERY</code>, ... below
 * <code>MAX_RESULTS_TOTAL</code>. In each snippet, markup tags are replaced
 * by spaces and <code>&amp;#39;</code> is turned into an apostrophe. The
 * snippet is then tokenized, and each token is lower-cased and stemmed.
 * Stems longer than one character are counted.
 * <p>
 * A failing search is retried after a pause of one second. Once
 * <code>RETRIES</code> retries have failed, the counters collected so far
 * are returned.
 * 
 * @param target the search string
 * @return map from stemmed term to its counter; possibly incomplete if a
 *         search failed permanently
 */
private HashMap<String, TermCounter> getGoogleTermCounters(String target) {
	HashMap<String, TermCounter> targetTermCounters = new HashMap<String, TermCounter>();
	
	//	subsequently get top MAX_RESULTS_TOTAL snippets, MAX_RESULTS_PERQUERY each time
	for (int startResult = 0; startResult < MAX_RESULTS_TOTAL; startResult += MAX_RESULTS_PERQUERY) {
		
		//	get snippets from google
		GoogleSearch search = new GoogleSearch();
		if (TEST_TARGET_GENERATION) System.out.println("Got search ...");
		
		// set license key
		search.setKey(GOOGLE_KEY);
		if (TEST_TARGET_GENERATION) System.out.println(" - key is " + GOOGLE_KEY);
		
		// set search string
		search.setQueryString(target);
		if (TEST_TARGET_GENERATION) System.out.println(" - target is " + target);
		
		// set language to English only
		search.setLanguageRestricts("English");
		if (TEST_TARGET_GENERATION) System.out.println(" - language set");
		
		// set hit position of first search result
		search.setStartResult(startResult);
		if (TEST_TARGET_GENERATION) System.out.println(" - start result set to " + startResult);
		
		// set maximum number of search results
		search.setMaxResults(MAX_RESULTS_PERQUERY);
		if (TEST_TARGET_GENERATION) System.out.println(" - max results set");
		
		// perform search, retrying on failure
		GoogleSearchResult googleResult = null;
		int retries = 0;
		while (googleResult == null) {
			try {
				googleResult = search.doSearch();
			} catch (GoogleSearchFault e) {
				MsgPrinter.printSearchError(e);  // print search error message
				
				if (retries == RETRIES) {
					MsgPrinter.printErrorMsg("\nSearch failed.");
					// give up, but hand back what has been counted so far
					return targetTermCounters;
				}
				retries++;
				
				try {
					GoogleKM.sleep(1000);
				} catch (InterruptedException ie) {
					// keep the interrupt visible to callers instead of
					// silently discarding it
					Thread.currentThread().interrupt();
				}
			}
		}
		
		// get snippets
		GoogleSearchResultElement[] elements = googleResult.getResultElements();
		if (TEST_TARGET_GENERATION) System.out.println(" - got results: " + elements.length);
		
		//	parse google snippets
		for (int i = 0; i < elements.length; i++) {
			// replace markup tags by spaces and decode the apostrophe entity
			String plain = elements[i].getSnippet().replaceAll("\\<[^\\>]++\\>", " ");
			plain = plain.replaceAll("\\&\\#39\\;", "'");
			if (TEST_TARGET_GENERATION) System.out.println(" - plain: " + plain);
			
			//	tokenize the snippet
			String[] sentence = NETagger.tokenize(plain);
			
			//	count every stemmed token longer than one character
			for (int s = 0; s < sentence.length; s++) {
				String term = SnowballStemmer.stem(sentence[s].toLowerCase());
				if (term.length() > 1) {
					TermCounter counter = targetTermCounters.get(term);
					if (counter == null) {
						counter = new TermCounter();
						targetTermCounters.put(term, counter);
					}
					counter.increment();
				}
			}
		}
	}
	
	return targetTermCounters;
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:81,代码来源:WikipediaGoogleTermImportanceFilter.java

示例6: containsToken

import info.ephyra.nlp.SnowballStemmer; //导入方法依赖的package包/类
/**
 * Checks whether the dictionary holds a word token. The token is trimmed,
 * lower-cased and stemmed before it is looked up.
 * 
 * @param token the word token to look up
 * @return <code>true</code> iff the stemmed token is among the tokens of
 *         the dictionary
 */
public boolean containsToken(String token) {
	String trimmed = token.trim();
	String stem = SnowballStemmer.stem(trimmed.toLowerCase());
	
	return tokens.contains(stem);
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:12,代码来源:HashDictionary.java


注:本文中的info.ephyra.nlp.SnowballStemmer.stem方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。