當前位置: 首頁>>代碼示例>>Java>>正文


Java SnowballStemmer類代碼示例

本文整理匯總了Java中info.ephyra.nlp.SnowballStemmer的典型用法代碼示例。如果您正苦於以下問題:Java SnowballStemmer類的具體用法?Java SnowballStemmer怎麽用?Java SnowballStemmer使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


SnowballStemmer類屬於info.ephyra.nlp包,在下文中一共展示了SnowballStemmer類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: TRECNugget

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Creates a nugget and computes its size, i.e. the number of distinct
 * lower-cased stems among the tokens of its text. Tokens rejected by
 * <code>FunctionWords.lookup()</code> and tokens shorter than two characters
 * are not counted.
 * 
 * @param	targetID	the targetID of the TREC target the OTHER question belongs to
 * @param	questionID	the ID of the OTHER question
 * @param	nuggetID	the ID of the nugget
 * @param	nuggetType	the type of the nugget (okay or vital)
 * @param	nugget		the nugget's text
 */
public TRECNugget(String targetID, String questionID, String nuggetID, String nuggetType, String nugget) {
	this.targetID = targetID;
	this.questionID = questionID;
	this.nuggetID = nuggetID;
	this.nuggetType = nuggetType;
	this.nugget = nugget;
	
	// gather the distinct stems of the tokens that are kept
	HashSet<String> stems = new HashSet<String>();
	for (String token : NETagger.tokenize(nugget)) {
		if (FunctionWords.lookup(token) || token.length() <= 1) {
			continue;
		}
		stems.add(SnowballStemmer.stem(token).toLowerCase());
	}
	
	this.size = stems.size();
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:23,代碼來源:TRECNugget.java

示例2: covers

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Determines which tokens of a nugget are not covered by a result. Both
 * strings are tokenized; tokens rejected by
 * <code>FunctionWords.lookup()</code> and tokens shorter than two characters
 * are dropped, and the remaining tokens are stemmed and lower-cased before
 * they are compared.
 * 
 * @param	result	the result String
 * @param	nugget	the nugget String
 * @return the normalized tokens of the specified nugget String not contained
 *         in the specified result String (empty if the nugget is fully covered)
 */
private String[] covers(String result, String nugget) {
	// normalized tokens of the result
	HashSet<String> resultStems = new HashSet<String>();
	for (String token : NETagger.tokenize(result)) {
		if (FunctionWords.lookup(token) || token.length() <= 1) {
			continue;
		}
		resultStems.add(SnowballStemmer.stem(token).toLowerCase());
	}
	
	// normalized tokens of the nugget
	HashSet<String> nuggetStems = new HashSet<String>();
	for (String token : NETagger.tokenize(nugget)) {
		if (FunctionWords.lookup(token) || token.length() <= 1) {
			continue;
		}
		nuggetStems.add(SnowballStemmer.stem(token).toLowerCase());
	}
	
	// whatever is left after removing the result's tokens is uncovered
	nuggetStems.removeAll(resultStems);
	return nuggetStems.toArray(new String[nuggetStems.size()]);
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:24,代碼來源:NuggetEvaluationFilter.java

示例3: add

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Adds a word to the dictionary. The word is trimmed, lower-cased,
 * tokenized and stemmed first; the normalized word is stored as a whole and
 * each of its space-separated tokens is stored individually. A
 * <code>null</code> word is ignored.
 * 
 * @param word the word to add
 */
public void add(String word) {
	if (word == null) {
		return;
	}
	
	String normalized = NETagger.tokenizeWithSpaces(word.trim().toLowerCase());
	normalized = SnowballStemmer.stemAllTokens(normalized);
	
	// store the complete (possibly multi-token) entry
	if (normalized.length() > 0) {
		words.add(normalized);
	}
	
	// store the individual tokens and track the longest entry seen so far
	String[] parts = normalized.split(" ");
	if (parts.length > maxTokens) {
		maxTokens = parts.length;
	}
	for (String part : parts) {
		if (part.length() > 0) {
			this.tokens.add(part);
		}
	}
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:21,代碼來源:HashDictionary.java

示例4: equalsCommonNorm

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Checks whether the normalizations of two strings share at least one
 * token. Both strings are lower-cased and tokenized; tokens shorter than two
 * characters and tokens rejected by <code>FunctionWords.lookup()</code> are
 * dropped, and the remaining tokens are stemmed.
 * 
 * @param s1 string 1
 * @param s2 string 2
 * @return true, iff the normalizations have a stemmed token in common
 */
public static boolean equalsCommonNorm(String s1, String s2) {
	String lower1 = s1.toLowerCase();
	String lower2 = s2.toLowerCase();
	
	String[] rawTokens1 = NETagger.tokenize(lower1);
	String[] rawTokens2 = NETagger.tokenize(lower2);
	
	// stems of the tokens kept from the first string
	ArrayList<String> stems1 = new ArrayList<String>();
	for (String raw : rawTokens1) {
		if (raw.length() <= 1 || FunctionWords.lookup(raw)) {
			continue;
		}
		stems1.add(SnowballStemmer.stem(raw));
	}
	
	// stems of the tokens kept from the second string
	HashSet<String> stems2 = new HashSet<String>();
	for (String raw : rawTokens2) {
		if (raw.length() <= 1 || FunctionWords.lookup(raw)) {
			continue;
		}
		stems2.add(SnowballStemmer.stem(raw));
	}
	
	// a single shared stem is sufficient
	for (String stem : stems1) {
		if (stems2.contains(stem)) {
			return true;
		}
	}
	return false;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:33,代碼來源:StringUtils.java

示例5: equalsCommonProp

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Compares two strings, using the same criterion as the
 * <code>equalsCommonNorm()</code> method, but considers only words starting
 * with a capital letter A-Z (proper nouns).
 * <p>
 * The capitalization test is applied to the tokens in their original case.
 * Lower-casing the input before the test (as was done previously) made the
 * test fail for every token, so the method could never return true.
 * 
 * @param s1 string 1
 * @param s2 string 2
 * @return true, iff the two strings share at least one stemmed capitalized
 *         token
 */
public static boolean equalsCommonProp(String s1, String s2) {
	HashSet<String> stems1 = properNounStems(s1);
	HashSet<String> stems2 = properNounStems(s2);
	
	// check for common token
	for (String stem : stems1) {
		if (stems2.contains(stem)) {
			return true;
		}
	}
	return false;
}

/**
 * Collects the lower-cased stems of all tokens of a string that are at
 * least two characters long, start with a capital letter A-Z and are not
 * rejected by <code>FunctionWords.lookup()</code>.
 * 
 * @param s the string
 * @return stems of the capitalized tokens
 */
private static HashSet<String> properNounStems(String s) {
	HashSet<String> stems = new HashSet<String>();
	for (String token : NETagger.tokenize(s)) {
		if (token.length() < 2) {
			continue;
		}
		// test the original case; the token is lower-cased only afterwards
		char first = token.charAt(0);
		if (first < 'A' || first > 'Z') {
			continue;
		}
		String lower = token.toLowerCase();
		if (FunctionWords.lookup(lower)) {
			continue;
		}
		stems.add(SnowballStemmer.stem(lower));
	}
	return stems;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:32,代碼來源:StringUtils.java

示例6: match

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
	 * Checks if every token of the first phrase has a counterpart in the
	 * second phrase. Both phrases are passed through <code>unparse()</code>,
	 * stripped of "'s" and apostrophes and split at spaces; tokens are
	 * compared by their stems, ignoring case.
	 * 
	 * @param npq parsed string whose tokens must all be found
	 * @param npt parsed string that is searched
	 * @return true, iff each token of <code>npq</code> matches some token of
	 *         <code>npt</code>
	 */
	private static boolean match(String npq, String npt) {
		String queryPhrase = unparse(npq).replace("'s", "").replace("'", "");
		String targetPhrase = unparse(npt).replace("'s", "").replace("'", "");
		
		for (String queryToken : queryPhrase.split(" ")) {
			String queryStem = SnowballStemmer.stem(queryToken);
			
			// look for a token in the target phrase with the same stem
			boolean found = false;
			for (String targetToken : targetPhrase.split(" ")) {
				String targetStem = SnowballStemmer.stem(targetToken);
				if (queryStem.equalsIgnoreCase(targetStem)) {
					found = true;
					break;
				}
			}
			
			// one unmatched token is enough to reject
			if (!found) return false;
		}
		
		return true;
	}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:33,代碼來源:CorefResolver.java

示例7: HashDictionary

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Creates a <code>HashDictionary</code> from a list of words in a file.
 * Each line is trimmed, lower-cased, tokenized and stemmed; lines starting
 * with "//" are treated as comments and skipped. If the file name is
 * <code>null</code>, the dictionary stays empty.
 * 
 * @param fileName file containing a list of words
 * @throws IOException if the list could not be read from the file
 */
public HashDictionary(String fileName) throws IOException {
	this();
	
	if (fileName == null) return;
	
	BufferedReader in = new BufferedReader(new FileReader(new File(fileName)));
	try {
		// readLine() returns null at the end of the file; ready() only tells
		// whether a read would block and is not an end-of-file test
		String line;
		while ((line = in.readLine()) != null) {
			// read and normalize word
			String word = line.trim();
			if (word.startsWith("//")) continue;  // skip comments
			word = NETagger.tokenizeWithSpaces(word.toLowerCase());
			word = SnowballStemmer.stemAllTokens(word);
			
			// add whole word
			if (word.length() > 0) words.add(word);
			
			// add tokens of word
			String[] tokens = word.split(" ");
			if (tokens.length > maxTokens) maxTokens = tokens.length;
			for (int p = 0; p < tokens.length; p++)
				if (tokens[p].length() > 0) this.tokens.add(tokens[p]);
		}
	} finally {
		// release the file handle even if reading or normalization fails
		in.close();
	}
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:34,代碼來源:HashDictionary.java

示例8: contains

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Looks up a word. The word is trimmed, lower-cased, tokenized and stemmed
 * before it is compared with the stored words.
 * 
 * @param word the word to look up
 * @return <code>true</code> iff the word was found
 */
public boolean contains(String word) {
	String cleaned = word.trim().toLowerCase();
	String tokenized = NETagger.tokenizeWithSpaces(cleaned);
	String stemmed = SnowballStemmer.stemAllTokens(tokenized);
	
	return words.contains(stemmed);
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:13,代碼來源:HashDictionary.java

示例9: fuzzyContains

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Does a fuzzy lookup for a word. The specified word w is considered as
 * contained in the dictionary if there is a word W in the dictionary such
 * that <code>LevenshteinDistance(w, W) &lt;= maxDistance</code>. The word is
 * trimmed, lower-cased, tokenized and stemmed before the comparison.
 * 
 * @param word the word to look up
 * @param maxDistance the maximum Levenshtein edit distance for fuzzy
 *            comparison
 * @return <code>true</code> iff the word was found
 */
public boolean fuzzyContains(String word, int maxDistance) {
	String normalized = NETagger.tokenizeWithSpaces(word.trim().toLowerCase());
	normalized = SnowballStemmer.stemAllTokens(normalized);
	
	// an exact hit settles the question for any distance
	if (this.words.contains(normalized)) {
		return true;
	}
	// with distance 0 only an exact hit counts
	if (maxDistance == 0) {
		return false;
	}
	
	// otherwise compare against every stored word
	for (String candidate : this.words) {
		int distance = getLevenshteinDistance(normalized, candidate, maxDistance, true, 1, 1);
		if (distance <= maxDistance) {
			return true;
		}
	}
	
	return false;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:24,代碼來源:HashDictionary.java

示例10: fuzzyContainsToken

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Does a fuzzy lookup for a token. The specified token t is considered as
 * contained in the dictionary if there is a token T in the dictionary such
 * that <code>LevenshteinDistance(t, T) &lt;= maxDistance</code>. The token
 * is trimmed, lower-cased and stemmed before the comparison.
 * 
 * @param token the token to look up
 * @param maxDistance the maximum Levenshtein edit distance for fuzzy
 *            comparison
 * @return <code>true</code> iff a word in the dictionary contains the token
 */
public boolean fuzzyContainsToken(String token, int maxDistance) {
	String stem = SnowballStemmer.stem(token.trim().toLowerCase());
	
	// an exact hit settles the question for any distance
	if (this.tokens.contains(stem)) {
		return true;
	}
	// with distance 0 only an exact hit counts
	if (maxDistance == 0) {
		return false;
	}
	
	// otherwise compare against every stored token
	for (String candidate : this.tokens) {
		int distance = getLevenshteinDistance(stem, candidate, maxDistance, true, 1, 1);
		if (distance <= maxDistance) {
			return true;
		}
	}
	
	return false;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:23,代碼來源:HashDictionary.java

示例11: normalize

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Normalizes a string by lower-casing it, tokenizing it, stemming every
 * token and joining the stems with <code>concatWithSpaces()</code>. Similar
 * strings are mapped to equal normalizations.
 * 
 * @param s the string
 * @return normalized string
 */
public static String normalize(String s) {
	// TODO use noun and verb stemming (also for equals...Norm() methods)
	String[] parts = NETagger.tokenize(s.toLowerCase());
	
	// replace each token by its stem
	int index = 0;
	while (index < parts.length) {
		parts[index] = SnowballStemmer.stem(parts[index]);
		index++;
	}
	
	return concatWithSpaces(parts);
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:21,代碼來源:StringUtils.java

示例12: main

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Test driver: sets up the tokenizer, stemmer and NE taggers, loads the
 * TREC targets from the file given as first argument and applies a
 * <code>WikipediaTermImportanceFilter</code> to a dummy result for each
 * target.
 * 
 * @param args command line arguments; <code>args[0]</code> is passed to
 *             <code>TREC13To16Parser.loadTargets()</code>
 */
public static void main(String[] args) {
		TEST_TERM_DOWMLOD = true;
		
		MsgPrinter.enableStatusMsgs(true);
		MsgPrinter.enableErrorMsgs(true);
		
		// fail with a readable message instead of an
		// ArrayIndexOutOfBoundsException when the argument is missing
		if (args.length < 1) {
			MsgPrinter.printErrorMsg("Missing argument: TREC targets to load.");
			return;
		}
		
		// create tokenizer
		MsgPrinter.printStatusMsg("Creating tokenizer...");
		if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
			MsgPrinter.printErrorMsg("Could not create tokenizer.");
//		LingPipe.createTokenizer();
		
//		// create sentence detector
//		MsgPrinter.printStatusMsg("Creating sentence detector...");
//		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
//			MsgPrinter.printErrorMsg("Could not create sentence detector.");
//		LingPipe.createSentenceDetector();
		
		// create stemmer
		MsgPrinter.printStatusMsg("Creating stemmer...");
		SnowballStemmer.create();
		
//		// create part of speech tagger
//		MsgPrinter.printStatusMsg("Creating POS tagger...");
//		if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
//									 "res/nlp/postagger/opennlp/tagdict"))
//			MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
//		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
//				"train-wsj-0-18.holder"))
//			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
		
//		// create chunker
//		MsgPrinter.printStatusMsg("Creating chunker...");
//		if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
//								   "EnglishChunk.bin.gz"))
//			MsgPrinter.printErrorMsg("Could not create chunker.");
		
		// create named entity taggers
		MsgPrinter.printStatusMsg("Creating NE taggers...");
		NETagger.loadListTaggers("res/nlp/netagger/lists/");
		NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
		MsgPrinter.printStatusMsg("  ...loading models");
//		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
//			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
//		if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
//			MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
		MsgPrinter.printStatusMsg("  ...done");
		
		WikipediaTermImportanceFilter wtif = new WikipediaTermImportanceFilter(NO_NORMALIZATION, NO_NORMALIZATION, false);
		TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
		for (TRECTarget target : targets) {
			String question = target.getTargetDesc();
			
			// query generation
			MsgPrinter.printGeneratingQueries();
			String qn = QuestionNormalizer.normalize(question);
			MsgPrinter.printNormalization(qn);  // print normalized question string
			Logger.logNormalization(qn);  // log normalized question string
			String[] kws = KeywordExtractor.getKeywords(qn);
			AnalyzedQuestion aq = new AnalyzedQuestion(question);
			aq.setKeywords(kws);
			aq.setFactoid(false);
			
			Query[] queries = new BagOfWordsG().generateQueries(aq);
			for (int q = 0; q < queries.length; q++)
				queries[q].setOriginalQueryString(question);
			
			// the dummy result below needs a query to refer to
			if (queries.length == 0) {
				MsgPrinter.printErrorMsg("No queries generated for target: " + question);
				continue;
			}
			
			Result[] results = new Result[1];
			results[0] = new Result("This would be the answer", queries[0]);
			wtif.apply(results);
		}
	}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:73,代碼來源:WikipediaTermImportanceFilter.java

示例13: getTermCounters

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Counts the stemmed, lower-cased terms in the summaries of the search
 * results retrieved for each target. A failing search is retried after a
 * pause of one second; once <code>RETRIES</code> retries have failed, the
 * JVM is terminated.
 * 
 * @see info.ephyra.answerselection.filters.WebTermImportanceFilter#getTermCounters(java.lang.String[])
 */
@Override
public HashMap<String, TermCounter> getTermCounters(String[] targets) {
	HashMap<String, TermCounter> counters = new HashMap<String, TermCounter>();
	
	for (String target : targets) {
		// set up the search for this target
		SearchClient client = new SearchClient(YAHOO_ID);
		WebSearchRequest request = new WebSearchRequest(target);
		request.setLanguage("en");  // search for English pages only
		request.setStart(BigInteger.valueOf(0));
		request.setResults(MAX_RESULTS_PERQUERY);
		
		// run the search, retrying on failure
		WebSearchResult[] hits = null;
		int failures = 0;
		while (hits == null) {
			try {
				hits = client.webSearch(request).listResults();
			} catch (Exception e) {
				MsgPrinter.printSearchError(e);  // print search error message
				
				if (failures == RETRIES) {
					MsgPrinter.printErrorMsg("\nSearch failed.");
					System.exit(1);
				}
				failures++;
				
				// pause before the next attempt; an interruption only
				// shortens the pause
				try {
					YahooKM.sleep(1000);
				} catch (InterruptedException ie) {}
			}
		}
		
		// count the terms of all summaries
		for (WebSearchResult hit : hits) {
			String summary = hit.getSummary();
			if (summary == null) {
				continue;
			}
			
			for (String token : NETagger.tokenize(summary)) {
				String term = SnowballStemmer.stem(token.toLowerCase());
				if (term.length() <= 1) {
					continue;
				}
				
				TermCounter counter = counters.get(term);
				if (counter == null) {
					counter = new TermCounter();
					counters.put(term, counter);
				}
				counter.increment();
			}
		}
	}
	
	return counters;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:63,代碼來源:YahooTermImportanceFilter.java

示例14: main

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Test driver: sets up the tokenizer, stemmer, POS tagger, chunker and NE
 * taggers, loads the TREC targets from the file given as first argument and
 * applies a <code>TargetGeneratorTest</code> filter to a dummy result for
 * each target.
 * 
 * @param args command line arguments; <code>args[0]</code> is passed to
 *             <code>TREC13To16Parser.loadTargets()</code>
 */
public static void main(String[] args) {
		TEST_TARGET_GENERATION = true;
		
		MsgPrinter.enableStatusMsgs(true);
		MsgPrinter.enableErrorMsgs(true);
		
		// fail with a readable message instead of an
		// ArrayIndexOutOfBoundsException when the argument is missing
		if (args.length < 1) {
			MsgPrinter.printErrorMsg("Missing argument: TREC targets to load.");
			return;
		}
		
		// create tokenizer
		MsgPrinter.printStatusMsg("Creating tokenizer...");
		if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
			MsgPrinter.printErrorMsg("Could not create tokenizer.");
//		LingPipe.createTokenizer();
		
		// create sentence detector
//		MsgPrinter.printStatusMsg("Creating sentence detector...");
//		if (!OpenNLP.createSentenceDetector("res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
//			MsgPrinter.printErrorMsg("Could not create sentence detector.");
//		LingPipe.createSentenceDetector();
		
		// create stemmer
		MsgPrinter.printStatusMsg("Creating stemmer...");
		SnowballStemmer.create();
		
		// create part of speech tagger
		MsgPrinter.printStatusMsg("Creating POS tagger...");
		if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz",
									 "res/nlp/postagger/opennlp/tagdict"))
			MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
//		if (!StanfordPosTagger.init("res/nlp/postagger/stanford/" +
//				"train-wsj-0-18.holder"))
//			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");
		
		// create chunker
		MsgPrinter.printStatusMsg("Creating chunker...");
		if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/" +
								   "EnglishChunk.bin.gz"))
			MsgPrinter.printErrorMsg("Could not create chunker.");
		
		// create named entity taggers
		MsgPrinter.printStatusMsg("Creating NE taggers...");
		NETagger.loadListTaggers("res/nlp/netagger/lists/");
		NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
		MsgPrinter.printStatusMsg("  ...loading models");
//		if (!NETagger.loadNameFinders("res/nlp/netagger/opennlp/"))
//			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
		if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
			MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
		MsgPrinter.printStatusMsg("  ...done");
		
		WebTermImportanceFilter wtif = new TargetGeneratorTest(NO_NORMALIZATION);
		TRECTarget[] targets = TREC13To16Parser.loadTargets(args[0]);
		for (TRECTarget target : targets) {
			String question = target.getTargetDesc();
			
			// query generation
			MsgPrinter.printGeneratingQueries();
			String qn = QuestionNormalizer.normalize(question);
			MsgPrinter.printNormalization(qn);  // print normalized question string
			Logger.logNormalization(qn);  // log normalized question string
			String[] kws = KeywordExtractor.getKeywords(qn);
			AnalyzedQuestion aq = new AnalyzedQuestion(question);
			aq.setKeywords(kws);
			aq.setFactoid(false);
			
			Query[] queries = new BagOfWordsG().generateQueries(aq);
			for (int q = 0; q < queries.length; q++)
				queries[q].setOriginalQueryString(question);
			
			// the dummy result below needs a query to refer to
			if (queries.length == 0) {
				MsgPrinter.printErrorMsg("No queries generated for target: " + question);
				continue;
			}
			
			Result[] results = new Result[1];
			results[0] = new Result("This would be the answer", queries[0]);
			wtif.apply(results);
		}
	}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:73,代碼來源:WebTermImportanceFilter.java

示例15: getGoogleTermCounters

import info.ephyra.nlp.SnowballStemmer; //導入依賴的package包/類
/**
 * Counts the stemmed, lower-cased terms in the snippets of the search
 * results for a target. Results are fetched in pages of
 * <code>MAX_RESULTS_PERQUERY</code> until <code>MAX_RESULTS_TOTAL</code> is
 * reached. A failing search is retried after a pause of one second; once
 * <code>RETRIES</code> retries have failed, the counters collected so far
 * are returned.
 * 
 * @param target the search string
 * @return term counters, keyed by term
 */
private HashMap<String, TermCounter> getGoogleTermCounters(String target) {
	HashMap<String, TermCounter> counters = new HashMap<String, TermCounter>();
	
	//	fetch the snippets page by page
	int offset = 0;
	while (offset < MAX_RESULTS_TOTAL) {
		
		//	configure the search for this page
		GoogleSearch search = new GoogleSearch();
		if (TEST_TARGET_GENERATION) System.out.println("Got search ...");
		
		search.setKey(GOOGLE_KEY);  // license key
		if (TEST_TARGET_GENERATION) System.out.println(" - key is " + GOOGLE_KEY);
		
		search.setQueryString(target);  // search string
		if (TEST_TARGET_GENERATION) System.out.println(" - target is " + target);
		
		search.setLanguageRestricts("English");  // English only
		if (TEST_TARGET_GENERATION) System.out.println(" - language set");
		
		search.setStartResult(offset);  // hit position of first search result
		if (TEST_TARGET_GENERATION) System.out.println(" - start result set to " + offset);
		
		search.setMaxResults(MAX_RESULTS_PERQUERY);  // page size
		if (TEST_TARGET_GENERATION) System.out.println(" - max results set");
		
		//	run the search, retrying on failure
		GoogleSearchResult page = null;
		int failures = 0;
		while (page == null) {
			try {
				page = search.doSearch();
			} catch (GoogleSearchFault e) {
				MsgPrinter.printSearchError(e);  // print search error message
				
				if (failures == RETRIES) {
					MsgPrinter.printErrorMsg("\nSearch failed.");
					// give up and hand back what has been counted so far
					return counters;
				}
				failures++;
				
				// pause before the next attempt; an interruption only
				// shortens the pause
				try {
					GoogleKM.sleep(1000);
				} catch (InterruptedException ie) {}
			}
		}
		
		GoogleSearchResultElement[] elements = page.getResultElements();
		if (TEST_TARGET_GENERATION) System.out.println(" - got results: " + elements.length);
		
		//	count the terms of all snippets
		for (GoogleSearchResultElement element : elements) {
			// strip markup tags and decode the apostrophe entity
			String plain = element.getSnippet().replaceAll("\\<[^\\>]++\\>", " ");
			plain = plain.replaceAll("\\&\\#39\\;", "'");
			if (TEST_TARGET_GENERATION) System.out.println(" - plain: " + plain);
			
			for (String token : NETagger.tokenize(plain)) {
				String term = SnowballStemmer.stem(token.toLowerCase());
				if (term.length() <= 1) {
					continue;
				}
				
				TermCounter counter = counters.get(term);
				if (counter == null) {
					counter = new TermCounter();
					counters.put(term, counter);
				}
				counter.increment();
			}
		}
		
		offset += MAX_RESULTS_PERQUERY;
	}
	
	return counters;
}
 
開發者ID:claritylab,項目名稱:lucida,代碼行數:81,代碼來源:WikipediaGoogleTermImportanceFilter.java


注:本文中的info.ephyra.nlp.SnowballStemmer類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。