本文整理汇总了Java中com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary.shouldInclude方法的典型用法代码示例。如果您正苦于以下问题:Java CoreStopWordDictionary.shouldInclude方法的具体用法?Java CoreStopWordDictionary.shouldInclude怎么用?Java CoreStopWordDictionary.shouldInclude使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary
的用法示例。
在下文中一共展示了CoreStopWordDictionary.shouldInclude方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: convertSentenceListToDocument
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Converts a list of sentences into a document: each sentence becomes a
 * list of its words, with stop words filtered out.
 *
 * @param sentenceList the sentences to convert
 * @return a document, one word list per input sentence
 */
private static List<List<String>> convertSentenceListToDocument(List<String> sentenceList)
{
    List<List<String>> docs = new ArrayList<List<String>>(sentenceList.size());
    for (String sentence : sentenceList)
    {
        List<String> wordList = new LinkedList<String>();
        // Segment the sentence and keep only non-stop-word terms.
        for (Term term : StandardTokenizer.segment(sentence.toCharArray()))
        {
            if (!CoreStopWordDictionary.shouldInclude(term))
            {
                continue;
            }
            wordList.add(term.word);
        }
        docs.add(wordList);
    }
    return docs;
}
示例2: seg2sentence
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Splits a text into sentences, each represented as a list of terms with
 * stop words removed.
 *
 * @param text the input text
 * @return the sentences, each a list of retained terms
 */
public static List<List<Term>> seg2sentence(String text)
{
    List<List<Term>> sentenceList = SEGMENT.seg2sentence(text);
    for (List<Term> sentence : sentenceList)
    {
        // Remove stop words in place via the iterator.
        Iterator<Term> iterator = sentence.iterator();
        while (iterator.hasNext())
        {
            Term term = iterator.next();
            if (!CoreStopWordDictionary.shouldInclude(term))
            {
                iterator.remove();
            }
        }
    }
    return sentenceList;
}
示例3: testSegmentCorpus
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
// Segments every file of the Sogou text-classification corpus and writes
// the space-joined, stop-word-filtered tokens to a "segmented" directory.
// NOTE(review): paths are hard-coded to a local Windows machine.
public void testSegmentCorpus() throws Exception
{
    File root = new File("D:\\Doc\\语料库\\搜狗文本分类语料库精简版");
    for (File folder : root.listFiles())
    {
        if (!folder.isDirectory())
        {
            continue;
        }
        for (File file : folder.listFiles())
        {
            System.out.println(file.getAbsolutePath());
            StringBuilder output = new StringBuilder();
            for (Term term : HanLP.segment(IOUtil.readTxt(file.getAbsolutePath())))
            {
                if (CoreStopWordDictionary.shouldInclude(term))
                {
                    output.append(term.word).append(" ");
                }
            }
            IOUtil.saveTxt("D:\\Doc\\语料库\\segmented\\" + folder.getName() + "_" + file.getName(), output.toString());
        }
    }
}
示例4: segment
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Segments a text and drops stop words from the result.
 *
 * @param text the text to segment
 * @return the list of retained terms
 */
public static List<Term> segment(char[] text)
{
    List<Term> resultList = SEGMENT.seg(text);
    // Strip stop words in place.
    for (Iterator<Term> iterator = resultList.iterator(); iterator.hasNext(); )
    {
        if (!CoreStopWordDictionary.shouldInclude(iterator.next()))
        {
            iterator.remove();
        }
    }
    return resultList;
}
示例5: getTopSentenceList
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * One-call interface: extracts the top-ranked key sentences of a document.
 *
 * @param document the target document
 * @param size the number of key sentences wanted
 * @return the key sentences, ordered by rank
 */
public static List<String> getTopSentenceList(String document, int size)
{
    List<String> sentenceList = spiltSentence(document);
    List<List<String>> docs = new ArrayList<List<String>>();
    // Tokenize each sentence, keeping only non-stop-word terms.
    for (String sentence : sentenceList)
    {
        List<String> wordList = new LinkedList<String>();
        for (Term term : StandardTokenizer.segment(sentence.toCharArray()))
        {
            if (CoreStopWordDictionary.shouldInclude(term))
            {
                wordList.add(term.word);
            }
        }
        docs.add(wordList);
    }
    // Rank sentences with TextRank and map indices back to sentence text.
    TextRankSentence textRank = new TextRankSentence(docs);
    List<String> resultList = new LinkedList<String>();
    for (int index : textRank.getTopSentence(size))
    {
        resultList.add(sentenceList.get(index));
    }
    return resultList;
}
示例6: getSummary
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * One-call interface: produces a summary of a document.
 *
 * @param document the target document
 * @param max_length the desired maximum length of the summary
 * @return the summary text; an empty string when the document fails
 *         validation or the resulting summary is shorter than 15 characters
 */
public static String getSummary(String document, int max_length)
{
    if(!validate_document(document, max_length)){
        return "";
    }
    List<String> sentenceList = spiltSentence(document);
    int sentence_count = sentenceList.size();
    List<List<String>> docs = new ArrayList<List<String>>();
    // Tokenize each sentence, keeping only non-stop-word terms.
    for (String sentence : sentenceList)
    {
        List<Term> termList = StandardTokenizer.segment(sentence.toCharArray());
        List<String> wordList = new LinkedList<String>();
        for (Term term : termList)
        {
            if (CoreStopWordDictionary.shouldInclude(term))
            {
                wordList.add(term.word);
            }
        }
        docs.add(wordList);
    }
    // Rank all sentences, then restore original order and trim to budget.
    TextRankSentence textRank = new TextRankSentence(docs);
    int[] topSentence = textRank.getTopSentence(sentence_count);
    List<String> resultList = new LinkedList<String>();
    for (int i : topSentence)
    {
        resultList.add(sentenceList.get(i));
    }
    resultList = permutation(resultList, sentenceList);
    resultList = pick_sentences(resultList, max_length);
    // Fix: concatenate with StringBuilder instead of repeated String `+=`,
    // which is O(n^2) in the total summary length.
    StringBuilder summary = new StringBuilder();
    for (String sentence : resultList)
    {
        summary.append(sentence);
    }
    // A summary shorter than 15 characters is treated as useless.
    if (summary.length() < 15){
        return "";
    }
    return summary.toString();
}
示例7: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Judges whether a term should be kept for further processing.
 *
 * @param term the word to judge
 * @return {@code false} if the word is a stop word, {@code true} otherwise
 */
public static boolean shouldInclude(Term term)
{
    // Delegate directly to the core stop-word dictionary.
    boolean keep = CoreStopWordDictionary.shouldInclude(term);
    return keep;
}
示例8: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Judges whether a term should be kept for further processing.
 *
 * @param term the word to judge
 * @return {@code false} if the word is a stop word, {@code true} otherwise
 */
public static boolean shouldInclude(Term term)
{
    // Delegate directly to the core stop-word dictionary.
    boolean keep = CoreStopWordDictionary.shouldInclude(term);
    return keep;
}
示例9: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Decides whether this term should be included in the computation;
 * kept parts of speech are nouns, verbs, adverbs and adjectives.
 *
 * @param term the term to judge
 * @return whether the term should be included
 */
public boolean shouldInclude(Term term)
{
    // Delegate directly to the core stop-word dictionary.
    boolean keep = CoreStopWordDictionary.shouldInclude(term);
    return keep;
}
示例10: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入方法依赖的package包/类
/**
 * Decides whether this term should be included in the computation;
 * kept parts of speech are nouns, verbs, adverbs and adjectives.
 *
 * @param term the term to judge
 * @return whether the term should be included
 */
public static boolean shouldInclude(Term term)
{
    // Delegate directly to the core stop-word dictionary.
    boolean keep = CoreStopWordDictionary.shouldInclude(term);
    return keep;
}