本文整理汇总了Java中com.hankcs.hanlp.seg.common.Term类的典型用法代码示例。如果您正苦于以下问题:Java Term类的具体用法?Java Term怎么用?Java Term使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Term类属于com.hankcs.hanlp.seg.common包,在下文中一共展示了Term类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractNameEntity
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Extracts named entities from the given text using HanLP segmentation.
 *
 * @param content raw text to analyze
 * @return map from HanLP nature prefix to the distinct entity words found:
 *         "nr" = person names, "ns" = place names, "nt" = organization names
 */
public Map<String, Set<String>> extractNameEntity(String content) {
    // Segment once, then filter the same term list by each nature prefix.
    List<Term> list = SEGMENT.seg(content);
    Map<String, Set<String>> namedEntity = new HashMap<>();
    namedEntity.put("nr", wordsWithNaturePrefix(list, "nr")); // person names
    namedEntity.put("ns", wordsWithNaturePrefix(list, "ns")); // place names
    namedEntity.put("nt", wordsWithNaturePrefix(list, "nt")); // organization names
    return namedEntity;
}

/** Collects the distinct words whose part-of-speech nature starts with the given prefix. */
private static Set<String> wordsWithNaturePrefix(List<Term> terms, String prefix) {
    return terms.stream()
            .filter(term -> term.nature.startsWith(prefix))
            .map(term -> term.word)
            .collect(Collectors.toSet());
}
示例2: testSegmentCorpus
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Segments every document under the Sogou text-classification corpus and writes
 * the space-joined, stop-word-filtered tokens to a parallel "segmented" directory.
 *
 * <p>NOTE(review): paths are hard-coded to a local Windows layout; run only
 * where the D:\Doc\... directories exist.
 *
 * @throws Exception on I/O failure from the underlying utilities
 */
public void testSegmentCorpus() throws Exception
{
    File root = new File("D:\\Doc\\语料库\\搜狗文本分类语料库精简版");
    // listFiles() returns null when the path is missing or not a directory;
    // the original code would NPE here.
    File[] folders = root.listFiles();
    if (folders == null) return;
    for (File folder : folders)
    {
        if (!folder.isDirectory()) continue;
        File[] files = folder.listFiles();
        if (files == null) continue;
        for (File file : files)
        {
            System.out.println(file.getAbsolutePath());
            List<Term> termList = HanLP.segment(IOUtil.readTxt(file.getAbsolutePath()));
            StringBuilder sbOut = new StringBuilder();
            for (Term term : termList)
            {
                // Keep only terms that pass the core stop-word filter.
                if (CoreStopWordDictionary.shouldInclude(term))
                {
                    sbOut.append(term.word).append(" ");
                }
            }
            IOUtil.saveTxt("D:\\Doc\\语料库\\segmented\\" + folder.getName() + "_" + file.getName(), sbOut.toString());
        }
    }
}
示例3: convert
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Converts a segmentation result into a list of synonym items.
 *
 * @param sentence          the segmented sentence
 * @param withUndefinedItem whether to keep words that are absent from the dictionary
 * @return synonym items in the original word order
 */
public static List<CommonSynonymDictionary.SynonymItem> convert(List<Term> sentence, boolean withUndefinedItem)
{
    List<CommonSynonymDictionary.SynonymItem> result =
            new ArrayList<CommonSynonymDictionary.SynonymItem>(sentence.size());
    for (Term term : sentence)
    {
        CommonSynonymDictionary.SynonymItem item = get(term.word);
        if (item != null)
        {
            result.add(item);
        }
        else if (withUndefinedItem)
        {
            // Word not in the dictionary: keep it as an explicit "undefined" entry.
            result.add(CommonSynonymDictionary.SynonymItem.createUndefined(term.word));
        }
    }
    return result;
}
示例4: rewrite
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Rewrites the text by replacing each word with a randomly chosen exact synonym,
 * when the synonym dictionary offers one; other words pass through unchanged.
 *
 * @param text the input text
 * @return the rewritten text
 */
public String rewrite(String text)
{
    List<Term> terms = StandardTokenizer.segment(text.toCharArray());
    // Pre-size slightly larger than the input since synonyms may be longer.
    StringBuilder out = new StringBuilder((int) (text.length() * 1.2));
    String previous = Predefine.TAG_BIGIN;
    for (Term term : terms)
    {
        SynonymItem entry = get(term.word);
        Synonym replacement = (entry == null) ? null : entry.randomSynonym(Type.EQUAL, previous);
        out.append(replacement != null ? replacement.realWord : term.word);
        // Track the previous word in compiled "nature/word" form for context.
        previous = PosTagCompiler.compile(term.nature.toString(), term.word);
    }
    return out.toString();
}
示例5: extractSuffixByWords
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Extracts frequent suffixes measured in whole words (not characters), so
 * {@code length} counts words.
 *
 * @param length suffix length in words
 * @param size   maximum number of suffixes to return
 * @param extend also collect every shorter suffix (1..length-1 words)
 * @return the extracted suffix strings
 */
public List<String> extractSuffixByWords(int length, int size, boolean extend)
{
    TFDictionary suffixDictionary = new TFDictionary();
    for (String key : tfDictionary.keySet())
    {
        List<Term> terms = StandardTokenizer.segment(key);
        int total = terms.size();
        // Only keys strictly longer than the requested suffix contribute.
        if (total <= length) continue;
        suffixDictionary.add(combine(terms.subList(total - length, total)));
        if (extend)
        {
            for (int l = 1; l < length; ++l)
            {
                suffixDictionary.add(combine(terms.subList(total - l, total)));
            }
        }
    }
    return extract(suffixDictionary, size);
}
示例6: toVertexList
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Converts a term list into a vertex list for lattice construction.
 *
 * <p>Side effect: when a term is found in the core dictionary, its
 * {@code nature} is overwritten with the dictionary's primary nature.
 *
 * @param termList    segmented terms
 * @param appendStart whether to prepend the begin-of-sentence vertex
 * @return the vertex list
 */
private static List<Vertex> toVertexList(List<Term> termList, boolean appendStart)
{
    ArrayList<Vertex> vertices = new ArrayList<Vertex>(termList.size() + 1);
    if (appendStart)
    {
        vertices.add(Vertex.B);
    }
    for (Term term : termList)
    {
        CoreDictionary.Attribute attribute = CoreDictionary.get(term.word);
        if (attribute != null)
        {
            // Dictionary hit: align the term's nature with the primary one.
            term.nature = attribute.nature[0];
        }
        else if (term.word.trim().length() == 0)
        {
            attribute = new CoreDictionary.Attribute(Nature.x); // whitespace-only word
        }
        else
        {
            attribute = new CoreDictionary.Attribute(Nature.nz); // unknown word
        }
        vertices.add(new Vertex(term.word, attribute));
    }
    return vertices;
}
示例7: convertSentenceListToDocument
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Converts a list of sentences into a document: each sentence becomes a list
 * of its stop-word-filtered words.
 *
 * @param sentenceList the sentences to convert
 * @return one word list per sentence, in order
 */
private static List<List<String>> convertSentenceListToDocument(List<String> sentenceList)
{
    List<List<String>> docs = new ArrayList<List<String>>(sentenceList.size());
    for (String sentence : sentenceList)
    {
        List<String> words = new LinkedList<String>();
        for (Term term : StandardTokenizer.segment(sentence.toCharArray()))
        {
            // Skip terms rejected by the core stop-word filter.
            if (!CoreStopWordDictionary.shouldInclude(term)) continue;
            words.add(term.word);
        }
        docs.add(words);
    }
    return docs;
}
示例8: main
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Demo: traditional-Chinese tokenization on three samples
 * (the last one is simplified Chinese as a control).
 */
public static void main(String[] args)
{
    String[] samples = {
            "大衛貝克漢不僅僅是名著名球員,球場以外,其妻為前" +
                    "辣妹合唱團成員維多利亞·碧咸,亦由於他擁有" +
                    "突出外表、百變髮型及正面的形象,以至自己" +
                    "品牌的男士香水等商品,及長期擔任運動品牌" +
                    "Adidas的代言人,因此對大眾傳播媒介和時尚界" +
                    "等方面都具很大的影響力,在足球圈外所獲得的" +
                    "認受程度可謂前所未見。",
            "(中央社記者黃巧雯台北20日電)外需不振,影響接單動能,經濟部今天公布7月外銷訂單金額362.9億美元,年減5%," +
                    "連續4個月衰退,減幅較6月縮小。1040820\n",
            "中央社记者黄巧雯台北20日电"
    };
    for (String sample : samples)
    {
        System.out.println(TraditionalChineseTokenizer.segment(sample));
    }
}
示例9: seg2sentence
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Segments text into sentences, each as a list of terms, with stop words removed.
 *
 * @param text the input text
 * @return one filtered term list per sentence
 */
public static List<List<Term>> seg2sentence(String text)
{
    List<List<Term>> sentenceList = SEGMENT.seg2sentence(text);
    for (List<Term> sentence : sentenceList)
    {
        // Drop every term rejected by the core stop-word filter, in place.
        sentence.removeIf(term -> !CoreStopWordDictionary.shouldInclude(term));
    }
    return sentenceList;
}
示例10: segSentence
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
// Segments traditional-Chinese text: the input is normalized via CharTable,
// segmented, and then each term's word is mapped back to a traditional form
// where the dictionary provides one. Offsets are assigned so they index into
// the ORIGINAL (un-normalized) text.
private static List<Term> segSentence(String text)
{
    String sText = CharTable.convert(text);
    List<Term> termList = SEGMENT.seg(sText);
    int offset = 0;
    for (Term term : termList)
    {
        String tText;
        term.offset = offset;
        // Single characters, and words with no traditional-Chinese dictionary
        // entry, are restored verbatim from the original text slice.
        if (term.length() == 1 || (tText = SimplifiedChineseDictionary.getTraditionalChinese(term.word)) == null)
        {
            term.word = text.substring(offset, offset + term.length());
            offset += term.length();
        }
        else
        {
            // Dictionary hit: advance the offset by the ORIGINAL word length
            // first, then substitute the traditional form (which may differ
            // in length from the simplified word).
            offset += term.length();
            term.word = tText;
        }
    }
    return termList;
}
示例11: segment
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Segments text while treating every web URL as a single token.
 *
 * @param text the input text
 * @return segmentation result; each matched URL becomes one term with nature {@code xu}
 */
public static List<Term> segment(String text)
{
    List<Term> result = new LinkedList<Term>();
    Matcher matcher = WEB_URL.matcher(text);
    int cursor = 0;
    while (matcher.find())
    {
        // Segment the plain text before the URL, then emit the URL as one term.
        result.addAll(SEGMENT.seg(text.substring(cursor, matcher.start())));
        result.add(new Term(matcher.group(), Nature.xu));
        cursor = matcher.end();
    }
    // Trailing text after the last URL (or the whole text when no URL matched).
    if (cursor < text.length())
    {
        result.addAll(SEGMENT.seg(text.substring(cursor)));
    }
    return result;
}
示例12: main
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Demo: Chinese person-name recognition on sentences known to be tricky
 * (name boundaries colliding with common words).
 */
public static void main(String[] args)
{
    String[] sentences = new String[]{
            "签约仪式前,秦光荣、李纪恒、仇和等一同会见了参加签约的企业家。",
            "区长庄木弟新年致辞",
            "朱立伦:两岸都希望共创双赢 习朱历史会晤在即",
            "陕西首富吴一坚被带走 与令计划妻子有交集",
            "据美国之音电台网站4月28日报道,8岁的凯瑟琳·克罗尔(凤甫娟)和很多华裔美国小朋友一样,小小年纪就开始学小提琴了。她的妈妈是位虎妈么?",
            "凯瑟琳和露西(庐瑞媛),跟她们的哥哥们有一些不同。",
            "王国强、高峰、汪洋、张朝阳光着头、韩寒、小四",
            "张浩和胡健康复员回家了",
            "王总和小丽结婚了",
            "编剧邵钧林和稽道青说",
            "这里有关天培的有关事迹",
            "龚学平等领导说,邓颖超生前杜绝超生",
    };
    Segment nameSegment = HanLP.newSegment().enableNameRecognize(true);
    for (String sentence : sentences)
    {
        System.out.println(nameSegment.seg(sentence));
    }
}
示例13: segment
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Segments a Chinese sentence into words using HanLP's default segmenter.
 *
 * @param sentence text to segment
 * @return surface words in segmentation order
 */
public static List<String> segment(String sentence) {
    // Map each Term to its surface word and return directly — the original
    // built an empty ArrayList only to addAll() a collected list into it.
    return HanLP.segment(sentence)
            .stream()
            .map(term -> term.word)
            .collect(Collectors.toList());
}
示例14: testSeg
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
/**
 * Smoke test: Dijkstra-based segmentation of a short phrase.
 *
 * @throws Exception declared for test-framework convention
 */
public void testSeg() throws Exception
{
    DijkstraSegment dijkstra = new DijkstraSegment();
    List<Term> result = dijkstra.seg("商品与服务");
    System.out.println(result);
}
示例15: testIssue22
import com.hankcs.hanlp.seg.common.Term; //导入依赖的package包/类
// Regression test for HanLP issue #22: verifies that after segmenting "三年",
// the "年" term carries the core dictionary's primary nature, then shows the
// effect of enabling number-quantifier recognition.
public void testIssue22() throws Exception
{
    CoreDictionary.Attribute attribute = CoreDictionary.get("年");
    System.out.println(attribute);
    List<Term> termList = StandardTokenizer.segment("三年");
    System.out.println(termList);
    // "三年" is expected to segment as two terms; the second ("年") must match
    // the dictionary's primary nature.
    assertEquals(attribute.nature[0], termList.get(1).nature);
    System.out.println(StandardTokenizer.segment("三元"));
    // NOTE(review): this mutates shared tokenizer state — subsequent tests in
    // the same JVM will see number-quantifier recognition enabled.
    StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
    System.out.println(StandardTokenizer.segment("三年"));
}