This article collects typical usage examples of the Java class org.ansj.domain.Term. If you are wondering what the Term class does and how to use it, the hand-picked examples below may help.
The Term class belongs to the org.ansj.domain package. 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
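Before the numbered examples, here is a minimal sketch of how a Term is usually obtained and inspected. It assumes an ansj version where ToAnalysis.parse returns List<Term>, as in Examples 2 and 4 below (newer versions return a Result, as in Example 3); the class name TermQuickStart and the sample sentence are made up for illustration.
import java.util.List;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;
public class TermQuickStart {
    public static void main(String[] args) {
        // Segment a sentence; each token comes back as a Term
        List<Term> terms = ToAnalysis.parse("中华人民共和国成立了");
        for (Term term : terms) {
            // getName(): the word text, getNatureStr(): its part-of-speech tag,
            // getOffe(): its character offset in the original sentence
            System.out.println(term.getName() + "\t" + term.getNatureStr() + "\t" + term.getOffe());
        }
    }
}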
Example 1: makeNewTerm
import org.ansj.domain.Term; // import the required package/class
private void makeNewTerm() {
    // Concatenate the names of all non-null terms between offe and endOffe
    StringBuilder sb = new StringBuilder();
    for (int j = offe; j <= endOffe; j++) {
        if (terms[j] != null) {
            sb.append(terms[j].getName());
        }
    }
    // Build a new Term with the temporary nature and frequency, score it, and insert it back into the term array
    TermNatures termNatures = new TermNatures(new TermNature(tempNature, tempFreq));
    Term term = new Term(sb.toString(), offe, termNatures);
    term.selfScore(-1 * tempFreq);
    TermUtil.insertTerm(terms, term);
}
Example 2: main
import org.ansj.domain.Term; // import the required package/class
public static void main(String[] args) throws IOException {
    // Standard segmentation
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    // Index-oriented segmentation produces finer-grained terms
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");
    // System.out.println(parse1);
    // Drive the Lucene tokenizer wrapper directly
    String text11 = "ZW321282050000000325";
    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        // System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        // System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.close();
}
Example 3: checkTextContent
import org.ansj.domain.Term; // import the required package/class
public int checkTextContent(int userId, String content) throws IOException {
    // Load the sensitive-word dictionary, one word per line
    HashSet<String> sensitiveWords = new HashSet<String>();
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(source), Charset.forName("UTF-8")))) {
        String line;
        while ((line = br.readLine()) != null) {
            sensitiveWords.add(line.trim());
        }
    }
    // Strip HTML tags, segment the remaining text, and reject the content if any term is a sensitive word
    Result result = ToAnalysis.parse(Jsoup.clean(content, Whitelist.none()));
    List<Term> termList = result.getTerms();
    for (Term term : termList) {
        if (sensitiveWords.contains(term.getName())) {
            return 0;
        }
    }
    return 1;
}
Example 4: doPost
import org.ansj.domain.Term; // import the required package/class
public void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    request.setCharacterEncoding("UTF-8");
    response.setCharacterEncoding("UTF-8");
    // Required parameter
    String text = request.getParameter("text");
    List<Term> terms = ToAnalysis.parse(text);
    PrintWriter out = response.getWriter();
    out.print("Segmentation result:\n" + terms);
    out.flush();
    out.close();
}
Example 5: seg
import org.ansj.domain.Term; // import the required package/class
public static List<Term> seg(String input) {
    // Lazily load the stop-word list on first use
    if (!builtStopWordSet) {
        buildStopWordSet("data/dicts/stopwords_cn.txt");
        builtStopWordSet = true;
    }
    List<Term> retSet = new ArrayList<>();
    // NLP segmentation followed by nature (part-of-speech) recognition and user-dictionary adjustment
    List<Term> parse = NlpAnalysis.parse(input);
    new NatureRecognition(parse).recognition();
    FilterModifWord.modifResult(parse);
    // Keep only terms that carry a nature tag ("word/nature"), are long enough, and are not stop words
    for (Term term : parse) {
        if (term.toString().contains("/") && term.toString().length() >= 3
                && !stopWordSet.contains(term.toString().split("/")[0])) {
            retSet.add(term);
        }
    }
    return retSet;
}
Example 6: filterSlight
import org.ansj.domain.Term; // import the required package/class
/**
 * Filter out words whose part of speech is unimportant.
 *
 * @param parse
 *            the segmented term list
 * @return
 *            the filtered term list
 */
public static List<Term> filterSlight(List<Term> parse) {
    if (parse == null) {
        throw new NullPointerException("filterSlight > parameter is null");
    }
    List<Term> result = new ArrayList<>();
    for (Term term : parse) {
        String natureStr = term.getNatureStr();
        if (StringUtils.isEmpty(natureStr) || StringUtils.isEmpty(natureStr.trim())) {
            continue;
        }
        // Judge by the first letter of the nature tag; skip low-information natures
        natureStr = natureStr.substring(0, 1);
        if (StringUtils.RegexUtils.isSub(natureStr, "^[iltsfabzrmqdpcueyohkx]$")) {
            continue;
        }
        result.add(term);
    }
    return result;
}
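As a usage sketch for filterSlight: it assumes the same older ansj API as the examples above (NlpAnalysis.parse returning List<Term>) and that the method lives in the SubjectTools helper class referenced in Example 10; the demo class name and input sentence are made up for illustration.
import java.util.List;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
public class FilterSlightDemo {
    public static void main(String[] args) {
        // Segment with the NLP analyzer, then drop terms with low-information natures
        List<Term> parse = NlpAnalysis.parse("今天天气很好,我们一起去公园散步");
        List<Term> filtered = SubjectTools.filterSlight(parse); // SubjectTools is an assumption, see Example 10
        System.out.println(filtered);
    }
}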
Example 7: splitScreenSegment
import org.ansj.domain.Term; // import the required package/class
/**
 * Segment a sentence and remove terms with irrelevant parts of speech.
 *
 * @param segment
 *            the sentence
 * @return
 *            the filtered term list
 */
public static List<Term> splitScreenSegment(String segment) {
    if (StringUtils.isEmpty(segment)) {
        throw new NullPointerException("splitScreenSegment > please provide a valid input");
    }
    List<Term> result = new ArrayList<>();
    List<Term> parse = NlpAnalysis.parse(segment);
    for (Term term : parse) {
        String natureStr = term.getNatureStr();
        if (StringUtils.isEmpty(natureStr) || StringUtils.isEmpty(natureStr.trim())) {
            continue;
        }
        // Judge by the first letter of the nature tag; skip low-information natures (including punctuation "w")
        natureStr = natureStr.substring(0, 1);
        if (StringUtils.RegexUtils.isSub(natureStr, "^[iltsfabzrmqdpcueyohkxw]$")) {
            continue;
        }
        result.add(term);
    }
    return result;
}
Example 8: filterSlightNature
import org.ansj.domain.Term; // import the required package/class
/**
 * Filter out terms with irrelevant parts of speech from an already segmented sentence.
 *
 * @param parse
 *            the original term list
 * @return
 *            the filtered term list
 */
public static List<Term> filterSlightNature(List<Term> parse) {
    if (parse == null || parse.size() == 0) {
        throw new NullPointerException("filterSlightNature > please provide a valid input: " + parse);
    }
    List<Term> result = new ArrayList<>();
    for (Term term : parse) {
        String natureStr = term.getNatureStr();
        if (StringUtils.isEmpty(natureStr) || StringUtils.isEmpty(natureStr.trim())) {
            continue;
        }
        // Judge by the first letter of the nature tag; skip low-information natures (including punctuation "w")
        natureStr = natureStr.substring(0, 1);
        if (StringUtils.RegexUtils.isSub(natureStr, "^[iltsfabzrmqdpcueyohkxw]$")) {
            continue;
        }
        result.add(term);
    }
    return result;
}
Example 9: splitWord
import org.ansj.domain.Term; // import the required package/class
/**
 * Segment a sentence and wrap the result in a Subject object.
 *
 * @param segment
 *            the sentence
 * @return
 *            the subject object
 */
public static Subject splitWord(String segment) {
    List<Term> parse = NlpAnalysis.parse(segment);
    Subject subject = new Subject();
    for (Term term : parse) {
        String name = term.getName();
        if (StringUtils.isEmpty(name) || name.equals(" ")) {
            continue;
        }
        // Nouns (n*) and abbreviations (j) become the subject until a predicate appears, then objects;
        // verbs (v*) become the predicate
        if (term.getNatureStr().startsWith("n") || term.getNatureStr().equals("j")) {
            if (subject.getPredicate() == null || subject.getPredicate().size() == 0) {
                subject.addSubject(name);
            } else {
                subject.addObject(name);
            }
        } else if (term.getNatureStr().startsWith("v")) {
            subject.addPredicate(name);
        }
    }
    return subject;
}
Example 10: segmentSubject
import org.ansj.domain.Term; // import the required package/class
/**
 * Extract the key information from each individual sentence.
 *
 * @param segments
 *            the sentences
 */
public void segmentSubject(List<String> segments) {
    if (segments == null || segments.size() == 0) {
        throw new NullPointerException("segmentSubject > please provide a valid input");
    }
    for (String segment : segments) {
        System.out.println("Original: " + segment);
        // Segment, then apply a chain of filters: deduplication, nature filtering, granularity filtering
        List<Term> parse = NlpAnalysis.parse(segment);
        parse = SubjectTools.filterDuplicate(parse);
        parse = SubjectTools.filterSlight(parse);
        parse = SubjectTools.filterGranularity(parse);
        System.out.println("After filtering: " + SubjectTools.listToString(parse));
    }
}
Example 11: findEntities
import org.ansj.domain.Term; // import the required package/class
@Override
public Entities findEntities(String sentence, boolean allowDuplicated) {
    Entities entities = new Entities(allowDuplicated);
    Result result = ToAnalysis.parse(sentence);
    for (Term term : result.getTerms()) {
        // Ignore single-character terms
        if (term.getName().length() < 2) {
            continue;
        }
        // Classify by nature prefix: nr = person, nt = organization, ns = place
        if (term.getNatureStr().startsWith("nr")) {
            entities.addPerson(term.getName());
        } else if (term.getNatureStr().startsWith("nt")) {
            entities.addOrganization(term.getName());
        } else if (term.getNatureStr().startsWith("ns")) {
            // Treat universities and colleges as organizations rather than places
            if (term.getName().endsWith("大学") || term.getName().endsWith("学院")) {
                entities.addOrganization(term.getName());
            } else {
                entities.addSpace(term.getName());
            }
        }
    }
    return entities;
}
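The nature prefixes checked above follow the ICTCLAS-style convention ansj uses: nr for person names, ns for place names, nt for organization names. Below is a minimal standalone sketch of the same classification without the Entities container (which is not shown in this article); it assumes ToAnalysis.parse returns List<Term> as in Example 2, and the class name and sample sentence are made up for illustration.
import java.util.List;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;
public class NaturePrefixDemo {
    public static void main(String[] args) {
        List<Term> terms = ToAnalysis.parse("李雷毕业于北京大学,现在在上海工作");
        for (Term term : terms) {
            if (term.getName().length() < 2) {
                continue; // skip single-character terms, as in Example 11
            }
            String nature = term.getNatureStr();
            if (nature.startsWith("nr")) {
                System.out.println("person: " + term.getName());
            } else if (nature.startsWith("nt")) {
                System.out.println("organization: " + term.getName());
            } else if (nature.startsWith("ns")) {
                System.out.println("place: " + term.getName());
            }
        }
    }
}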
Example 12: getResult
import org.ansj.domain.Term; // import the required package/class
@Override
protected List<Term> getResult(Graph graph) {
    List<Term> result = new LinkedList<Term>();
    int length = graph.terms.length - 1;
    Term term = null;
    for (int i = 0; i < length; i++) {
        if ((term = graph.terms[i]) != null) {
            // Collect the term at this offset and every term chained after it
            result.add(term);
            while ((term = term.getNext()) != null) {
                result.add(term);
            }
        }
    }
    return result;
}
Example 13: addTerm
import org.ansj.domain.Term; // import the required package/class
/**
 * Add a term to the graph.
 *
 * @param term
 */
public void addTerm(Term term) {
    // Does the graph contain a number?
    if (!hasNum && term.termNatures().numAttr.numFreq > 0) {
        hasNum = true;
    }
    // Does the graph contain a person name?
    if (!hasPerson && term.termNatures().personAttr.flag) {
        hasPerson = true;
    }
    // Place the term at its offset in the graph; terms sharing an offset are chained via setNext
    if (terms[term.getOffe()] == null) {
        terms[term.getOffe()] = term;
    } else {
        terms[term.getOffe()] = term.setNext(terms[term.getOffe()]);
    }
}
Example 14: optimalRoot
import org.ansj.domain.Term; // import the required package/class
/**
 * Get the root Term of the optimal path.
 *
 * @return
 */
protected Term optimalRoot() {
    Term to = end;
    to.clearScore();
    Term from = null;
    // Walk the optimal path backwards from the end, clearing every term that is not on it
    while ((from = to.from()) != null) {
        for (int i = from.getOffe() + 1; i < to.getOffe(); i++) {
            terms[i] = null;
        }
        if (from.getOffe() > -1) {
            terms[from.getOffe()] = from;
        }
        // Break the horizontal linked list to save memory
        from.setNext(null);
        from.setTo(to);
        from.clearScore();
        to = from;
    }
    return root;
}
Example 15: getMaxTerm
import org.ansj.domain.Term; // import the required package/class
/**
 * Get the term in this row that reaches the farthest end offset.
 *
 * @param i
 * @return
 */
private Term getMaxTerm(int i) {
    Term maxTerm = terms[i];
    if (maxTerm == null) {
        return null;
    }
    // Walk the chain of terms starting at offset i and keep the one with the largest end offset
    int maxTo = maxTerm.toValue();
    Term term = maxTerm;
    while ((term = term.getNext()) != null) {
        if (maxTo < term.toValue()) {
            maxTo = term.toValue();
            maxTerm = term;
        }
    }
    return maxTerm;
}