当前位置: 首页>>代码示例>>Java>>正文


Java Term类代码示例

本文整理汇总了Java中org.ansj.domain.Term的典型用法代码示例。如果您正苦于以下问题:Java Term类的具体用法?Java Term怎么用?Java Term使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


Term类属于org.ansj.domain包,在下文中一共展示了Term类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: makeNewTerm

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Builds one new term from the slots {@code terms[offe]} through {@code terms[endOffe]}
 * (inclusive) and inserts it into {@code terms}.
 *
 * <p>The new term's name is the concatenation of the names of all non-null terms in that
 * range, its offset is {@code offe}, and its nature is built from {@code tempNature} and
 * {@code tempFreq}.
 */
private void makeNewTerm() {
	StringBuilder mergedName = new StringBuilder();
	for (int idx = offe; idx <= endOffe; idx++) {
		// Empty slots contribute nothing to the merged name.
		if (terms[idx] != null) {
			mergedName.append(terms[idx].getName());
		}
	}
	TermNatures mergedNatures = new TermNatures(new TermNature(tempNature, tempFreq));
	Term merged = new Term(mergedName.toString(), offe, mergedNatures);
	// The self score is the negated temporary frequency.
	merged.selfScore(-1 * tempFreq);
	TermUtil.insertTerm(terms, merged);
}
 
开发者ID:Lewis-Liu-001,项目名称:ansj_segx,代码行数:18,代码来源:UserDefineRecognition.java

示例2: main

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Demo entry point: segments two sample sentences with ansj, then feeds a sample string
 * through {@code AnsjTokenizer} and prints every token followed by a single space.
 *
 * @param args ignored
 * @throws IOException declared because the analysis/tokenizer calls in the body may raise it
 */
public static void main(String[] args) throws IOException {
	List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
	System.out.println(parse);
	// The result is not printed; the call itself is kept so the demo still exercises IndexAnalysis.
	IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");

	String text11 = "ZW321282050000000325";

	Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
	try {
		CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
		// Registered as in the original demo even though their values are not printed.
		tokenizer.addAttribute(OffsetAttribute.class);
		tokenizer.addAttribute(PositionIncrementAttribute.class);

		tokenizer.reset();
		while (tokenizer.incrementToken()) {
			System.out.print(termAtt.toString() + " ");
		}
	} finally {
		// Close even when reset()/incrementToken() throws, so the tokenizer is never leaked.
		tokenizer.close();
	}
}
 
开发者ID:dimensoft,项目名称:improved-journey,代码行数:27,代码来源:TestAnsj.java

示例3: checkTextContent

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Checks whether {@code content} contains any word from the sensitive-word file.
 *
 * <p>The word list is read from {@code source} as UTF-8 on every call. The last character
 * of each non-empty line is dropped before the word is stored (NOTE(review): presumably a
 * trailing delimiter such as {@code '\r'} - confirm against the file format). The content
 * is cleaned with {@code Jsoup.clean(content, Whitelist.none())} before it is segmented.
 *
 * @param userId  not used by this method
 * @param content text to check
 * @return {@code 0} if any segmented term equals a sensitive word, otherwise {@code 1}
 * @throws IOException if the word file cannot be opened or read
 */
public int checkTextContent(int userId, String content) throws IOException {
    HashSet<String> sensitiveWords = new HashSet<String>();
    // try-with-resources: the reader (and the underlying file handle) is closed on every path.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(source), Charset.forName("UTF-8")))) {
        String line;
        while ((line = br.readLine()) != null) {
            // An empty line would make substring(0, -1) throw, and it carries no word anyway.
            if (line.isEmpty()) {
                continue;
            }
            sensitiveWords.add(line.substring(0, line.length() - 1));
        }
    }

    Result result = ToAnalysis.parse(Jsoup.clean(content, Whitelist.none()));
    List<Term> termList = result.getTerms();
    for (Term term : termList) {
        if (sensitiveWords.contains(term.getName())) {
            return 0;
        }
    }
    return 1;
}
 
开发者ID:qinjr,项目名称:TeamNote,代码行数:19,代码来源:QualityUtilImpl.java

示例4: doPost

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Segments the {@code text} request parameter and writes the resulting term list to the
 * response body, prefixed by a fixed header line.
 *
 * @param request  incoming request; its character encoding is set to UTF-8
 * @param response outgoing response; its character encoding is set to UTF-8
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if obtaining or writing to the response writer fails
 */
public void doPost(HttpServletRequest request, HttpServletResponse response) 
		throws ServletException, IOException {
	request.setCharacterEncoding("UTF-8");
	response.setCharacterEncoding("UTF-8");

	// Required parameter.
	final String input = request.getParameter("text");
	final List<Term> segmented = ToAnalysis.parse(input);

	final PrintWriter writer = response.getWriter();
	writer.print("分词结果为:\n" + segmented);
	writer.flush();
	writer.close();
}
 
开发者ID:landriesnidis,项目名称:NSIITA-SemanticMatching,代码行数:17,代码来源:Ansj_seg.java

示例5: seg

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Segments {@code input} and returns the terms that pass the stop-word filter.
 *
 * <p>A term is kept when its string form contains {@code "/"}, is at least three characters
 * long, and the part before the first {@code "/"} is not in {@code stopWordSet}.
 *
 * @param input text to segment
 * @return the retained terms, in segmentation order
 */
public static List<Term> seg(String input) {
    // The stop-word list is loaded on the first call only.
    if (!builtStopWordSet) {
        buildStopWordSet("data/dicts/stopwords_cn.txt");
        builtStopWordSet = true;
    }

    List<Term> kept = new ArrayList<>();

    List<Term> segmented = NlpAnalysis.parse(input);
    new NatureRecognition(segmented).recognition();
    FilterModifWord.modifResult(segmented);

    for (Term candidate : segmented) {
        String rendered = candidate.toString();
        if (!rendered.contains("/") || rendered.length() < 3) {
            continue;
        }
        if (stopWordSet.contains(rendered.split("/")[0])) {
            continue;
        }
        kept.add(candidate);
    }

    return kept;
}
 
开发者ID:IACASNLPIR,项目名称:GKHMC,代码行数:22,代码来源:ANSJSEG.java

示例6: filterSlight

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Drops the terms whose part-of-speech tag is considered unimportant.
 * 
 * @param parse
 *      segmented term list; must not be null
 * @return
 *      a new list holding the terms that remain after filtering
 */
public static List<Term> filterSlight(List<Term> parse) {
    if (parse == null) {
        throw new NullPointerException("filterSlight > 参数为空");
    }

    List<Term> kept = new ArrayList<>();
    for (Term candidate : parse) {
        String nature = candidate.getNatureStr();
        boolean blankNature = StringUtils.isEmpty(nature) || StringUtils.isEmpty(nature.trim());
        if (!blankNature) {
            // Only the first character of the nature tag takes part in the match.
            String leading = nature.substring(0, 1);
            if (!StringUtils.RegexUtils.isSub(leading, "^[iltsfabzrmqdpcueyohkx]$")) {
                kept.add(candidate);
            }
        }
    }
    return kept;
}
 
开发者ID:William-Hai,项目名称:CorpusSpider,代码行数:31,代码来源:SubjectTools.java

示例7: splitScreenSegment

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Segments a sentence and removes the terms whose part-of-speech tag is irrelevant.
 * 
 * @param segment
 *      the sentence; must not be empty
 * @return
 *      a new list holding the terms that remain after filtering
 */
public static List<Term> splitScreenSegment(String segment) {
    if (StringUtils.isEmpty(segment)) {
        throw new NullPointerException("splitScreenSegment > 请给出一个有效的输入信息");
    }

    List<Term> kept = new ArrayList<>();
    List<Term> segmented = NlpAnalysis.parse(segment);
    for (Term candidate : segmented) {
        String nature = candidate.getNatureStr();
        if (StringUtils.isEmpty(nature) || StringUtils.isEmpty(nature.trim())) {
            continue;
        }

        // Match on the first character of the nature tag only.
        boolean irrelevant =
                StringUtils.RegexUtils.isSub(nature.substring(0, 1), "^[iltsfabzrmqdpcueyohkxw]$");
        if (!irrelevant) {
            kept.add(candidate);
        }
    }

    return kept;
}
 
开发者ID:William-Hai,项目名称:CorpusSpider,代码行数:32,代码来源:SubjectTools.java

示例8: filterSlightNature

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Removes the terms whose part-of-speech tag is irrelevant from an already segmented sentence.
 * 
 * @param parse
 *      the segmented terms; must be neither null nor empty
 * @return
 *      a new list holding the terms that remain after filtering
 */
public static List<Term> filterSlightNature(List<Term> parse) {
    if (parse == null || parse.isEmpty()) {
        throw new NullPointerException("filterSlightNature > 请给出一个有效的输入信息" + parse);
    }

    List<Term> kept = new ArrayList<>();
    for (Term candidate : parse) {
        String nature = candidate.getNatureStr();
        boolean hasNature = !(StringUtils.isEmpty(nature) || StringUtils.isEmpty(nature.trim()));
        if (hasNature) {
            // Only the first character of the nature tag is tested.
            String leading = nature.substring(0, 1);
            if (!StringUtils.RegexUtils.isSub(leading, "^[iltsfabzrmqdpcueyohkxw]$")) {
                kept.add(candidate);
            }
        }
    }

    return kept;
}
 
开发者ID:William-Hai,项目名称:CorpusSpider,代码行数:31,代码来源:SubjectTools.java

示例9: splitWord

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Segments a sentence and packs the result into a {@code Subject}.
 *
 * <p>Terms whose nature starts with {@code "n"} or equals {@code "j"} are added as subjects
 * until the first predicate has been recorded, and as objects afterwards. Terms whose
 * nature starts with {@code "v"} are added as predicates. Terms with an empty or
 * single-space name are skipped.
 * 
 * @param segment
 *      the sentence
 * @return
 *      the populated subject object
 */
public static Subject splitWord(String segment) {
    List<Term> segmented = NlpAnalysis.parse(segment);
    Subject subject = new Subject();

    for (Term term : segmented) {
        String word = term.getName();
        if (StringUtils.isEmpty(word) || word.equals(" ")) {
            continue;
        }

        String nature = term.getNatureStr();
        boolean nounLike = nature.startsWith("n") || nature.equals("j");
        if (nounLike) {
            boolean noPredicateYet =
                    subject.getPredicate() == null || subject.getPredicate().size() == 0;
            if (noPredicateYet) {
                subject.addSubject(word);
            } else {
                subject.addObject(word);
            }
        } else if (nature.startsWith("v")) {
            subject.addPredicate(word);
        }
    }

    return subject;
}
 
开发者ID:William-Hai,项目名称:CorpusSpider,代码行数:32,代码来源:SubjectTools.java

示例10: segmentSubject

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Prints, for every sentence, the original text followed by its filtered segmentation.
 *
 * <p>Each sentence is segmented and then passed through the duplicate, nature and
 * granularity filters of {@code SubjectTools}, in that order.
 * 
 * @param segments
 *      the sentences; must be neither null nor empty
 */
public void segmentSubject(List<String> segments) {
    if (segments == null || segments.isEmpty()) {
        throw new NullPointerException("segmentSubject > 请给出一个有效的输入信息");
    }

    for (String sentence : segments) {
        System.out.println("原文:" + sentence);

        List<Term> filtered = SubjectTools.filterGranularity(
                SubjectTools.filterSlight(
                        SubjectTools.filterDuplicate(NlpAnalysis.parse(sentence))));

        System.out.println("过滤后:" + SubjectTools.listToString(filtered));
    }
}
 
开发者ID:William-Hai,项目名称:CorpusSpider,代码行数:23,代码来源:SubjectAnalysis.java

示例11: findEntities

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Extracts person, organization and place names from a sentence.
 *
 * <p>Terms shorter than two characters are ignored. Nature prefix {@code "nr"} is recorded
 * as a person and {@code "nt"} as an organization. Nature prefix {@code "ns"} is recorded
 * as an organization when the name ends with "大学" or "学院", otherwise as a space.
 *
 * @param sentence        text to analyse
 * @param allowDuplicated passed to the {@code Entities} constructor
 * @return the collected entities
 */
@Override
public Entities findEntities(String sentence, boolean allowDuplicated) {
    Entities found = new Entities(allowDuplicated);

    Result analysed = ToAnalysis.parse(sentence);
    for (Term term : analysed.getTerms()) {
        String word = term.getName();
        if (word.length() >= 2) {
            String nature = term.getNatureStr();
            if (nature.startsWith("nr")) {
                found.addPerson(word);
            } else if (nature.startsWith("nt")) {
                found.addOrganization(word);
            } else if (nature.startsWith("ns")) {
                boolean school = word.endsWith("大学") || word.endsWith("学院");
                if (school) {
                    found.addOrganization(word);
                } else {
                    found.addSpace(word);
                }
            }
        }
    }
    return found;
}
 
开发者ID:iamxiatian,项目名称:wikit,代码行数:24,代码来源:AnsjSegment.java

示例12: getResult

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Flattens the graph into a list: for every slot of {@code graph.terms} except the last
 * one, the slot's term and every term reachable from it through {@code getNext()} are
 * appended in order.
 *
 * @param graph the graph whose terms are collected
 * @return all collected terms
 */
@Override
protected List<Term> getResult(Graph graph) {
	List<Term> collected = new LinkedList<Term>();
	// The final slot of the array is not visited.
	int limit = graph.terms.length - 1;
	for (int slot = 0; slot < limit; slot++) {
		for (Term node = graph.terms[slot]; node != null; node = node.getNext()) {
			collected.add(node);
		}
	}

	return collected;
}
 
开发者ID:Lewis-Liu-001,项目名称:ansj_segx,代码行数:17,代码来源:FastIndexAnalysis.java

示例13: addTerm

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Adds a term to the graph.
 *
 * <p>Also updates the {@code hasNum} and {@code hasPerson} flags from the term's natures.
 * 
 * @param term the term to add; it is stored at index {@code term.getOffe()}
 */
public void addTerm(Term term) {
	// Once a flag is set, the term's attributes are no longer consulted for it.
	hasNum = hasNum || term.termNatures().numAttr.numFreq > 0;
	hasPerson = hasPerson || term.termNatures().personAttr.flag;

	// Place the term in its slot; when the slot is occupied, the existing entry is handed
	// to setNext and the value setNext returns is stored.
	int slot = term.getOffe();
	Term existing = terms[slot];
	terms[slot] = (existing == null) ? term : term.setNext(existing);
}
 
开发者ID:Lewis-Liu-001,项目名称:ansj_segx,代码行数:22,代码来源:Graph.java

示例14: optimalRoot

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Walks the {@code from()} links backwards from {@code end}, rewiring the visited terms
 * into a forward chain through {@code setTo}, and returns the root term.
 *
 * <p>On the way, every slot of {@code terms} strictly between two consecutive path terms
 * is cleared, each path term with a non-negative offset is written back to its slot, and
 * the score of every visited term is cleared.
 * 
 * @return the {@code root} term
 */
protected Term optimalRoot() {
	Term successor = end;
	successor.clearScore();
	for (Term current = successor.from(); current != null; current = successor.from()) {
		int start = current.getOffe();
		for (int slot = start + 1; slot < successor.getOffe(); slot++) {
			terms[slot] = null;
		}
		if (start >= 0) {
			terms[start] = current;
		}
		// Detach the sideways list to save memory.
		current.setNext(null);
		current.setTo(successor);
		current.clearScore();
		successor = current;
	}
	return root;
}
 
开发者ID:Lewis-Liu-001,项目名称:ansj_segx,代码行数:25,代码来源:Graph.java

示例15: getMaxTerm

import org.ansj.domain.Term; //导入依赖的package包/类
/**
 * Returns the term with the largest {@code toValue()} among the terms chained at slot
 * {@code i}. On ties the term that comes first in the chain wins.
 * 
 * @param i index into {@code terms}
 * @return the selected term, or {@code null} when the slot is empty
 */
private Term getMaxTerm(int i) {
	Term best = terms[i];
	if (best == null) {
		return null;
	}
	int bestTo = best.toValue();
	for (Term cursor = best.getNext(); cursor != null; cursor = cursor.getNext()) {
		int cursorTo = cursor.toValue();
		if (cursorTo > bestTo) {
			bestTo = cursorTo;
			best = cursor;
		}
	}
	return best;
}
 
开发者ID:Lewis-Liu-001,项目名称:ansj_segx,代码行数:23,代码来源:Graph.java


注:本文中的org.ansj.domain.Term类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。