本文整理汇总了 Java 中 org.ansj.domain.Term.termNatures 方法的典型用法代码示例。如果您正苦于以下问题：Java Term.termNatures 方法的具体用法？Java Term.termNatures 怎么用？Java Term.termNatures 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.ansj.domain.Term 的用法示例。
在下文中一共展示了Term.termNatures方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: addTerm
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Adds a term to the graph.
 *
 * <p>Updates the {@code hasNum} / {@code hasPerson} markers from the term's natures and then
 * stores the term in the slot indexed by its offset. When that slot is already occupied, the
 * new term takes the slot and the previous occupant is chained behind it via {@code setNext}.
 *
 * @param term the term to add
 */
public void addTerm(Term term) {
    // Sticky markers: once set they are never cleared here.
    hasNum = hasNum || term.termNatures().numAttr.numFreq > 0;
    hasPerson = hasPerson || term.termNatures().personAttr.flag;

    // Place the term at its offset, chaining any term that was already there.
    final int slot = term.getOffe();
    final Term occupant = terms[slot];
    if (occupant == null) {
        terms[slot] = term;
        return;
    }
    terms[slot] = term.setNext(occupant);
}
示例2: compuScore
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Computes the accumulated score of moving from one term to the term that follows it.
 *
 * <p>The result is {@code from.score()} plus the negative logarithm of a smoothed mix of the
 * frequency of {@code from} and the two-word frequency reported by {@code NgramLibrary}.
 *
 * @param from the preceding term
 * @param to the following term
 * @return the score
 */
public static double compuScore(Term from, Term to) {
    final double fromFreq = from.termNatures().allFreq + 1;

    // A negative frequency short-circuits: the penalty is also written back onto `from`.
    if (fromFreq < 0) {
        final double penalised = from.score() + MAX_FREQUENCE;
        from.score(penalised);
        return penalised;
    }

    final int pairFreq = NgramLibrary.getTwoWordFreq(from, to);
    final double singlePart = dSmoothingPara * fromFreq / (MAX_FREQUENCE + 80000);
    final double pairPart = (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / fromFreq + dTemp);

    double cost = -Math.log(singlePart + pairPart);
    if (cost < 0) {
        cost += fromFreq;
    }
    return from.score() + cost;
}
示例3: recognition
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Scans {@code terms} and collects runs of name-like terms in {@code tempList}.
 *
 * <p>When a run is broken by a term that is not name-like, a run of exactly one term is
 * discarded and a longer run is inserted back into {@code terms} as {@code TermNatures.NR}.
 */
public void recognition() {
    reset();
    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // A run may not start on a term flagged as a name ending, nor on a
        // single character listed in ISNOTFIRST.
        if (tempList.size() == 0) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String head = current.getName();
            if (head.length() == 1 && ISNOTFIRST.contains(head.charAt(0))) {
                continue;
            }
        }

        final String name = current.getName();
        final boolean nameLike = current.termNatures() == TermNatures.NR
                || current.termNatures() == TermNatures.NW
                || name.length() == 1;
        if (nameLike) {
            if (validate(name)) {
                tempList.add(current);
            }
            continue;
        }

        // The run is broken: drop a lone candidate, merge a longer one.
        final int pending = tempList.size();
        if (pending == 1) {
            reset();
        } else if (pending > 1) {
            TermUtil.insertTerm(terms, tempList, TermNatures.NR);
            reset();
        }
    }
}
示例4: getNewTerms
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Scans {@code terms} for runs of name-like terms and returns one new term per run.
 *
 * <p>A run of exactly one term is discarded; a longer run is turned into a term by
 * {@code makeNewTerm()} once a term that is not name-like ends it.
 *
 * @return the newly built terms, in scan order
 */
public List<Term> getNewTerms() {
    final LinkedList<Term> found = new LinkedList<Term>();
    reset();
    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // A run may not start on a term flagged as a name ending, nor on a
        // single character listed in ISNOTFIRST.
        if (tempList.size() == 0) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String head = current.getName();
            if (head.length() == 1 && ISNOTFIRST.contains(head.charAt(0))) {
                continue;
            }
        }

        final String name = current.getName();
        final boolean nameLike = current.termNatures() == TermNatures.NR
                || current.termNatures() == TermNatures.NW
                || name.length() == 1;
        if (nameLike) {
            if (validate(name)) {
                tempList.add(current);
            }
            continue;
        }

        // The run is broken: drop a lone candidate, emit a longer one.
        final int pending = tempList.size();
        if (pending == 1) {
            reset();
        } else if (pending > 1) {
            found.add(makeNewTerm());
            reset();
        }
    }
    return found;
}
示例5: recogntion_
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Walks {@code terms} and collects the candidate terms returned by {@code nameFind}.
 *
 * <p>Only terms whose {@code personAttr.flag} is set are examined. Each examined term has its
 * score and self score reset to 0 before {@code nameFind} is tried for j = 2, 1, 0.
 *
 * @return the candidate terms found, in the order they were produced
 */
private List<Term> recogntion_() {
Term term = null;
Term tempTerm = null;
List<Term> termList = new ArrayList<Term>();
// Passed to nameFind; refreshed at the end of every examined term from that term's personAttr.begin.
int beginFreq = 10;
for (int i = 0; i < terms.length; i++) {
term = terms[i];
if (term == null || !term.termNatures().personAttr.flag) {
continue;
}
term.score(0);
term.selfScore(0);
int freq = 0;
// Try j = 2 first, then 1, then 0.
for (int j = 2; j > -1; j--) {
freq = term.termNatures().personAttr.getFreq(j, 0);
// NOTE(review): the second clause also requires freq > 10, so it can never be true when the
// first clause is false; the test is effectively just (freq > 10).
if ((freq > 10) || (term.getName().length() == 2 && freq > 10)) {
tempTerm = nameFind(i, beginFreq, j);
if (tempTerm != null) {
termList.add(tempTerm);
// If this is an unambiguous recognition
if (skip) {
// Zero the scores of every term in [i, tempTerm.toValue()).
for (int j2 = i; j2 < tempTerm.toValue(); j2++) {
if (terms[j2] != null) {
terms[j2].score(0);
terms[j2].selfScore(0);
}
}
// The outer loop's i++ then resumes scanning at tempTerm.toValue().
i = tempTerm.toValue() - 1;
break;
}
}
}
}
// `term` is still the term examined in this iteration, even if i was moved above.
beginFreq = term.termNatures().personAttr.begin + 1;
}
return termList;
}
示例6: incrementToken
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Advances to the next term that is not rejected by {@code filter} and publishes it through
 * the token attributes.
 *
 * <p>When stemming is enabled, terms whose natures are {@code TermNatures.EN} are stemmed and
 * the stemmed text is written back onto the term before filtering. The end offset is derived
 * from the length of the text before stemming.
 *
 * @return {@code true} if a term was published, {@code false} when {@code analysis} is exhausted
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    Term current;
    int unstemmedLength = 0;
    while ((current = analysis.next()) != null) {
        String text = current.getName();
        unstemmedLength = text.length();
        if (isStemming && current.termNatures() == TermNatures.EN) {
            text = stemmer.stem(text);
            current.setName(text);
        }
        // Stop at the first term the filter does not reject.
        if (filter == null || !filter.contains(text)) {
            break;
        }
    }

    if (current == null) {
        return false;
    }

    // Filtered terms are not counted, so the increment is always exactly one.
    positionAttr.setPositionIncrement(1);
    termAtt.setEmpty().append(current.getName());
    offsetAtt.setOffset(current.getOffe(), current.getOffe() + unstemmedLength);
    return true;
}
示例7: makeNewTermNum
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Merges two terms into one brand-new term and splices it into the term chain.
 *
 * <p>The new term's name is the concatenation of both names, it starts at the offset of
 * {@code from}, and it is linked between the predecessor of {@code from} and the successor of
 * {@code to}.
 *
 * <p>Note: {@code numAttr} is assigned on the object returned by the new term's
 * {@code termNatures()}; the visible code builds that term from the {@code termNatures}
 * argument, so the argument itself is presumably the object being modified.
 *
 * @param from the first term of the pair
 * @param to the second term of the pair
 * @param termNatures the natures given to the merged term
 * @return the merged term
 */
public static Term makeNewTermNum(Term from, Term to, TermNatures termNatures) {
    Term merged = new Term(from.getName() + to.getName(), from.getOffe(), termNatures);
    merged.termNatures().numAttr = from.termNatures().numAttr;
    TermUtil.termLink(merged, to.to());
    // The freshly built term has no predecessor of its own yet, so the backward link has to
    // come from the predecessor of `from` (the original passed merged.from() here).
    TermUtil.termLink(from.from(), merged);
    return merged;
}
示例8: nameAmbiguity
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Person-name disambiguation.
 *
 * <p>First pass: when a two-character {@code TermNatures.NR} term is followed (two slots later)
 * by a term whose {@code personAttr.split} is positive, the first character of that following
 * term is moved onto the name and the remainder becomes a new {@code TermNatures.NW} term.
 * Example: "邓颖超生前" segmented as "邓颖 / 超生 / 前" becomes "邓颖超 / 生 / 前".
 *
 * <p>Second pass (foreign names): a single-character term that {@code WordAlert.CharCover}
 * maps to '·' is merged with its neighbours when the nature strings of both neighbours start
 * with "nr".
 *
 * @param terms the term slots, modified in place
 */
public static void nameAmbiguity(Term[] terms) {
    Term from = null;
    Term term = null;
    Term next = null;
    // The split reads slot i + 2 and writes slot i + 3, so stop while both are inside the array.
    for (int i = 0; i + 3 < terms.length; i++) {
        term = terms[i];
        if (term != null && term.termNatures() == TermNatures.NR && term.getName().length() == 2) {
            next = terms[i + 2];
            if (next != null && next.termNatures().personAttr.split > 0) {
                term.setName(term.getName() + next.getName().charAt(0));
                terms[i + 2] = null;
                // The remainder starts one character after `next` did, matching slot i + 3.
                terms[i + 3] = new Term(next.getName().substring(1), next.getOffe() + 1, TermNatures.NW);
                TermUtil.termLink(term, terms[i + 3]);
                TermUtil.termLink(terms[i + 3], next.to());
            }
        }
    }
    // Foreign-name correction
    for (int i = 0; i < terms.length; i++) {
        term = terms[i];
        if (term != null && term.getName().length() == 1 && i > 0 && WordAlert.CharCover(term.getName().charAt(0)) == '·') {
            from = term.from();
            next = term.to();
            // A separator without a neighbour on either side cannot join two names.
            if (from == null || next == null) {
                continue;
            }
            if (from.natrue().natureStr.startsWith("nr") && next.natrue().natureStr.startsWith("nr")) {
                from.setName(from.getName() + term.getName() + next.getName());
                TermUtil.termLink(from, next.to());
                terms[i] = null;
                if (i + 1 < terms.length) {
                    terms[i + 1] = null;
                }
            }
        }
    }
}
示例9: recognition
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Stores {@code terms} on this instance, then scans it and collects runs of name-like terms in
 * {@code tempList}.
 *
 * <p>When a run is broken by a term that is not name-like, a run of exactly one term is
 * discarded and a longer run is inserted back into {@code terms} as {@code TermNatures.NR}.
 *
 * @param terms the term slots to scan, modified in place
 */
@Override
public void recognition(Term[] terms) {
    this.terms = terms;
    reset();
    for (int pos = 0; pos < terms.length; pos++) {
        final Term candidate = terms[pos];
        if (candidate == null) {
            continue;
        }

        // A run may not start on a term flagged as a name ending, nor on a
        // single character listed in ISNOTFIRST.
        if (tempList.isEmpty()) {
            if (candidate.termNatures().personAttr.end > 10) {
                continue;
            }
            final String first = candidate.getName();
            if (first.length() == 1 && ISNOTFIRST.contains(first.charAt(0))) {
                continue;
            }
        }

        final String name = candidate.getName();
        final boolean nameLike = candidate.termNatures() == TermNatures.NR
                || candidate.termNatures() == TermNatures.NW
                || name.length() == 1;
        if (nameLike) {
            if (validate(name)) {
                tempList.add(candidate);
            }
            continue;
        }

        // The run is broken: drop a lone candidate, merge a longer one.
        final int collected = tempList.size();
        if (collected == 1) {
            reset();
        } else if (collected > 1) {
            TermUtil.insertTerm(terms, tempList, TermNatures.NR);
            reset();
        }
    }
}
示例10: getNewTerms
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Scans {@code terms} for runs of name-like terms and returns one new term per run.
 *
 * <p>A run of exactly one term is discarded; a longer run is turned into a term by
 * {@code makeNewTerm()} once a term that is not name-like ends it.
 *
 * @return the newly built terms, in scan order
 */
public List<Term> getNewTerms() {
    final LinkedList<Term> created = new LinkedList<Term>();
    reset();
    for (int pos = 0; pos < terms.length; pos++) {
        final Term candidate = terms[pos];
        if (candidate == null) {
            continue;
        }

        // A run may not start on a term flagged as a name ending, nor on a
        // single character listed in ISNOTFIRST.
        if (tempList.isEmpty()) {
            if (candidate.termNatures().personAttr.end > 10) {
                continue;
            }
            final String first = candidate.getName();
            if (first.length() == 1 && ISNOTFIRST.contains(first.charAt(0))) {
                continue;
            }
        }

        final String name = candidate.getName();
        final boolean nameLike = candidate.termNatures() == TermNatures.NR
                || candidate.termNatures() == TermNatures.NW
                || name.length() == 1;
        if (nameLike) {
            if (validate(name)) {
                tempList.add(candidate);
            }
            continue;
        }

        // The run is broken: drop a lone candidate, emit a longer one.
        final int collected = tempList.size();
        if (collected == 1) {
            reset();
        } else if (collected > 1) {
            created.add(makeNewTerm());
            reset();
        }
    }
    return created;
}
示例11: addTerm
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Adds a term to the graph.
 *
 * <p>Updates the {@code hasNum} / {@code hasPerson} markers from the term's natures, then
 * hands the term to {@code TermUtil.insertTerm} with {@code InsertTermType.REPLACE}.
 *
 * @param term the term to add
 */
public void addTerm(Term term) {
    // Both markers are sticky: they are only ever switched on here.
    if (!hasNum) {
        if (term.termNatures().numAttr.numFreq > 0) {
            hasNum = true;
        }
    }
    if (!hasPerson) {
        if (term.termNatures().personAttr.flag) {
            hasPerson = true;
        }
    }
    TermUtil.insertTerm(terms, term, InsertTermType.REPLACE);
}
示例12: makeNewTermNum
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Merges two terms into one brand-new term and splices it into the term chain.
 *
 * <p>The new term's name is the concatenation of both names, it starts at the offset of
 * {@code from}, and it is linked between the predecessor of {@code from} and the successor of
 * {@code to}.
 *
 * <p>Note: {@code numAttr} is assigned on the object returned by the new term's
 * {@code termNatures()}; the visible code builds that term from the {@code termNatures}
 * argument, so the argument itself is presumably the object being modified.
 *
 * @param from the first term of the pair
 * @param to the second term of the pair
 * @param termNatures the natures given to the merged term
 * @return the merged term
 */
public static Term makeNewTermNum(Term from, Term to, TermNatures termNatures) {
    Term combined = new Term(from.getName() + to.getName(), from.getOffe(), termNatures);
    combined.termNatures().numAttr = from.termNatures().numAttr;
    TermUtil.termLink(combined, to.to());
    // The freshly built term has no predecessor of its own yet, so the backward link has to
    // come from the predecessor of `from` (the original passed combined.from() here).
    TermUtil.termLink(from.from(), combined);
    return combined;
}
示例13: compuScore
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Computes the accumulated score of moving from one term to the term that follows it.
 *
 * <p>The result is {@code from.score()} plus the negative logarithm of a smoothed mix of the
 * frequency of {@code from} and the two-word frequency reported by {@code NgramLibrary}. When
 * {@code relationMap} holds an entry for the key {@code fromName + TAB + toName}, that value
 * is added to the two-word frequency.
 *
 * @param from the preceding term
 * @param to the following term
 * @param relationMap optional extra pair weights keyed by the two names joined with
 *        {@code TAB}; may be {@code null}
 * @return the score
 */
public static double compuScore(Term from, Term to, Map<String, Double> relationMap) {
    final double fromFreq = from.termNatures().allFreq + 1;

    // A negative frequency short-circuits: the penalty is also written back onto `from`.
    if (fromFreq < 0) {
        final double penalised = from.score() + MAX_FREQUENCE;
        from.score(penalised);
        return penalised;
    }

    double pairFreq = NgramLibrary.getTwoWordFreq(from, to);
    if (relationMap != null) {
        final String pairKey = from.getName() + TAB + to.getName();
        final Double extra = relationMap.get(pairKey);
        if (extra != null) {
            pairFreq += extra;
        }
    }

    final double singlePart = dSmoothingPara * fromFreq / (MAX_FREQUENCE + 80000);
    final double pairPart = (1 - dSmoothingPara) * ((1 - dTemp) * pairFreq / fromFreq + dTemp);

    double cost = -Math.log(singlePart + pairPart);
    if (cost < 0) {
        cost += fromFreq;
    }
    return from.score() + cost;
}
示例14: nameAmbiguity
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Person-name disambiguation.
 *
 * <p>First pass: when a two-character {@code TermNatures.NR} term is followed (two slots later)
 * by a term whose {@code personAttr.split} is positive, the first character of that following
 * term is moved onto the name and the remainder becomes a new term whose natures are looked up
 * through {@code NatureRecognition} built from {@code forests}.
 * Example: "邓颖超生前" segmented as "邓颖 / 超生 / 前" becomes "邓颖超 / 生 / 前".
 *
 * <p>Second pass (foreign names): a single-character term that {@code WordAlert.CharCover}
 * maps to '·' is merged with its neighbours when the nature strings of both neighbours start
 * with "nr".
 *
 * @param terms the term slots, modified in place
 * @param forests passed to {@code NatureRecognition} when the remainder term is created
 */
public static void nameAmbiguity(Term[] terms, Forest... forests) {
    Term from = null;
    Term term = null;
    Term next = null;
    // The split reads slot i + 2 and writes slot i + 3, so stop while both are inside the array.
    for (int i = 0; i + 3 < terms.length; i++) {
        term = terms[i];
        if (term != null && term.termNatures() == TermNatures.NR && term.getName().length() == 2) {
            next = terms[i + 2];
            if (next != null && next.termNatures().personAttr.split > 0) {
                term.setName(term.getName() + next.getName().charAt(0));
                terms[i + 2] = null;
                String name = next.getName().substring(1);
                terms[i + 3] = new Term(name, next.getOffe() + 1,
                        new NatureRecognition(forests).getTermNatures(name));
                TermUtil.termLink(term, terms[i + 3]);
                TermUtil.termLink(terms[i + 3], next.to());
            }
        }
    }
    // Foreign-name correction
    for (int i = 0; i < terms.length; i++) {
        term = terms[i];
        if (term != null && term.getName().length() == 1 && i > 0
                && WordAlert.CharCover(term.getName().charAt(0)) == '·') {
            from = term.from();
            next = term.to();
            // A separator without a neighbour on either side cannot join two names.
            if (from == null || next == null) {
                continue;
            }
            if (from.natrue().natureStr.startsWith("nr") && next.natrue().natureStr.startsWith("nr")) {
                from.setName(from.getName() + term.getName() + next.getName());
                TermUtil.termLink(from, next.to());
                terms[i] = null;
                if (i + 1 < terms.length) {
                    terms[i + 1] = null;
                }
            }
        }
    }
}
示例15: recognition
import org.ansj.domain.Term; //导入方法依赖的package包/类
/**
 * Merges adjacent numeric terms into a single term.
 *
 * <p>Two cases are handled while scanning every slot except the last one:
 * <ul>
 * <li>a "." (ASCII or full-width) whose neighbours both have {@code numAttr.flag} set is
 * folded, together with both neighbours, into the term before it;</li>
 * <li>a term with {@code numAttr.flag} set absorbs every following term that also has the
 * flag set and, when {@code MyStaticValue.isQuantifierRecognition} is on, one further term
 * whose {@code numAttr.numEndFreq} is positive.</li>
 * </ul>
 * Slots of absorbed terms are set to {@code null}.
 *
 * @param terms the term slots, modified in place
 */
public static void recognition(Term[] terms) {
    int length = terms.length - 1;
    Term from = null;
    Term to = null;
    Term temp = null;
    for (int i = 0; i < length; i++) {
        if (terms[i] == null) {
            continue;
        } else if (".".equals(terms[i].getName()) || "\uFF0E".equals(terms[i].getName())) {
            // "\uFF0E" is the full-width full stop; the original tested the ASCII dot twice.
            // A dot with numbers on both sides gets special handling.
            to = terms[i].to();
            from = terms[i].from();
            if (from != null && to != null && from.termNatures().numAttr.flag && to.termNatures().numAttr.flag) {
                from.setName(from.getName() + "." + to.getName());
                TermUtil.termLink(from, to.to());
                terms[to.getOffe()] = null;
                terms[i] = null;
                // Step back so the next iteration re-examines the merged number.
                i = from.getOffe() - 1;
            }
            continue;
        } else if (!terms[i].termNatures().numAttr.flag) {
            continue;
        }
        temp = terms[i];
        // Absorb every following numeric term.
        while ((temp = temp.to()).termNatures().numAttr.flag) {
            terms[i].setName(terms[i].getName() + temp.getName());
        }
        // Absorb one trailing term that can end a number.
        if (MyStaticValue.isQuantifierRecognition && temp.termNatures().numAttr.numEndFreq > 0) {
            terms[i].setName(terms[i].getName() + temp.getName());
            temp = temp.to();
        }
        // If they differ, terms[i] was extended and has to be relinked.
        if (terms[i].to() != temp) {
            TermUtil.termLink(terms[i], temp);
            // Clear the slots of the absorbed terms.
            for (int j = i + 1; j < temp.getOffe(); j++) {
                terms[j] = null;
            }
            i = temp.getOffe() - 1;
        }
    }
}