This article collects typical usage examples of the Java class com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord. If you have been wondering what CoNLLWord is for, how to use it, or want to see it in real code, the curated class examples below should help.
The CoNLLWord class belongs to the com.hankcs.hanlp.corpus.dependency.CoNll package. 13 code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
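Before the examples, here is a minimal usage sketch, assuming the standard HanLP entry point HanLP.parseDependency; the LEMMA, DEPREL, and HEAD fields and the ROOT sentinel used here all appear in the examples below:

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord;

public class CoNLLWordDemo
{
    public static void main(String[] args)
    {
        CoNLLSentence sentence = HanLP.parseDependency("我爱北京天安门");
        for (CoNLLWord word : sentence)
        {
            // Every word points at its syntactic head; the head of the root word is CoNLLWord.ROOT
            System.out.println(word.LEMMA + " --(" + word.DEPREL + ")--> " + word.HEAD.LEMMA);
        }
    }
}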
Example 1: getWordsInPath
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the words along a word's dependency path
 * @param word the starting word
 * @param maxReturn maximum path length
 * @return the list of words on the dependency path
 */
public static List<Term> getWordsInPath(CoNLLWord word, int maxReturn) {
    List<Term> words = new ArrayList<Term>();
    if (word == CoNLLWord.ROOT || maxReturn < 1) return words;
    while (word != CoNLLWord.ROOT) {
        words.add(new Term(word.LEMMA, Nature.fromString(word.POSTAG)));
        word = word.HEAD;
        if (--maxReturn < 1) break;
    }
    return words;
}
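A quick usage sketch, assuming the same parse(...) helper that Example 2 below relies on and a segmentation result segResult: the returned path starts at the word itself and climbs HEAD links, stopping before ROOT.

CoNLLWord[] words = parse(segResult).getWordArray();
// At most 3 words: the word itself, its head, and its head's head
List<Term> path = getWordsInPath(words[0], 3);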
Example 2: getWordPaths
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the dependency paths of the words
 * @param segResult segmentation result
 * @return a list of dependency paths, one per word
 */
public static List<List<Term>> getWordPaths(List<Term> segResult) {
    CoNLLWord[] wordArray = parse(segResult).getWordArray();
    List<List<Term>> wordPaths = new ArrayList<List<Term>>();
    for (CoNLLWord word : wordArray)
        wordPaths.add(getWordsInPath(word));
    return wordPaths;
}
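Note: this calls a single-argument overload getWordsInPath(word) that is not shown in this article, presumably an uncapped variant of Example 1.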
Example 3: calWordDepth
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the depth of a word
 * @param word the word
 * @return the word's depth in the dependency tree
 */
public static int calWordDepth(CoNLLWord word) {
    if (word == CoNLLWord.ROOT) return -1;
    int depth = 0;
    while (word.HEAD != CoNLLWord.ROOT) {
        depth++;
        word = word.HEAD;
    }
    return depth;
}
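The depth convention here: ROOT itself yields -1, a word attached directly to ROOT has depth 0, and each further HEAD hop adds one. Examples 4-6 below all build on this method.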
Example 4: getWordsDepth
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the depth of every word
 * @param segResult segmentation result
 * @return a map from each word to its depth in the dependency tree
 */
public static Map<String, Integer> getWordsDepth(List<Term> segResult) {
    CoNLLSentence sentenceDep = parse(segResult);
    HashMap<String, Integer> wordsDepth = new HashMap<>();
    for (CoNLLWord wordDep : sentenceDep)
        wordsDepth.put(wordDep.LEMMA, calWordDepth(wordDep)); // repeated words overwrite earlier entries
    return wordsDepth;
}
Example 5: getTopWordsDepth
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the depths of the upper-level words
 * @param segResult segmentation result
 * @param maxDepth maximum depth in the dependency tree
 * @return the words and their corresponding depths
 */
public static Map<String, Integer> getTopWordsDepth(List<Term> segResult, int maxDepth) {
    CoNLLSentence sentenceDep = parse(segResult);
    HashMap<String, Integer> wordsDepth = new HashMap<>();
    for (CoNLLWord wordDep : sentenceDep) {
        int depth = calWordDepth(wordDep);
        if (depth <= maxDepth)
            wordsDepth.put(wordDep.LEMMA, depth);
    }
    return wordsDepth;
}
Example 6: getTopWords
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Get the upper-level words
 * @param segResult segmentation result
 * @param maxDepth maximum depth in the dependency tree
 * @return the words
 */
public static List<String> getTopWords(List<Term> segResult, int maxDepth) {
    CoNLLSentence sentenceDep = parse(segResult);
    List<String> topWords = new ArrayList<>();
    for (CoNLLWord wordDep : sentenceDep) {
        if (calWordDepth(wordDep) <= maxDepth)
            topWords.add(wordDep.LEMMA);
    }
    return topWords;
}
Example 7: makeModel
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
public static boolean makeModel(String corpusLoadPath, String modelSavePath) throws IOException
{
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(IOUtil.newOutputStream(modelSavePath)));
    LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList(corpusLoadPath);
    int id = 1;
    for (CoNLLSentence sentence : sentenceList)
    {
        System.out.printf("%d / %d...", id++, sentenceList.size());
        String[][] edgeArray = sentence.getEdgeArray();
        CoNLLWord[] word = sentence.getWordArrayWithRoot();
        for (int i = 0; i < word.length; ++i)
        {
            for (int j = 0; j < word.length; ++j)
            {
                if (i == j) continue;
                // This is one edge instance, from i to j; the edge may or may not exist,
                // and a non-existent edge (null) is an instance all the same
                List<String> contextList = new LinkedList<String>();
                // First generate the atomic features of i and j
                contextList.addAll(generateSingleWordContext(word, i, "i"));
                contextList.addAll(generateSingleWordContext(word, j, "j"));
                // Then generate the pairwise features
                contextList.addAll(generateUniContext(word, i, j));
                // Serialize the features as strings
                for (String f : contextList)
                {
                    bw.write(f);
                    bw.write(' ');
                }
                // The event name is the dependency relation
                bw.write("" + edgeArray[i][j]);
                bw.newLine();
            }
        }
        System.out.println("done.");
    }
    bw.close();
    return true;
}
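Each emitted line is thus a space-separated feature list followed by the event label: the dependency relation when the edge i→j exists in the gold tree, or the string "null" when it does not. A hypothetical line (actual feature values and relation names depend on the corpus) might look like:

我i-1 ri-1 爱i0 vi0 ... 爱→北京 v→ns ... 主谓关系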
Example 8: generateSingleWordContext
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
public static Collection<String> generateSingleWordContext(CoNLLWord[] word, int index, String mark)
{
    Collection<String> context = new LinkedList<String>();
    for (int i = index - 2; i < index + 2 + 1; ++i)
    {
        CoNLLWord w = i >= 0 && i < word.length ? word[i] : CoNLLWord.NULL;
        context.add(w.NAME + mark + (i - index)); // tag the offset onto the tail so features from different positions don't collide
        context.add(w.POSTAG + mark + (i - index));
    }
    return context;
}
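For intuition, here is a hedged sketch run inside the same class, using a hypothetical three-word sentence. The four-argument constructor mirrors the call in Example 10 and is assumed here to take (id, lemma, cpostag, postag); NAME is assumed to mirror the surface form for ordinary words.

// Hypothetical sentence: 我 / 爱 / 北京
CoNLLWord[] words = new CoNLLWord[]{
        new CoNLLWord(1, "我", "r", "r"),
        new CoNLLWord(2, "爱", "v", "v"),
        new CoNLLWord(3, "北京", "ns", "ns"),
};
// Ten features for index 1: a NAME and a POSTAG feature for each offset in [-2, +2],
// padded with CoNLLWord.NULL beyond the sentence boundary
for (String feature : generateSingleWordContext(words, 1, "i"))
{
    System.out.println(feature); // e.g. 我i-1, ri-1, 爱i0, vi0, 北京i1, nsi1, ...
}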
Example 9: generateUniContext
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
public static Collection<String> generateUniContext(CoNLLWord[] word, int i, int j)
{
    Collection<String> context = new LinkedList<String>();
    context.add(word[i].NAME + '→' + word[j].NAME);
    context.add(word[i].POSTAG + '→' + word[j].POSTAG);
    context.add(word[i].NAME + '→' + word[j].NAME + (i - j));
    context.add(word[i].POSTAG + '→' + word[j].POSTAG + (i - j));
    CoNLLWord wordBeforeI = i - 1 >= 0 ? word[i - 1] : CoNLLWord.NULL;
    CoNLLWord wordBeforeJ = j - 1 >= 0 ? word[j - 1] : CoNLLWord.NULL;
    context.add(wordBeforeI.NAME + '@' + word[i].NAME + '→' + word[j].NAME);
    context.add(word[i].NAME + '→' + wordBeforeJ.NAME + '@' + word[j].NAME);
    context.add(wordBeforeI.POSTAG + '@' + word[i].POSTAG + '→' + word[j].POSTAG);
    context.add(word[i].POSTAG + '→' + wordBeforeJ.POSTAG + '@' + word[j].POSTAG);
    return context;
}
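For i = 1 ('爱') and j = 2 ('北京') in the hypothetical sentence from Example 8, the first four features would be 爱→北京, v→ns, 爱→北京-1, and v→ns-1 (the signed distance i - j is appended to the last two), followed by four neighbor-augmented variants that prepend the preceding word or tag with '@'.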
Example 10: parse
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
@Override
public CoNLLSentence parse(List<Term> termList)
{
    List<String> posTagList = PosTagUtil.to863(termList);
    List<String> wordList = new ArrayList<String>(termList.size());
    for (Term term : termList)
    {
        wordList.add(term.word);
    }
    List<Integer> heads = new ArrayList<Integer>(termList.size());
    List<String> deprels = new ArrayList<String>(termList.size());
    parser_dll.parse(wordList, posTagList, heads, deprels);
    CoNLLWord[] wordArray = new CoNLLWord[termList.size()];
    for (int i = 0; i < wordArray.length; ++i)
    {
        wordArray[i] = new CoNLLWord(i + 1, wordList.get(i), posTagList.get(i), termList.get(i).nature.toString());
        wordArray[i].DEPREL = deprels.get(i);
    }
    for (int i = 0; i < wordArray.length; ++i)
    {
        int index = heads.get(i) - 1;
        if (index < 0)
        {
            wordArray[i].HEAD = CoNLLWord.ROOT;
            continue;
        }
        wordArray[i].HEAD = wordArray[index];
    }
    return new CoNLLSentence(wordArray);
}
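Note the head-index convention: the external parser returns 1-based head positions, so heads.get(i) - 1 converts to a 0-based array index, and any value of 0 or less marks attachment to CoNLLWord.ROOT.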
Example 11: testPosTag
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Map fine-grained POS tags to coarse-grained ones
 *
 * @throws Exception
 */
public void testPosTag() throws Exception
{
    DictionaryMaker dictionaryMaker = new DictionaryMaker();
    LinkedList<CoNLLSentence> coNLLSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll.fixed.txt");
    for (CoNLLSentence coNLLSentence : coNLLSentences)
    {
        for (CoNLLWord coNLLWord : coNLLSentence.word)
        {
            dictionaryMaker.add(new Item(coNLLWord.POSTAG, coNLLWord.CPOSTAG));
        }
    }
    System.out.println(dictionaryMaker.entrySet());
}
Example 12: makeModel
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
public static boolean makeModel(String corpusLoadPath, String modelSavePath) throws IOException
{
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(modelSavePath)));
    LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList(corpusLoadPath);
    int id = 1;
    for (CoNLLSentence sentence : sentenceList)
    {
        System.out.printf("%d / %d...", id++, sentenceList.size());
        String[][] edgeArray = sentence.getEdgeArray();
        CoNLLWord[] word = sentence.getWordArrayWithRoot();
        for (int i = 0; i < word.length; ++i)
        {
            for (int j = 0; j < word.length; ++j)
            {
                if (i == j) continue;
                // This is one edge instance, from i to j; the edge may or may not exist,
                // and a non-existent edge (null) is an instance all the same
                List<String> contextList = new LinkedList<String>();
                // First generate the atomic features of i and j
                contextList.addAll(generateSingleWordContext(word, i, "i"));
                contextList.addAll(generateSingleWordContext(word, j, "j"));
                // Then generate the pairwise features
                contextList.addAll(generateUniContext(word, i, j));
                // Serialize the features as strings
                for (String f : contextList)
                {
                    bw.write(f);
                    bw.write(' ');
                }
                // The event name is the dependency relation
                bw.write("" + edgeArray[i][j]);
                bw.newLine();
            }
        }
        System.out.println("done.");
    }
    bw.close();
    return true;
}
Example 13: testMakeCRF
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord; // import the required package/class
/**
 * Export a CRF training corpus
 *
 * @throws Exception
 */
public void testMakeCRF() throws Exception
{
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\Tools\\CRF++-0.58\\example\\dependency\\dev.txt")));
    LinkedList<CoNLLSentence> coNLLSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll.fixed.txt");
    for (CoNLLSentence coNLLSentence : coNLLSentences)
    {
        for (CoNLLWord coNLLWord : coNLLSentence.word)
        {
            bw.write(coNLLWord.NAME);
            bw.write('\t');
            bw.write(coNLLWord.CPOSTAG);
            bw.write('\t');
            bw.write(coNLLWord.POSTAG);
            bw.write('\t');
            int d = coNLLWord.HEAD.ID - coNLLWord.ID;
            int posDistance = 1;
            if (d > 0) // the head is after the current word
            {
                for (int i = 1; i < d; ++i)
                {
                    if (coNLLSentence.word[coNLLWord.ID - 1 + i].CPOSTAG.equals(coNLLWord.HEAD.CPOSTAG))
                    {
                        ++posDistance;
                    }
                }
            }
            else
            {
                for (int i = 1; i < -d; ++i) // the head is before the current word
                {
                    if (coNLLSentence.word[coNLLWord.ID - 1 - i].CPOSTAG.equals(coNLLWord.HEAD.CPOSTAG))
                    {
                        ++posDistance;
                    }
                }
            }
            bw.write((d > 0 ? "+" : "-") + posDistance + "_" + coNLLWord.HEAD.CPOSTAG
            // + "_" + coNLLWord.DEPREL
            );
            bw.newLine();
        }
        bw.newLine();
    }
    bw.close();
}
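Each output row is thus word, coarse tag, fine tag, and a relative-head label of the form ±N_T, where T is the head's coarse tag and N means the head is the N-th word with tag T in that direction. A hypothetical row: 我\tr\tr\t+1_v, meaning the head is the first following word whose coarse tag is v. The blank line written after each sentence is the sentence separator expected by CRF++.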