当前位置: 首页>>代码示例>>Java>>正文


Java Nature类代码示例

本文整理汇总了Java中com.hankcs.hanlp.corpus.tag.Nature的典型用法代码示例。如果您正苦于以下问题:Java Nature类的具体用法?Java Nature怎么用?Java Nature使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


Nature类属于com.hankcs.hanlp.corpus.tag包,在下文中一共展示了Nature类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: roleTag

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Prepares every sentence for tagging: each word is passed through
 * {@code Precompiler.compile}, then the sentence is bracketed with a begin
 * sentinel word at the front and an end sentinel word at the back.
 *
 * @param sentenceList sentences to process; every inner list is modified in
 *                     place and therefore must support insertion
 */
@Override
protected void roleTag(List<List<IWord>> sentenceList)
{
    logger.info("开始标注");
    int i = 0;
    for (List<IWord> wordList : sentenceList)
    {
        logger.info(++i + " / " + sentenceList.size());
        for (IWord word : wordList)
        {
            Precompiler.compile(word);  // compile to the equivalent string
        }
        // Insert through the List interface instead of downcasting to LinkedList:
        // the result is identical for a LinkedList, and other mutable List
        // implementations no longer fail with a ClassCastException.
        wordList.add(0, new Word(Predefine.TAG_BIGIN, Nature.begin.toString()));
        wordList.add(new Word(Predefine.TAG_END, Nature.end.toString()));
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:18,代码来源:NatureDictionaryMaker.java

示例2: loadDictionary

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Loads the entries read from a BufferedReader into a dictionary map.
 * <p>
 * Each line is split on single whitespace characters: the first token is the
 * word, followed by (nature, frequency) pairs. A trailing token without a
 * partner is ignored by the integer division used to count the pairs.
 * <p>
 * The reader is always closed before this method returns, including when
 * reading or parsing fails.
 *
 * @param br      source of dictionary lines; closed by this method
 * @param storage map that receives one attribute per word
 * @throws IOException if reading from or closing the reader fails
 * @throws IllegalArgumentException if a nature token is not a constant of
 *         {@code Nature}, or (as NumberFormatException) a frequency token is
 *         not an integer
 */
public static void loadDictionary(BufferedReader br, TreeMap<String, CoreDictionary.Attribute> storage) throws IOException
{
    try
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            String param[] = line.split("\\s");
            int natureCount = (param.length - 1) / 2;
            CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            for (int i = 0; i < natureCount; ++i)
            {
                attribute.nature[i] = Enum.valueOf(Nature.class, param[1 + 2 * i]);
                attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]);
                attribute.totalFrequency += attribute.frequency[i];
            }
            storage.put(param[0], attribute);
        }
    }
    finally
    {
        // Previously close() was only reached on success, leaking the reader
        // whenever a line failed to parse or a read threw.
        br.close();
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:25,代码来源:IOUtil.java

示例3: toVertexList

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Builds one vertex per term, optionally preceded by the {@code Vertex.B} vertex.
 * <p>
 * Terms found in {@code CoreDictionary} get their nature overwritten with the
 * first nature of the dictionary attribute. Terms not found keep their nature
 * and receive a synthetic attribute: {@code Nature.x} when the word is empty
 * after trimming, {@code Nature.nz} otherwise.
 *
 * @param termList    terms to convert; the nature of known terms is updated in place
 * @param appendStart whether to put {@code Vertex.B} at the head of the result
 * @return the vertices, in term order
 */
private static List<Vertex> toVertexList(List<Term> termList, boolean appendStart)
{
    ArrayList<Vertex> result = new ArrayList<Vertex>(termList.size() + 1);
    if (appendStart)
    {
        result.add(Vertex.B);
    }
    for (Term term : termList)
    {
        CoreDictionary.Attribute known = CoreDictionary.get(term.word);
        if (known != null)
        {
            term.nature = known.nature[0];
            result.add(new Vertex(term.word, known));
            continue;
        }
        boolean blank = term.word.trim().length() == 0;
        CoreDictionary.Attribute fallback = blank
                ? new CoreDictionary.Attribute(Nature.x)
                : new CoreDictionary.Attribute(Nature.nz);
        result.add(new Vertex(term.word, fallback));
    }

    return result;
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:20,代码来源:CRFSegment.java

示例4: confirmNature

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Locks the part of speech of this attribute to {@code nature}. This overload
 * is slower than the single-argument one.
 * <p>
 * For {@code Nature.m} and {@code Nature.t} the {@code word} field is replaced
 * with the corresponding predefined tag; any other nature only logs a warning.
 * NOTE(review): {@code updateWord} is not consulted anywhere in this body, so
 * the replacement happens regardless of its value - confirm whether intended.
 *
 * @param nature
 *            part of speech to lock
 * @param updateWord
 *            whether to update the precompiled string (currently unused)
 * @return true if the locked part of speech is in the nature list, false
 *         otherwise (the value comes from the single-argument overload)
 */
public boolean confirmNature(Nature nature, boolean updateWord) {
	boolean handled = true;
	switch (nature) {
	case t:
		word = Predefine.TAG_TIME;
		break;
	case m:
		word = Predefine.TAG_NUMBER;
		break;
	default:
		handled = false;
		break;
	}
	if (!handled) {
		logger.warning("没有与" + nature + "对应的case");
	}

	return confirmNature(nature);
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:26,代码来源:Vertex.java

示例5: segment

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Segments text while keeping every match of {@code WEB_URL} as a single term.
 * <p>
 * The text between matches is handed to {@code SEGMENT}; each match itself is
 * emitted as one term tagged {@code Nature.xu}.
 *
 * @param text the text to segment
 * @return the terms in order of appearance
 */
public static List<Term> segment(String text)
{
    List<Term> result = new LinkedList<Term>();
    int cursor = 0;
    for (Matcher urlMatcher = WEB_URL.matcher(text); urlMatcher.find(); cursor = urlMatcher.end())
    {
        // Plain text between the previous match (or the start) and this match.
        String before = text.substring(cursor, urlMatcher.start());
        result.addAll(SEGMENT.seg(before));
        result.add(new Term(urlMatcher.group(), Nature.xu));
    }
    if (cursor < text.length())
    {
        // Whatever follows the last match.
        String tail = text.substring(cursor);
        result.addAll(SEGMENT.seg(tail));
    }

    return result;
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:23,代码来源:URLTokenizer.java

示例6: main

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Demo entry point: segments a sample sentence with {@code URLTokenizer},
 * prints the full term list, then prints the word of every term tagged
 * {@code Nature.xu}.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    final String sample =
            "HanLP的项目地址是https://github.com/hankcs/HanLP," +
            "发布地址是https://github.com/hankcs/HanLP/releases," +
            "我有时候会在www.hankcs.com上面发布一些消息," +
            "我的微博是http://weibo.com/hankcs/,会同步推送hankcs.com的新闻。" +
            "听说.中国域名开放申请了,但我并没有申请hankcs.中国,因为穷……";
    final List<Term> terms = URLTokenizer.segment(sample);
    System.out.println(terms);
    for (Term candidate : terms)
    {
        if (candidate.nature != Nature.xu)
        {
            continue;
        }
        System.out.println(candidate.word);
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:17,代码来源:DemoURLRecognition.java

示例7: testRemoveNotNS

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * The place-name dictionary file contains many person names; this rewrites the
 * file in place without them.
 * <p>
 * An entry is kept when its last character is one of
 * {@code Predefine.POSTFIX_SINGLE}. Otherwise it is segmented with the custom
 * dictionary disabled, and dropped (after being printed) when the result is a
 * single term tagged {@code Nature.nr}.
 *
 * @throws Exception propagated from loading, segmenting or saving
 */
public void testRemoveNotNS() throws Exception
{
    final Set<Character> keepSuffixes = new TreeSet<Character>();
    char[] suffixChars = Predefine.POSTFIX_SINGLE.toCharArray();
    for (int i = 0; i < suffixChars.length; ++i)
    {
        keepSuffixes.add(suffixChars[i]);
    }
    String path = "data/dictionary/custom/全国地名大全.txt";
    DictionaryMaker.load(path).saveTxtTo(path, new DictionaryMaker.Filter()
    {
        Segment segment = HanLP.newSegment().enableCustomDictionary(false);

        @Override
        public boolean onSave(Item item)
        {
            char lastChar = item.key.charAt(item.key.length() - 1);
            if (keepSuffixes.contains(lastChar))
            {
                return true;
            }
            List<Term> terms = segment.seg(item.key);
            boolean singlePersonName = terms.size() == 1 && terms.get(0).nature == Nature.nr;
            if (singlePersonName)
            {
                System.out.println(item);
            }
            return !singlePersonName;
        }
    });
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:30,代码来源:TestCustomDictionary.java

示例8: testBatch

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Walks every file under {@code FOLDER}, splits its text into sentences with
 * a fresh {@code DijkstraSegment} per file, and prints each sentence for which
 * {@code SentencesUtil.hasNature(sentence, Nature.nr)} is true. A progress
 * line is printed before each file.
 *
 * @throws Exception propagated from file access or segmentation
 */
public void testBatch() throws Exception
{
    List<File> files = FolderWalker.open(FOLDER);
    int processed = 0;
    for (File current : files)
    {
        ++processed;
        System.out.println(processed + " / " + files.size() + " " + current.getName() + " ");
        String text = IOUtil.readTxt(current.getAbsolutePath());
        DijkstraSegment segmenter = new DijkstraSegment();
        for (List<Term> sentence : segmenter.seg2sentence(text))
        {
            if (!SentencesUtil.hasNature(sentence, Nature.nr))
            {
                continue;
            }
            System.out.println(sentence);
        }
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:21,代码来源:TestPersonRecognition.java

示例9: create

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Parses an attribute from a space-separated string of alternating nature
 * names and integer frequencies, e.g. {@code "n 10 v 3"}.
 * <p>
 * Any failure (including a null argument, an unknown nature name or a
 * non-numeric frequency) is logged as a warning and reported by returning null.
 *
 * @param natureWithFrequency nature/frequency pairs separated by single spaces
 * @return the parsed attribute, or null if parsing failed
 */
public static Attribute create(String natureWithFrequency)
{
    try
    {
        String[] tokens = natureWithFrequency.split(" ");
        int pairCount = tokens.length / 2;
        Attribute parsed = new Attribute(pairCount);
        // pos walks the token array two at a time: tokens[pos] is the nature,
        // tokens[pos + 1] is its frequency.
        for (int pair = 0, pos = 0; pair < pairCount; ++pair, pos += 2)
        {
            parsed.nature[pair] = Enum.valueOf(Nature.class, tokens[pos]);
            int count = Integer.parseInt(tokens[pos + 1]);
            parsed.frequency[pair] = count;
            parsed.totalFrequency += parsed.frequency[pair];
        }
        return parsed;
    }
    catch (Exception e)
    {
        logger.warning("使用字符串" + natureWithFrequency + "创建词条属性失败!" + TextUtility.exceptionToString(e));
        return null;
    }
}
 
开发者ID:ml-distribution,项目名称:HanLP,代码行数:22,代码来源:CoreDictionary.java

示例10: roleTag

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Assigns a person-name role item to every vertex of a segmentation result.
 * <p>
 * A vertex tagged {@code Nature.nr} whose attribute has a total frequency of at
 * most 1000 and whose real word is two characters long gets a fixed
 * {@code (NR.X, NR.G)} item, because some two-character names can actually be
 * part of a longer three-character name. Every other vertex is looked up in
 * {@code PersonDictionary.dictionary}; when absent, an {@code NR.A} item
 * carrying the total frequency of {@code NR.A} is used instead.
 *
 * @param pWordSegResult vertices of the coarse segmentation
 * @return one role item per vertex, in the same order
 */
public static List<EnumItem<NR>> roleTag(List<Vertex> pWordSegResult)
{
    List<EnumItem<NR>> roles = new LinkedList<EnumItem<NR>>();
    for (Vertex vertex : pWordSegResult)
    {
        boolean rareTwoCharName = Nature.nr == vertex.getNature()
                && vertex.getAttribute().totalFrequency <= 1000
                && vertex.realWord.length() == 2;
        EnumItem<NR> role;
        if (rareTwoCharName)
        {
            role = new EnumItem<NR>(NR.X, NR.G);
        }
        else
        {
            role = PersonDictionary.dictionary.get(vertex.realWord);
            if (role == null)
            {
                role = new EnumItem<NR>(NR.A, PersonDictionary.transformMatrixDictionary.getTotalFrequency(NR.A));
            }
        }
        roles.add(role);
    }
    return roles;
}
 
开发者ID:ml-distribution,项目名称:HanLP,代码行数:24,代码来源:PersonRecognition.java

示例11: confirmNature

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Locks the part of speech of this attribute to {@code nature}. This overload
 * is slower than the single-argument one.
 * <p>
 * {@code Nature.m} and {@code Nature.t} replace the {@code word} field with the
 * matching predefined tag; every other nature only produces a warning.
 * NOTE(review): {@code updateWord} is never read in this body - confirm
 * whether the replacement was meant to be conditional on it.
 *
 * @param nature     part of speech to lock
 * @param updateWord whether to update the precompiled string (currently unused)
 * @return true if the locked part of speech is in the nature list, false
 *         otherwise (as reported by the single-argument overload)
 */
public boolean confirmNature(Nature nature, boolean updateWord)
{
    switch (nature)
    {
        case t:
            word = Predefine.TAG_TIME;
            break;
        case m:
            word = Predefine.TAG_NUMBER;
            break;
        default:
        {
            String message = "没有与" + nature + "对应的case";
            logger.warning(message);
        }
    }

    return confirmNature(nature);
}
 
开发者ID:ml-distribution,项目名称:HanLP,代码行数:26,代码来源:Vertex.java

示例12: incrementToken

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Advances to the next term that is not rejected by {@code filter} and
 * publishes it through the token attributes.
 * <p>
 * Terms tagged {@code Nature.nx} are replaced by their stem when Porter
 * stemming is enabled. The end offset is computed from the length the term had
 * before stemming, so the reported span covers the text as it appeared in the
 * segmenter output rather than the (possibly shorter) stem.
 *
 * @return true if a term was published, false when the segmenter is exhausted
 * @throws IOException declared by the overridden method
 */
@Override
final public boolean incrementToken() throws IOException {
    clearAttributes();
    int position = 0;
    Term term;
    // Length of the current term before any stemming; used for the end offset.
    int surfaceLength = 0;
    boolean un_increased = true;
    do {
        term = segment.next();
        if (term == null) {
            break;
        }
        surfaceLength = term.word.length();
        if (enablePorterStemming && term.nature == Nature.nx) {
            term.word = stemmer.stem(term.word);
        }

        if (filter != null && filter.containsKey(term.word)) {
            // Filtered term: `continue` re-evaluates the loop condition, which
            // is still true, so the next term is fetched.
            continue;
        } else {
            ++position;
            un_increased = false;
        }
    }
    while (un_increased);

    if (term != null) {
        positionAttr.setPositionIncrement(position);
        termAtt.setEmpty().append(term.word);
        // Use the pre-stemming length: term.word may now hold a stem whose
        // length differs from the text actually covered by this token.
        offsetAtt.setOffset(term.offset, term.offset + surfaceLength);
        typeAtt.setType(term.nature == null ? "null" : term.nature.toString());
        return true;
    } else {
        return false;
    }
}
 
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:35,代码来源:HanLPTokenizer.java

示例13: getWordsInPath

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Collects the words on the dependency path that starts at {@code word} and
 * follows {@code HEAD} links until {@code CoNLLWord.ROOT} is reached.
 * <p>
 * Each visited word becomes a term built from its lemma and the nature parsed
 * from its POS tag. The root itself is never included.
 *
 * @param word      word to start from
 * @param maxReturn maximum number of words to collect; values below 1 yield an empty list
 * @return the words along the path, starting with {@code word}
 */
public static List<Term> getWordsInPath(CoNLLWord word, int maxReturn) {
    List<Term> path = new ArrayList<Term>();
    CoNLLWord cursor = word;
    int remaining = maxReturn;
    while (remaining >= 1 && cursor != CoNLLWord.ROOT) {
        path.add(new Term(cursor.LEMMA, Nature.fromString(cursor.POSTAG)));
        cursor = cursor.HEAD;
        --remaining;
    }
    return path;
}
 
开发者ID:jsksxs360,项目名称:AHANLP,代码行数:17,代码来源:DependencyParser.java

示例14: loadDat

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Loads the double array from its binary cache on disk.
 * <p>
 * The stream is read as: the number of attributes, then for every attribute
 * its total frequency, its number of natures, and that many
 * (index into {@code Nature.values()}, frequency) pairs. The remaining bytes
 * are handed to {@code trie.load}. Loading only succeeds when the trie accepts
 * the data and nothing is left unread afterwards.
 *
 * @param path dictionary path; {@code Predefine.BIN_EXT} is appended to locate the cache
 * @return true on success; false if the cache cannot be opened, is rejected
 *         by the trie, has trailing data, or any exception occurs
 */
static boolean loadDat(String path)
{
    try
    {
        ByteArray bytes = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (bytes == null)
        {
            return false;
        }
        int entryCount = bytes.nextInt();
        CoreDictionary.Attribute[] loaded = new CoreDictionary.Attribute[entryCount];
        final Nature[] natureByIndex = Nature.values();
        for (int entry = 0; entry < entryCount; ++entry)
        {
            // First comes the total frequency, then the number of natures.
            int totalFrequency = bytes.nextInt();
            int natureCount = bytes.nextInt();
            CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            attribute.totalFrequency = totalFrequency;
            for (int slot = 0; slot < natureCount; ++slot)
            {
                attribute.nature[slot] = natureByIndex[bytes.nextInt()];
                attribute.frequency[slot] = bytes.nextInt();
            }
            loaded[entry] = attribute;
        }
        return trie.load(bytes, loaded) && !bytes.hasMore();
    }
    catch (Exception e)
    {
        logger.warning("读取失败,问题发生在" + e);
        return false;
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:38,代码来源:CoreDictionary.java

示例15: Attribute

import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Creates an attribute describing a single nature.
 * <p>
 * Delegates to the size-taking constructor with a size of 1 (presumably
 * allocating one slot in each array - confirm against that constructor), then
 * fills slot 0 and sets the total frequency to the same value.
 *
 * @param nature    the only nature of this attribute
 * @param frequency frequency of that nature; also used as the total frequency
 */
public Attribute(Nature nature, int frequency)
{
    this(1);
    this.totalFrequency = frequency;
    this.frequency[0] = frequency;
    this.nature[0] = nature;
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:8,代码来源:CoreDictionary.java


注:本文中的com.hankcs.hanlp.corpus.tag.Nature类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。