当前位置: 首页>>代码示例>>Java>>正文


Java Segment类代码示例

本文整理汇总了Java中com.hankcs.hanlp.seg.Segment的典型用法代码示例。如果您正苦于以下问题:Java Segment类的具体用法?Java Segment怎么用?Java Segment使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


Segment类属于com.hankcs.hanlp.seg包,在下文中一共展示了Segment类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Demo entry point: runs a fixed list of sentences through a segmenter
 * created with {@code enableNameRecognize(true)} and prints the term list
 * produced for each sentence.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    final String[] sentences = {
            "签约仪式前,秦光荣、李纪恒、仇和等一同会见了参加签约的企业家。",
            "区长庄木弟新年致辞",
            "朱立伦:两岸都希望共创双赢 习朱历史会晤在即",
            "陕西首富吴一坚被带走 与令计划妻子有交集",
            "据美国之音电台网站4月28日报道,8岁的凯瑟琳·克罗尔(凤甫娟)和很多华裔美国小朋友一样,小小年纪就开始学小提琴了。她的妈妈是位虎妈么?",
            "凯瑟琳和露西(庐瑞媛),跟她们的哥哥们有一些不同。",
            "王国强、高峰、汪洋、张朝阳光着头、韩寒、小四",
            "张浩和胡健康复员回家了",
            "王总和小丽结婚了",
            "编剧邵钧林和稽道青说",
            "这里有关天培的有关事迹",
            "龚学平等领导说,邓颖超生前杜绝超生",
    };
    // One segmenter instance is reused for every sentence.
    final Segment nameAwareSegment = HanLP.newSegment().enableNameRecognize(true);
    for (int i = 0; i < sentences.length; i++)
    {
        List<Term> terms = nameAwareSegment.seg(sentences[i]);
        System.out.println(terms);
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:24,代码来源:DemoChineseNameRecognition.java

示例2: testRemoveNotNS

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Rewrites data/dictionary/custom/全国地名大全.txt in place. The dictionary
 * contains many person names; entries that segment to a single term tagged
 * {@code Nature.nr} are printed and rejected by the save filter.
 *
 * <p>Entries whose last character appears in {@code Predefine.POSTFIX_SINGLE}
 * are always kept without being segmented.
 *
 * @throws Exception if loading or saving the dictionary fails
 */
public void testRemoveNotNS() throws Exception
{
    final String dictionaryPath = "data/dictionary/custom/全国地名大全.txt";

    // Collect the single-character suffixes into a set for quick lookup.
    final Set<Character> keepSuffixes = new TreeSet<Character>();
    final char[] suffixChars = Predefine.POSTFIX_SINGLE.toCharArray();
    for (int i = 0; i < suffixChars.length; i++)
    {
        keepSuffixes.add(suffixChars[i]);
    }

    DictionaryMaker.load(dictionaryPath).saveTxtTo(dictionaryPath, new DictionaryMaker.Filter()
    {
        // Custom dictionary is switched off for this segmenter.
        Segment segment = HanLP.newSegment().enableCustomDictionary(false);

        @Override
        public boolean onSave(Item item)
        {
            char lastChar = item.key.charAt(item.key.length() - 1);
            if (keepSuffixes.contains(lastChar))
            {
                return true;
            }

            List<Term> terms = segment.seg(item.key);
            boolean singlePersonName = terms.size() == 1 && terms.get(0).nature == Nature.nr;
            if (!singlePersonName)
            {
                return true;
            }

            // Report the entry that is being rejected.
            System.out.println(item);
            return false;
        }
    });
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:30,代码来源:TestCustomDictionary.java

示例3: testSpeedOfSecondViterbi

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Rough throughput benchmark for {@code ViterbiSegment} with all named-entity
 * recognition, name recognition and the custom dictionary switched off.
 * Segments one short sentence a million times and prints characters per second.
 *
 * @throws Exception declared for the test harness; nothing is thrown explicitly here
 */
public void testSpeedOfSecondViterbi() throws Exception
{
    final String sample = "王总和小丽结婚了";
    final int pressure = 1000000;

    Segment segment = new ViterbiSegment()
            .enableAllNamedEntityRecognize(false)
            .enableNameRecognize(false) // name recognition needs a second Viterbi pass, which is slow
            .enableCustomDictionary(false);

    // One printed run before the timed loop.
    System.out.println(segment.seg(sample));

    final long begin = System.currentTimeMillis();
    int done = 0;
    while (done < pressure)
    {
        segment.seg(sample);
        ++done;
    }
    final long elapsedMillis = System.currentTimeMillis() - begin;

    double costTime = elapsedMillis / (double) 1000;
    System.out.printf("分词速度:%.2f字每秒", sample.length() * pressure / costTime);
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:17,代码来源:TestSegment.java

示例4: testIssue193

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Segments the sample sentences associated with issue 193 using a segmenter
 * configured with {@code enableOrganizationRecognize(true)} and
 * {@code enableNumberQuantifierRecognize(true)}, printing each term list.
 * Nothing is asserted; the output is for manual inspection.
 *
 * @throws Exception declared for the test harness; nothing is thrown explicitly here
 */
public void testIssue193() throws Exception
{
    final String[] sentences = {
            "以每台约200元的价格送到苹果售后维修中心换新机(苹果的保修基本是免费换新机)",
            "可能以2500~2800元的价格回收",
            "3700个益农信息社打通服务“最后一公里”",
            "一位李先生给高政留言说上周五可以帮忙献血",
            "一位浩宁达高层透露",
            "五和万科长阳天地5个普宅项目",
            "以1974点低点和5178点高点作江恩角度线",
            "纳入统计的18家京系基金公司",
            "华夏基金与嘉实基金两家京系基金公司",
            "则应从排名第八的投标人开始依次递补三名投标人"
    };
    final Segment segment = HanLP.newSegment()
            .enableOrganizationRecognize(true)
            .enableNumberQuantifierRecognize(true);
    for (int i = 0; i < sentences.length; i++)
    {
        List<Term> terms = segment.seg(sentences[i]);
        System.out.println(terms);
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:22,代码来源:TestSegment.java

示例5: statistics

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Runs word-frequency statistics over the full contents of a text file and
 * dumps the result to a sibling file.
 *
 * <p>The file is read line by line and the lines are concatenated without any
 * separator before being handed to {@code WordFreqStatistics#seg}. The result
 * path is the input path with every occurrence of {@code ".txt"} removed and
 * {@code "-WordFrequencyStatistics-Result.txt"} appended.
 *
 * <p>I/O failures are logged and swallowed; the method never throws
 * {@link IOException} to the caller.
 *
 * @param segment       segmenter handed to {@code WordFreqStatistics}
 * @param inputFilePath path of the text file to analyse
 */
public static void statistics(Segment segment, String inputFilePath) {
    try {
        // Word-frequency statistics
        WordFreqStatistics statistic = new WordFreqStatistics(segment);
        BufferedReader reader = IOUtil.newBufferedReader(inputFilePath);
        try {
            String line;
            StringBuilder content = new StringBuilder();
            // NOTE(review): readLine() strips line terminators, so text on adjacent
            // lines is joined directly — confirm this is intended for segmentation.
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            statistic.seg(content.toString());
            statistic.setResultPath(inputFilePath.replace(".txt", "") + "-WordFrequencyStatistics-Result.txt");
            statistic.dump();
        } finally {
            // Close the reader even when reading, segmenting or dumping fails,
            // so the file handle is not leaked on the error path.
            reader.close();
        }
    } catch (IOException e) {
        logger.error("IO error: " + e.getLocalizedMessage());
    }
}
 
开发者ID:shibing624,项目名称:similarity,代码行数:19,代码来源:WordFreqStatistics.java

示例6: testSpeedOfSecondViterbi

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Times one million segmentations of a short sentence with a
 * {@code ViterbiSegment} that has all named-entity recognition, name
 * recognition and the custom dictionary disabled, then prints the measured
 * characters-per-second figure.
 *
 * @throws Exception declared for the test harness; nothing is thrown explicitly here
 */
public void testSpeedOfSecondViterbi() throws Exception
{
    final String sentence = "王总和小丽结婚了";
    Segment segment = new ViterbiSegment().enableAllNamedEntityRecognize(false)
            // name recognition needs a second Viterbi pass, which is slow
            .enableNameRecognize(false)
            .enableCustomDictionary(false);
    System.out.println(segment.seg(sentence));

    final int pressure = 1000000;
    final long startedAt = System.currentTimeMillis();
    for (int remaining = pressure; remaining > 0; --remaining)
    {
        segment.seg(sentence);
    }
    final long finishedAt = System.currentTimeMillis();

    // Milliseconds converted to seconds as a double.
    double costTime = (finishedAt - startedAt) / (double) 1000;
    System.out.printf("分词速度:%.2f字每秒", sentence.length() * pressure / costTime);
}
 
开发者ID:ml-distribution,项目名称:HanLP,代码行数:18,代码来源:TestSegment.java

示例7: extract

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Segments {@code text} and collects the words whose nature name is contained
 * in {@code tagging}. Each collected word is also echoed to standard output.
 *
 * <p>The shared {@code SegEngine} is used with organization, name and place
 * recognition enabled.
 *
 * @param tagging nature names to keep
 * @param text    text to segment
 * @return the matching words in segmentation order (possibly empty)
 */
protected static List<String> extract(Set<String> tagging, String text) {
    com.hankcs.hanlp.seg.Segment segment = SegEngine.enableOrganizationRecognize(true)
            .enableNameRecognize(true)
            .enablePlaceRecognize(true);
    List<Term> terms = segment.seg(text);

    List<String> entities = new LinkedList<String>();
    for (Term term : terms) {
        if (!tagging.contains(term.nature.name())) {
            continue;
        }
        String word = term.word;
        entities.add(word);
        System.out.println("Naming Entity Found: " + word);
    }
    return entities;
}
 
开发者ID:furaoing,项目名称:HanLP-1.2.4-Taikor,代码行数:20,代码来源:NerExtractor.java

示例8: createNShort

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Builds a tokenizer factory whose tokenizers are backed by a fresh
 * {@code NShortSegment} with the custom dictionary disabled and place and
 * organization recognition enabled.
 *
 * @param indexSettings index settings forwarded to the factory constructor
 * @param environment   environment forwarded to the factory constructor
 * @param name          name forwarded to the factory constructor
 * @param settings      settings forwarded to the factory constructor
 * @return a factory that creates a new segmenter for every tokenizer
 */
public static HanLPTokenizerFactory createNShort(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    return new HanLPTokenizerFactory(indexSettings, environment, name, settings) {
        @Override
        public Tokenizer create() {
            Segment nShort = new NShortSegment().enableCustomDictionary(false);
            nShort = nShort.enablePlaceRecognize(true);
            nShort = nShort.enableOrganizationRecognize(true);
            // No stop-word filter is supplied.
            return new HanLPTokenizer(nShort, null, enablePorterStemming);
        }
    };
}
 
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:10,代码来源:HanLPTokenizerFactory.java

示例9: createShortest

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Builds a tokenizer factory whose tokenizers are backed by a fresh
 * {@code DijkstraSegment} with the custom dictionary disabled and place and
 * organization recognition enabled.
 *
 * @param indexSettings index settings forwarded to the factory constructor
 * @param environment   environment forwarded to the factory constructor
 * @param name          name forwarded to the factory constructor
 * @param settings      settings forwarded to the factory constructor
 * @return a factory that creates a new segmenter for every tokenizer
 */
public static HanLPTokenizerFactory createShortest(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    return new HanLPTokenizerFactory(indexSettings, environment, name, settings) {
        @Override
        public Tokenizer create() {
            Segment dijkstra = new DijkstraSegment().enableCustomDictionary(false);
            dijkstra = dijkstra.enablePlaceRecognize(true);
            dijkstra = dijkstra.enableOrganizationRecognize(true);
            // No stop-word filter is supplied.
            return new HanLPTokenizer(dijkstra, null, enablePorterStemming);
        }
    };
}
 
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:10,代码来源:HanLPTokenizerFactory.java

示例10: createCRF

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Builds a tokenizer factory whose tokenizers are backed by a fresh
 * {@code CRFSegment} with part-of-speech tagging enabled.
 *
 * @param indexSettings index settings forwarded to the factory constructor
 * @param environment   environment forwarded to the factory constructor
 * @param name          name forwarded to the factory constructor
 * @param settings      settings forwarded to the factory constructor
 * @return a factory that creates a new segmenter for every tokenizer
 */
public static HanLPTokenizerFactory createCRF(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    return new HanLPTokenizerFactory(indexSettings, environment, name, settings) {
        @Override
        public Tokenizer create() {
            final Segment crf = new CRFSegment().enablePartOfSpeechTagging(true);
            // No stop-word filter is supplied.
            final Tokenizer tokenizer = new HanLPTokenizer(crf, null, enablePorterStemming);
            return tokenizer;
        }
    };
}
 
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:10,代码来源:HanLPTokenizerFactory.java

示例11: HanLPTokenizer

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * @param segment              HanLP中的某个分词器
 * @param filter               停用词
 * @param enablePorterStemming 英文原型转换
 */
public HanLPTokenizer(Segment segment, Set<String> filter, boolean enablePorterStemming) {
    super();
    this.segment = new SegmentWrapper(input, segment);
    if (filter != null && filter.size() > 0) {
        this.filter = new BinTrie<String>();
        for (String stopWord : filter) {
            this.filter.put(stopWord, null);
        }
    }
    this.enablePorterStemming = enablePorterStemming;
}
 
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:17,代码来源:HanLPTokenizer.java

示例12: enableNumberQuantifierRecognize

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Number-quantifier recognition is not available on this segmenter: every
 * call fails, regardless of the argument.
 *
 * @param enable ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always (the message text means "not supported yet")
 */
@Override
    public Segment enableNumberQuantifierRecognize(boolean enable)
    {
        throw new UnsupportedOperationException("暂不支持");
        // Disabled implementation kept for reference: it would enable
        // part-of-speech tagging and then delegate to the superclass.
//        enablePartOfSpeechTagging(enable);
//        return super.enableNumberQuantifierRecognize(enable);
    }
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:8,代码来源:CRFSegment.java

示例13: main

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Demo entry point: segments the same sentence twice with one segmenter,
 * first as created and then after calling
 * {@code enablePartOfSpeechTagging(true)}, printing both results.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    final String sentence = "教授正在教授自然语言处理课程";
    final Segment segment = HanLP.newSegment();

    // First pass: segmenter exactly as returned by HanLP.newSegment().
    String untagged = String.valueOf(segment.seg(sentence));
    System.out.println("未标注:" + untagged);

    // Second pass: same instance with part-of-speech tagging switched on.
    segment.enablePartOfSpeechTagging(true);
    String tagged = String.valueOf(segment.seg(sentence));
    System.out.println("标注后:" + tagged);
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:10,代码来源:DemoPosTagging.java

示例14: main

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Demo entry point: segments each sample sentence with an
 * {@code NShortSegment} and a {@code ViterbiSegment}, both configured with the
 * custom dictionary disabled and place and organization recognition enabled,
 * and prints the two results one under the other.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    Segment nShortSegment = new NShortSegment()
            .enableCustomDictionary(false)
            .enablePlaceRecognize(true)
            .enableOrganizationRecognize(true);
    Segment shortestSegment = new ViterbiSegment()
            .enableCustomDictionary(false)
            .enablePlaceRecognize(true)
            .enableOrganizationRecognize(true);

    final String[] sentences = {
            "今天,刘志军案的关键人物,山西女商人丁书苗在市二中院出庭受审。",
            "江西省监狱管理局与中国太平洋财产保险股份有限公司南昌中心支公司保险合同纠纷案",
            "新北商贸有限公司",
    };

    for (int i = 0; i < sentences.length; i++)
    {
        // Build the two-line report; the N-shortest result is computed first.
        StringBuilder report = new StringBuilder();
        report.append("N-最短分词:").append(nShortSegment.seg(sentences[i]));
        report.append("\n最短路分词:").append(shortestSegment.seg(sentences[i]));
        System.out.println(report.toString());
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:15,代码来源:DemoNShortSegment.java

示例15: main

import com.hankcs.hanlp.seg.Segment; //导入依赖的package包/类
/**
 * Demo entry point: segments a sample sentence with a segmenter created via
 * {@code enablePlaceRecognize(true)} and prints the resulting term list.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args)
{
    final String[] sentences = {
            "蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机",
    };
    final Segment placeAwareSegment = HanLP.newSegment().enablePlaceRecognize(true);
    for (int i = 0; i < sentences.length; i++)
    {
        List<Term> terms = placeAwareSegment.seg(sentences[i]);
        System.out.println(terms);
    }
}
 
开发者ID:priester,项目名称:hanlpStudy,代码行数:13,代码来源:DemoPlaceRecognition.java


注:本文中的com.hankcs.hanlp.seg.Segment类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。