当前位置: 首页>>代码示例>>Java>>正文


Java CRFClassifier类代码示例

本文整理汇总了Java中edu.stanford.nlp.ie.crf.CRFClassifier的典型用法代码示例。如果您正苦于以下问题:Java CRFClassifier类的具体用法?Java CRFClassifier怎么用?Java CRFClassifier使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


CRFClassifier类属于edu.stanford.nlp.ie.crf包,在下文中一共展示了CRFClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: preprocess

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Tokenizes the concept's name, runs POS tagging, NER classification and
 * {@code addLemmas} over the tokens, and stores the result as the concept's
 * token list.
 *
 * <p>The POS tagger and the NER classifier are loaded on first use and then
 * kept in the instance fields.
 *
 * @param c the concept whose {@code name} is processed; its {@code tokenList}
 *          is overwritten
 * @return the same concept instance that was passed in
 */
public Concept preprocess(Concept c) {

		// Lazily load the models the first time this method runs.
		if (this.tagger == null) {
			this.tagger = new MaxentTagger("ext_models/pos_tagger/english-left3words-distsim.tagger");
		}
		if (this.ner == null) {
			this.ner = CRFClassifier.getClassifierNoExceptions("ext_models/ner/english.all.3class.distsim.crf.ser.gz");
		}

		// Pipeline: tokenize -> POS tag (in place) -> NER -> lemmas.
		List<CoreLabel> labels = tokFactory.getTokenizer(new StringReader(c.name)).tokenize();
		tagger.tagCoreLabels(labels);
		labels = ner.classifySentence(labels);
		labels = this.addLemmas(labels);

		// Copy the annotations of interest into the project's own token type.
		List<PToken> converted = new ArrayList<PToken>();
		for (CoreLabel label : labels) {
			PToken token = new PToken(label.originalText());
			token.pos = label.tag();
			token.neTag = label.get(CoreAnnotations.AnswerAnnotation.class);
			token.lemma = label.get(LemmaAnnotation.class);
			converted.add(token);
		}

		c.tokenList = converted;
		return c;
	}
 
开发者ID:UKPLab,项目名称:ijcnlp2017-cmaps,代码行数:25,代码来源:NonUIMAPreprocessor.java

示例2: CRF

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Demo: loads a serialized CRF classifier from a hard-coded path and prints
 * the classification of two sample sentences in several output formats,
 * followed by each classified sentence on its own line.
 */
public static void CRF() {
    // Classifier
    final String modelLocation = "C:/Current Books in Progress/NLP and Java/Models"
            + "/english.all.3class.distsim.crf.ser.gz";
    CRFClassifier<CoreMap> crf = CRFClassifier.getClassifierNoExceptions(modelLocation);

    String S1 = "Good afternoon Rajat Raina, how are you today?";
    String S2 = "I go to school at Stanford University, which is located in California.";

    System.out.println(crf.classifyToString(S1));
    System.out.println(crf.classifyWithInlineXML(S2));
    System.out.println(crf.classifyToString(S2, "xml", true));

    // Print every element of the classification result, one per line.
    for (Object sentence : crf.classify(S2)) {
        System.out.println(sentence);
    }
}
 
开发者ID:jackeylu,项目名称:NLP_with_Java_zh,代码行数:18,代码来源:UseStanfordClassifier.java

示例3: testConvertNERtoCLAVIN

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * <p>Loads the model properties from the classpath, classifies a fixed
 * sentence to character offsets, converts the result and verifies the text
 * and position of both expected locations.
 *
 * @throws IOException if the model properties or the classifier cannot be read
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    Properties mp = new Properties();
    // try-with-resources so the classpath stream is always released,
    // even when loading the properties fails.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop")) {
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);

    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
 
开发者ID:Berico-Technologies,项目名称:CLAVIN-NERD,代码行数:26,代码来源:StanfordExtractorTest.java

示例4: main

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Trains a CRF classifier configured by the properties file at
 * {@code IntelConfig.DEPARTMENT_TRAIN_PROPERTY} and serializes it to the
 * location named by that file's {@code serializeTo} property.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Properties trainingProps = StringUtils.propFileToProperties(IntelConfig.DEPARTMENT_TRAIN_PROPERTY);

    CRFClassifier<CoreLabel> trainer =
            new CRFClassifier<CoreLabel>(new SeqClassifierFlags(trainingProps));
    trainer.train();

    // The output location comes from the same properties that drive training.
    String outputPath = trainingProps.getProperty("serializeTo");
    trainer.serializeClassifier(outputPath);
    System.out.println("Build model to " + outputPath);
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:12,代码来源:TrainNerModel.java

示例5: ner

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Finds named-entity spans in the first sentence of the given text.
 *
 * <p>The classifier is loaded from {@code classifierFile} on first use. Each
 * maximal run of consecutive tokens whose answer tag is not {@code "O"}
 * becomes one span, regardless of whether the tags inside the run differ.
 * Only the first sentence returned by the classifier is examined.
 *
 * @param s the text to classify
 * @return token-index spans as {@code IntPair(begin, end)} with {@code end}
 *         exclusive; empty when the classifier produces no sentences
 * @throws RuntimeException wrapping the {@link IOException} or
 *         {@link ClassNotFoundException} raised while loading the classifier
 */
public List<IntPair> ner(String s) {
    try {
        if (classifier == null) {
            classifier = CRFClassifier.getClassifier(classifierFile);
        }
        List<List<CoreLabel>> result = classifier.classify(s);
        List<IntPair> rval = new ArrayList<>();
        // Nothing to scan (e.g. empty input): avoid indexing into an empty list.
        if (result.isEmpty()) {
            return rval;
        }
        List<CoreLabel> sentence = result.get(0);
        int begin = 0;
        boolean in = false;
        int j = 0;
        for (CoreLabel word : sentence) {
            String tag = word.get(CoreAnnotations.AnswerAnnotation.class);
            if (tag.equals("O")) {
                // Leaving an entity run: close the span at the current token.
                if (in) {
                    rval.add(new IntPair(begin, j));
                }
                in = false;
            } else if (!in) {
                // Entering an entity run: remember where it starts.
                begin = j;
                in = true;
            }
            j++;
        }
        // A run that reaches the end of the sentence is closed here.
        if (in) {
            rval.add(new IntPair(begin, sentence.size()));
        }
        return rval;
    } catch (IOException | ClassNotFoundException x) {
        throw new RuntimeException(x);
    }
}
 
开发者ID:jmccrae,项目名称:naisc,代码行数:36,代码来源:SultanModified.java

示例6: init

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Loads the Stanford NER model from {@code modelFile} unless a tagger is
 * already set.
 *
 * @return this resource
 * @throws ResourceInstantiationException wrapping any exception raised while
 *         opening, decompressing or deserializing the model
 */
@Override
public Resource init() throws ResourceInstantiationException {
  // A tagger that is already present is reused as is.
  if(tagger != null) {
    return this;
  }

  fireStatusChanged("Loading Stanford NER model");
  try (InputStream rawStream = modelFile.openStream();
      GZIPInputStream modelStream = new GZIPInputStream(rawStream)){
    tagger = CRFClassifier.getClassifier(modelStream);
  } catch(Exception loadFailure) {
    throw new ResourceInstantiationException(loadFailure);
  }
  return this;
}
 
开发者ID:GateNLP,项目名称:gateplugin-Stanford_CoreNLP,代码行数:14,代码来源:NER.java

示例7: StanfordChineseSegmenterWrapper

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Initialize the segmenter
 *
 * <p>Expects {@code dict-chris6.ser.gz} and {@code ctb.gz} inside the given
 * directory; throws an {@link Error} carrying the verifier's message when
 * {@code FileExistenceVerifier.verifyFiles} reports a problem.
 *
 * @param dataDir this is the 'datadir' from the 2008-05-21 distribution.
 */
public StanfordChineseSegmenterWrapper(File dataDir){
	File dictionaryFile = new File(dataDir, "dict-chris6.ser.gz");
	File modelFile = new File(dataDir, "ctb.gz");
	String problem = FileExistenceVerifier.verifyFiles(dictionaryFile, modelFile);
	if (problem != null) {
		throw new Error(problem);
	}

	/*
	 * This is pretty much a copy&paste of the SegDemo.java, with minor edits on the files.
	 * No idea if this is the fastest or best way to do this.
	 */
	Properties segmenterProps = new Properties();
	segmenterProps.setProperty("sighanCorporaDict", dataDir.toString());
	// below is needed because CTBSegDocumentIteratorFactory accesses it
	segmenterProps.setProperty("serDictionary", dictionaryFile.toString());
	segmenterProps.setProperty("inputEncoding", "UTF-8");
	segmenterProps.setProperty("sighanPostProcessing", "true");

	classifier = new CRFClassifier<CoreLabel>(segmenterProps);
	classifier.loadClassifierNoExceptions(modelFile.toString(), segmenterProps);
	// flags must be re-set after data is loaded
	classifier.flags.setProperties(segmenterProps);
}
 
开发者ID:rudaoshi,项目名称:mate,代码行数:37,代码来源:StanfordChineseSegmenterWrapper.java

示例8: main

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Demo: loads the Chinese segmenter from a hard-coded installation directory,
 * segments one fixed sample sentence and prints each resulting segment on its
 * own line.
 *
 * @param args ignored
 * @throws Exception declared for any failure raised by the segmenter
 */
public static void main(String[] args) throws Exception{
		String dir="/home/users0/anders/storage/scratch/anders/stanford-segmenter-2013-06-20/";

		Properties props = new Properties();
		props.setProperty("sighanCorporaDict", dir+"/data");
		// below is needed because CTBSegDocumentIteratorFactory accesses it
		props.setProperty("serDictionary",dir+"/data/dict-chris6.ser.gz");
		props.setProperty("inputEncoding", "UTF-8");
		props.setProperty("sighanPostProcessing", "true");

		CRFClassifier<CoreLabel> classifier = new CRFClassifier<CoreLabel>(props);
		classifier.loadClassifierNoExceptions(dir+"/data/ctb.gz", props);
		// flags must be re-set after data is loaded
		classifier.flags.setProperties(props);

		List<String> forms=classifier.segmentString("上海浦东近年来颁布实行了涉及经济、贸易、建设、规划、科技、文教等领域的七十一件法规性文件,确保了浦东开发的有序进行。");
		for(String form:forms) {
			System.out.println(form);
		}
	}
 
开发者ID:rudaoshi,项目名称:mate,代码行数:30,代码来源:StanfordChineseSegmenterWrapper.java

示例9: loadClassifier

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Builds a CRF classifier from a command-line style option string and loads
 * the serialized model named by its {@code -loadClassifier} flag.
 *
 * @param options whitespace-separated flags, e.g. {@code "-loadClassifier path"}
 * @return the classifier with its model and tag index loaded
 * @throws IllegalArgumentException if the options do not set {@code -loadClassifier}
 */
public static CRFClassifier<CoreLabel> loadClassifier(String options) throws IllegalArgumentException {
  // Split on runs of whitespace so repeated or surrounding blanks do not
  // produce empty tokens that would be parsed as (empty) flag values.
  String[] inputFlags = options.trim().split("\\s+");
  Properties props = StringUtils.argsToProperties(inputFlags);
  SeqClassifierFlags flags = new SeqClassifierFlags(props);
  // Validate before constructing the classifier: without a model there is nothing to build.
  if(flags.loadClassifier == null) {
    throw new IllegalArgumentException("missing -loadClassifier flag for CRF preprocessor.");
  }
  CRFClassifier<CoreLabel> crfSegmenter = new CRFClassifier<>(flags);
  crfSegmenter.loadClassifierNoExceptions(flags.loadClassifier, props);
  crfSegmenter.loadTagIndex();
  return crfSegmenter;
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:13,代码来源:CRFPreprocessor.java

示例10: CRFPostprocessor

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
public CRFPostprocessor(Properties props) {
  // Currently, this class only supports one featureFactory.
  props.put("featureFactory", CRFPostprocessorFeatureFactory.class.getName());

  flags = new SeqClassifierFlags(props);
  classifier = new CRFClassifier<CoreLabel>(flags);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:8,代码来源:CRFPostprocessor.java

示例11: train

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Trains a new CRF classifier on the given labelled sentences and stores it
 * in the {@code crf} field.
 *
 * <p>Each inner map contributes one token: its {@code "Token"} entry becomes
 * the token text and its {@code "Class"} entry the answer label.
 *
 * @param listMatrix sentences, each a list of token maps
 * @throws Exception declared for any failure raised while reading the input or training
 */
public void train(ListMatrix<ListMatrix<MapMatrix<String, String>>> listMatrix) throws Exception {
    // Convert the matrix representation into CoreLabel sentences.
    List<List<CoreLabel>> trainingSentences = new ArrayList<List<CoreLabel>>();
    for (ListMatrix<MapMatrix<String, String>> sentenceMatrix : listMatrix) {
        List<CoreLabel> labelledTokens = new ArrayList<CoreLabel>();
        for (MapMatrix<String, String> tokenRow : sentenceMatrix) {
            CoreLabel label = new CoreLabel();
            label.set(CoreAnnotations.TextAnnotation.class, tokenRow.getAsString("Token"));
            label.set(CoreAnnotations.AnswerAnnotation.class, tokenRow.getAsString("Class"));
            labelledTokens.add(label);
        }
        trainingSentences.add(labelledTokens);
    }

    SeqClassifierFlags flags = new SeqClassifierFlags();

    // Numeric settings.
    flags.maxLeft = 3;
    flags.maxNGramLeng = 6;

    // Word, n-gram and neighbouring-token features.
    flags.useClassFeature = true;
    flags.useWord = true;
    flags.useNGrams = true;
    flags.usePrev = true;
    flags.useNext = true;
    flags.useDisjunctive = true;

    // Sequence features.
    flags.useSequences = true;
    flags.usePrevSequences = true;

    // Word-shape features.
    flags.useTypeSeqs = true;
    flags.useTypeSeqs2 = true;
    flags.useTypeySequences = true;
    flags.wordShape = WordShapeClassifier.WORDSHAPECHRIS2;

    crf = new CRFClassifier<CoreLabel>(flags);
    crf.train(trainingSentences, null);
}
 
开发者ID:jdmp,项目名称:java-data-mining-package,代码行数:33,代码来源:StanfordTagger.java

示例12: setSegmentor

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Creates the segmenter stored in the {@code segmentor} field, loading the
 * dictionary and model from the relative {@code segment} directory.
 */
public void setSegmentor()
{
	Properties segmenterProps = new Properties();
	// Resource locations.
	segmenterProps.setProperty("sighanCorporaDict", "segment");
	segmenterProps.setProperty("serDictionary","segment/dict-chris6.ser.gz");
	// Input handling.
	segmenterProps.setProperty("inputEncoding", "UTF-8");
	segmenterProps.setProperty("sighanPostProcessing", "true");

	segmentor = new CRFClassifier(segmenterProps);
	segmentor.loadClassifierNoExceptions("segment/ctb.gz", segmenterProps);
	// Apply the properties to the flags again once the model has been loaded.
	segmentor.flags.setProperties(segmenterProps);
}
 
开发者ID:intfloat,项目名称:weibo-emotion-analyzer,代码行数:12,代码来源:DicModel.java

示例13: initialValue

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Loads the classifier from {@code classifierFilePath}.
 *
 * @return the loaded classifier, or {@code NULL_OBJECT_CLASSIFIER} when
 *         loading fails (the failure is logged)
 */
protected AbstractSequenceClassifier<CoreLabel> initialValue() {
	AbstractSequenceClassifier<CoreLabel> loaded;
	try {
		loaded = CRFClassifier.getClassifier(classifierFilePath);
	} catch (final Exception loadFailure) {
		// Fall back to the null-object classifier instead of propagating.
		LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
		loaded = NULL_OBJECT_CLASSIFIER;
	}
	return loaded;
}
 
开发者ID:ALIADA,项目名称:aliada-tool,代码行数:9,代码来源:NERThreadLocalService.java

示例14: classifier

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Returns the classifier held by this instance, loading it from
 * {@code classifierFilePath} on the first call. The whole lookup runs while
 * holding this object's monitor.
 *
 * @return the loaded classifier, or {@code NULL_OBJECT_CLASSIFIER} when
 *         loading failed (the failure is logged)
 */
@Override
AbstractSequenceClassifier<CoreLabel> classifier() {
	synchronized(this) {
		// Already resolved by an earlier call: nothing to load.
		if (classifier != null) {
			return classifier;
		}

		try {
			classifier = CRFClassifier.getClassifier(classifierFilePath);
		} catch (final Exception loadFailure) {
			LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
			classifier = NULL_OBJECT_CLASSIFIER;
		}
		return classifier;
	}
}
 
开发者ID:ALIADA,项目名称:aliada-tool,代码行数:15,代码来源:NERSingletonService.java

示例15: init

import edu.stanford.nlp.ie.crf.CRFClassifier; //导入依赖的package包/类
/**
 * Loads the CRF classifier from {@code classifierPath} and marks this
 * recognizer as initialized.
 *
 * @throws NamedEntityRecognizerException if this recognizer was already
 *         initialized, or if loading the classifier fails (the original
 *         exception is attached as the cause)
 */
public void init() throws NamedEntityRecognizerException
{
	// Initialization is allowed exactly once.
	if (initialized)
	{
		throw new NamedEntityRecognizerException(
				"init() was called though the StanfordNamedEntityRecognizer was already initialized.");
	}

	try
	{
		this.crfClassifier = CRFClassifier.getClassifier(this.classifierPath.getPath());
		this.initialized = true;
	}
	catch (Exception loadFailure)
	{
		throw new NamedEntityRecognizerException("Classifier load failed.",loadFailure);
	}
}
 
开发者ID:hltfbk,项目名称:Excitement-TDMLEDA,代码行数:16,代码来源:StanfordNamedEntityRecognizer.java


注:本文中的edu.stanford.nlp.ie.crf.CRFClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。