This article collects typical usage examples of the Java class edu.stanford.nlp.ie.crf.CRFClassifier. If you are wondering what the CRFClassifier class does, how to use it, or want to see it in real code, the curated examples below should help.
The CRFClassifier class belongs to the edu.stanford.nlp.ie.crf package. 15 code examples are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
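Before diving into the examples, here is a minimal, self-contained sketch of the most common pattern: load a serialized CRF model and run it over a sentence. The class name CrfQuickStart, the model path, and the sample sentence are placeholders for illustration only; point the path at whichever serialized classifier you actually have (for example the english.all.3class.distsim.crf.ser.gz model distributed with Stanford NER).

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

public class CrfQuickStart {
    public static void main(String[] args) throws Exception {
        // Placeholder path to a serialized CRF model; adjust to your local installation
        String model = "english.all.3class.distsim.crf.ser.gz";
        // getClassifier throws checked exceptions on load failure; getClassifierNoExceptions
        // (used in several examples below) avoids them
        AbstractSequenceClassifier<CoreLabel> crf = CRFClassifier.getClassifier(model);
        // classifyToString returns the sentence with each token labeled (e.g. PERSON, LOCATION, O)
        System.out.println(crf.classifyToString("Jim bought 300 shares of Acme Corp. in 2006."));
    }
}

Each example below follows some variant of this pattern: either a pre-trained model is loaded (getClassifier, getClassifierNoExceptions, getJarClassifier, loadClassifierNoExceptions), or a new CRFClassifier is constructed from SeqClassifierFlags and trained.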
Example 1: preprocess
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public Concept preprocess(Concept c) {
    // Lazily load the POS tagger and the 3-class NER model on first use
    if (this.tagger == null)
        this.tagger = new MaxentTagger("ext_models/pos_tagger/english-left3words-distsim.tagger");
    if (this.ner == null)
        this.ner = CRFClassifier.getClassifierNoExceptions("ext_models/ner/english.all.3class.distsim.crf.ser.gz");
    // Tokenize the concept name, then POS-tag, NER-tag and lemmatize the tokens
    List<CoreLabel> words = tokFactory.getTokenizer(new StringReader(c.name)).tokenize();
    tagger.tagCoreLabels(words);
    words = ner.classifySentence(words);
    words = this.addLemmas(words);
    List<PToken> tokens = new ArrayList<PToken>();
    for (CoreLabel word : words) {
        PToken t = new PToken(word.originalText());
        t.pos = word.tag();
        t.neTag = word.get(CoreAnnotations.AnswerAnnotation.class);
        t.lemma = word.get(LemmaAnnotation.class);
        tokens.add(t);
    }
    c.tokenList = tokens;
    return c;
}
Example 2: CRF
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public static void CRF() {
    // Classifier
    CRFClassifier<CoreMap> crf
            = CRFClassifier.getClassifierNoExceptions(
                    "C:/Current Books in Progress/NLP and Java/Models"
                    + "/english.all.3class.distsim.crf.ser.gz");
    String S1 = "Good afternoon Rajat Raina, how are you today?";
    String S2 = "I go to school at Stanford University, which is located in California.";
    System.out.println(crf.classifyToString(S1));
    System.out.println(crf.classifyWithInlineXML(S2));
    System.out.println(crf.classifyToString(S2, "xml", true));
    Object classification[] = crf.classify(S2).toArray();
    for (int i = 0; i < classification.length; i++) {
        System.out.println(classification[i]);
    }
}
Example 3: testConvertNERtoCLAVIN
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop");
    Properties mp = new Properties();
    mp.load(mpis);
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);
    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
Example 4: main
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public static void main(String[] args) {
    String path = IntelConfig.DEPARTMENT_TRAIN_PROPERTY;
    Properties props = StringUtils.propFileToProperties(path);
    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(flags);
    crf.train();
    String modelPath = props.getProperty("serializeTo");
    crf.serializeClassifier(modelPath);
    System.out.println("Build model to " + modelPath);
}
Example 5: ner
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public List<IntPair> ner(String s) {
    try {
        if (classifier == null) {
            classifier = CRFClassifier.getClassifier(classifierFile);
        }
        List<List<CoreLabel>> result = classifier.classify(s);
        int begin = 0;
        String last = "";
        boolean in = false;
        int j = 0;
        List<IntPair> rval = new ArrayList<>();
        // Collect [begin, end) token spans of consecutive non-"O" labels
        for (CoreLabel word : result.get(0)) {
            String tag = word.get(CoreAnnotations.AnswerAnnotation.class);
            if (tag.equals("O")) {
                if (in) {
                    rval.add(new IntPair(begin, j));
                }
                in = false;
            } else {
                if (!in) {
                    begin = j;
                    in = true;
                }
            }
            j++;
            last = tag;
        }
        // Close an entity span that runs to the end of the sentence
        if (in) {
            rval.add(new IntPair(begin, result.get(0).size()));
        }
        return rval;
    } catch (IOException | ClassNotFoundException x) {
        throw new RuntimeException(x);
    }
}
Example 6: init
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
@Override
public Resource init() throws ResourceInstantiationException {
    if (tagger == null) {
        fireStatusChanged("Loading Stanford NER model");
        try (InputStream in = modelFile.openStream();
                GZIPInputStream gzipIn = new GZIPInputStream(in)) {
            tagger = CRFClassifier.getClassifier(gzipIn);
        } catch (Exception e) {
            throw new ResourceInstantiationException(e);
        }
    }
    return this;
}
Example 7: StanfordChineseSegmenterWrapper
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
/**
 * Initialize the segmenter
 *
 * @param dataDir this is the 'datadir' from the 2008-05-21 distribution.
 */
public StanfordChineseSegmenterWrapper(File dataDir) {
    File serDictionaryFile = new File(dataDir, "dict-chris6.ser.gz");
    File ctbFile = new File(dataDir, "ctb.gz");
    String error = FileExistenceVerifier.verifyFiles(serDictionaryFile, ctbFile);
    if (error != null)
        throw new Error(error);
    /*
     * This is pretty much a copy&paste of the SegDemo.java, with minor edits on the files.
     * No idea if this is the fastest or best way to do this.
     */
    Properties props = new Properties();
    //props.setProperty("sighanCorporaDict", "data");
    props.setProperty("sighanCorporaDict", dataDir.toString());
    // props.setProperty("NormalizationTable", "data/norm.simp.utf8");
    // props.setProperty("normTableEncoding", "UTF-8");
    // below is needed because CTBSegDocumentIteratorFactory accesses it
    //props.setProperty("serDictionary","data/dict-chris6.ser.gz");
    props.setProperty("serDictionary", serDictionaryFile.toString());
    //props.setProperty("testFile", args[0]);
    props.setProperty("inputEncoding", "UTF-8");
    props.setProperty("sighanPostProcessing", "true");
    classifier = new CRFClassifier<CoreLabel>(props);
    //classifier.loadClassifierNoExceptions("data/ctb.gz", props);
    classifier.loadClassifierNoExceptions(ctbFile.toString(), props);
    // flags must be re-set after data is loaded
    classifier.flags.setProperties(props);
    //classifier.writeAnswers(classifier.test(args[0]));
    //classifier.testAndWriteAnswers(args[0]);
}
Example 8: main
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public static void main(String[] args) throws Exception {
    args = new String[]{"chi-sen.deseg"};
    Properties props = new Properties();
    //props.setProperty("sighanCorporaDict", "data");
    // String dir="/home/users0/anders/storage/backuped/demos/SRLDemos/models/chi/stanford-chinese-segmenter-2008-05-21";
    String dir = "/home/users0/anders/storage/scratch/anders/stanford-segmenter-2013-06-20/";
    props.setProperty("sighanCorporaDict", dir + "/data");
    // props.setProperty("NormalizationTable", "data/norm.simp.utf8");
    // props.setProperty("normTableEncoding", "UTF-8");
    // below is needed because CTBSegDocumentIteratorFactory accesses it
    //props.setProperty("serDictionary","data/dict-chris6.ser.gz");
    props.setProperty("serDictionary", dir + "/data/dict-chris6.ser.gz");
    //props.setProperty("testFile", args[0]);
    props.setProperty("inputEncoding", "UTF-8");
    props.setProperty("sighanPostProcessing", "true");
    CRFClassifier<CoreLabel> classifier = new CRFClassifier<CoreLabel>(props);
    //classifier.loadClassifierNoExceptions("data/ctb.gz", props);
    classifier.loadClassifierNoExceptions(dir + "/data/ctb.gz", props);
    // flags must be re-set after data is loaded
    classifier.flags.setProperties(props);
    //classifier.writeAnswers(classifier.test(args[0]));
    //classifier.testAndWriteAnswers(args[0]);
    //ObjectBank<List<CoreLabel>> documents = classifier.makeObjectBank(args[0]);
    List<String> forms = classifier.segmentString("上海浦东近年来颁布实行了涉及经济、贸易、建设、规划、科技、文教等领域的七十一件法规性文件,确保了浦东开发的有序进行。");
    for (String form : forms)
        System.out.println(form);
}
Example 9: loadClassifier
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public static CRFClassifier<CoreLabel> loadClassifier(String options) throws IllegalArgumentException {
    String[] inputFlags = options.split(" ");
    Properties props = StringUtils.argsToProperties(inputFlags);
    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    CRFClassifier<CoreLabel> crfSegmenter = new CRFClassifier<>(flags);
    if (flags.loadClassifier == null) {
        throw new IllegalArgumentException("missing -loadClassifier flag for CRF preprocessor.");
    }
    crfSegmenter.loadClassifierNoExceptions(flags.loadClassifier, props);
    crfSegmenter.loadTagIndex();
    return crfSegmenter;
}
Example 10: CRFPostprocessor
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public CRFPostprocessor(Properties props) {
    // Currently, this class only supports one featureFactory.
    props.put("featureFactory", CRFPostprocessorFeatureFactory.class.getName());
    flags = new SeqClassifierFlags(props);
    classifier = new CRFClassifier<CoreLabel>(flags);
}
Example 11: train
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public void train(ListMatrix<ListMatrix<MapMatrix<String, String>>> listMatrix) throws Exception {
    // Convert the matrix rows into CoreLabel token lists carrying the gold answer labels
    List<List<CoreLabel>> sentenceList = new ArrayList<List<CoreLabel>>();
    for (ListMatrix<MapMatrix<String, String>> innerList : listMatrix) {
        List<CoreLabel> tokenList = new ArrayList<CoreLabel>();
        sentenceList.add(tokenList);
        for (MapMatrix<String, String> mapMatrix : innerList) {
            CoreLabel l = new CoreLabel();
            l.set(CoreAnnotations.TextAnnotation.class, mapMatrix.getAsString("Token"));
            l.set(CoreAnnotations.AnswerAnnotation.class, mapMatrix.getAsString("Class"));
            tokenList.add(l);
        }
    }
    // Feature flags used to train the CRF
    SeqClassifierFlags flags = new SeqClassifierFlags();
    flags.maxLeft = 3;
    flags.useClassFeature = true;
    flags.useWord = true;
    flags.maxNGramLeng = 6;
    flags.usePrev = true;
    flags.useNext = true;
    flags.useDisjunctive = true;
    flags.useSequences = true;
    flags.usePrevSequences = true;
    flags.useTypeSeqs = true;
    flags.useTypeSeqs2 = true;
    flags.useTypeySequences = true;
    flags.wordShape = WordShapeClassifier.WORDSHAPECHRIS2;
    flags.useNGrams = true;
    crf = new CRFClassifier<CoreLabel>(flags);
    crf.train(sentenceList, null);
}
Example 12: setSegmentor
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public void setSegmentor()
{
    Properties props = new Properties();
    props.setProperty("sighanCorporaDict", "segment");
    props.setProperty("serDictionary", "segment/dict-chris6.ser.gz");
    props.setProperty("inputEncoding", "UTF-8");
    props.setProperty("sighanPostProcessing", "true");
    segmentor = new CRFClassifier(props);
    segmentor.loadClassifierNoExceptions("segment/ctb.gz", props);
    segmentor.flags.setProperties(props);
}
Example 13: initialValue
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
protected AbstractSequenceClassifier<CoreLabel> initialValue() {
    try {
        return CRFClassifier.getClassifier(classifierFilePath);
    } catch (final Exception exception) {
        LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
        return NULL_OBJECT_CLASSIFIER;
    }
}
Example 14: classifier
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
@Override
AbstractSequenceClassifier<CoreLabel> classifier() {
    synchronized (this) {
        if (classifier == null) {
            try {
                classifier = CRFClassifier.getClassifier(classifierFilePath);
            } catch (final Exception exception) {
                LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
                classifier = NULL_OBJECT_CLASSIFIER;
            }
        }
        return classifier;
    }
}
Example 15: init
import edu.stanford.nlp.ie.crf.CRFClassifier; // import the required package/class
public void init() throws NamedEntityRecognizerException
{
    if (initialized)
        throw new NamedEntityRecognizerException(
                "init() was called though the StanfordNamedEntityRecognizer was already initialized.");
    try
    {
        this.crfClassifier = CRFClassifier.getClassifier(this.classifierPath.getPath());
        this.initialized = true;
    }
    catch (Exception e)
    {
        throw new NamedEntityRecognizerException("Classifier load failed.", e);
    }
}