当前位置: 首页>>代码示例>>Java>>正文


Java OffsetConjunctions类代码示例

本文整理汇总了Java中edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions的典型用法代码示例。如果您正苦于以下问题:Java OffsetConjunctions类的具体用法?Java OffsetConjunctions怎么用?Java OffsetConjunctions使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


OffsetConjunctions类属于edu.umass.cs.mallet.base.pipe.tsf包,在下文中一共展示了OffsetConjunctions类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: makeSpacePredictionPipe

import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds the pipe returned for space prediction (per the method name).
 *
 * <p>The result is a single {@code SerialPipes} wrapping six stages, in the
 * order they appear in {@code stages} below. The conjunction stage is
 * configured with the offset groups {@code {1}}, {@code {-1, 0}} and
 * {@code {0, 1}}.
 *
 * @return a newly constructed {@code SerialPipes}; nothing is cached between calls
 */
public static Pipe makeSpacePredictionPipe ()
{
  // Offset groups handed to OffsetConjunctions.
  final int[][] conjunctionOffsets = {
    {1}, {-1, 0}, {0, 1},
  };

  // NOTE(review): "." is presumably a regex that makes every character its
  // own token -- confirm against CharSequence2TokenSequence.
  final Pipe[] stages = {
    new CharSequence2TokenSequence("."),
    new TokenSequenceLowercase(),
    new TestMEMMTokenSequenceRemoveSpaces(),
    new TokenText(),
    new OffsetConjunctions(true, conjunctionOffsets),
    new TokenSequence2FeatureVectorSequence(),
  };

  return new SerialPipes(stages);
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:20,代码来源:TestMEMM.java

示例2: setupPipes

import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Appends the token feature pipes to {@code pipes}, in a fixed order.
 *
 * <p>If {@code regexFilename} is non-null, a {@code ConfigurableRegexMatches}
 * built from it is added first. After that come the character prefix/suffix
 * pipes (lengths 2-4), the character n-gram pipe (sizes 2 and 3), the
 * {@code ROMAN}, {@code GREEK} and {@code ISPUNCT} regex pipes, an
 * {@code OffsetConjunctions} over offsets -2, -1, 1 and 2, and finally the
 * {@code TokenSequence2FeatureVectorSequence} pipe.
 *
 * @param pipes         list the pipes are appended to; it is modified in place
 * @param regexFilename file name handed to {@code ConfigurableRegexMatches},
 *                      or {@code null} to skip that pipe
 * @throws RuntimeException wrapping the {@code IOException} raised while
 *                          constructing {@code ConfigurableRegexMatches}
 */
private static void setupPipes(ArrayList<Pipe> pipes, String regexFilename)
{
	try
	{
		if (regexFilename != null)
			pipes.add(new ConfigurableRegexMatches(regexFilename));
	} catch (IOException e)
	{
		// Keep the original cause attached so the failing file can be diagnosed.
		throw new RuntimeException(e);
	}
	pipes.add(new TokenTextCharPrefix("2PREFIX=", 2));
	pipes.add(new TokenTextCharPrefix("3PREFIX=", 3));
	pipes.add(new TokenTextCharPrefix("4PREFIX=", 4));
	pipes.add(new TokenTextCharSuffix("2SUFFIX=", 2));
	pipes.add(new TokenTextCharSuffix("3SUFFIX=", 3));
	pipes.add(new TokenTextCharSuffix("4SUFFIX=", 4));
	pipes.add(new TokenTextCharNGrams("CHARNGRAM=", new int[] { 2, 3 }, true));
	// pipes.add(new LexiconMembership()); // Use this for determining
	// whether word in a lexicon
	pipes.add(new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+", Pattern.CASE_INSENSITIVE)));
	pipes.add(new RegexMatches("GREEK", Pattern.compile(GREEK, Pattern.CASE_INSENSITIVE)));
	// The class starts with the literal characters `~!@#. A web e-mail
	// obfuscation placeholder had replaced "~!@", which turned a dozen letters
	// into "punctuation" and dropped '~', '!' and '@' from the class.
	// NOTE(review): ")-=" inside the class is a range from ')' to '=', so it
	// also covers the digits 0-9; left unchanged to keep the feature
	// definition -- confirm whether that is intended.
	pipes.add(new RegexMatches("ISPUNCT", Pattern.compile("[`~!@#$%^&*()-=_+\\[\\]\\\\{}|;\':\\\",./<>?]+")));
	pipes.add(new OffsetConjunctions(new int[][] { { -2 }, { -1 }, { 1 }, { 2 } }));
	pipes.add(new TokenSequence2FeatureVectorSequence(true, true));
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:26,代码来源:CRFTagger.java

示例3: setupPipes

import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Appends the token feature pipes to {@code pipes}, in a fixed order.
 *
 * <p>The pipes fall into these groups: regex features describing letter case
 * and digit content, regex features for dash/quote/slash, a second set of
 * numeric-shape features, character prefix/suffix (lengths 2-4) and n-gram
 * (sizes 2 and 3) features, the {@code ROMAN}, {@code GREEK} and
 * {@code ISPUNCT} regex features, an {@code OffsetConjunctions} over offsets
 * -2 and 2, and finally the {@code TokenSequence2FeatureVectorSequence} pipe.
 *
 * <p>{@code ALPHANUMERIC} and {@code REALNUMBER} are each registered twice
 * with different patterns; both registrations are kept.
 *
 * @param pipes list the pipes are appended to; it is modified in place
 */
private static void setupPipes(ArrayList<Pipe> pipes)
{
    // Letter-case and digit-content features.
    pipes.add(new RegexMatches("ALPHA", Pattern.compile("[A-Za-z]+")));
    pipes.add(new RegexMatches("INITCAPS", Pattern.compile("[A-Z].*")));
    pipes.add(new RegexMatches("UPPER-LOWER", Pattern.compile("[A-Z][a-z].*")));
    pipes.add(new RegexMatches("LOWER-UPPER", Pattern.compile("[a-z]+[A-Z]+.*")));
    pipes.add(new RegexMatches("ALLCAPS", Pattern.compile("[A-Z]+")));
    pipes.add(new RegexMatches("MIXEDCAPS", Pattern.compile("[A-Z][a-z]+[A-Z][A-Za-z]*")));
    pipes.add(new RegexMatches("SINGLECHAR", Pattern.compile("[A-Za-z]")));
    pipes.add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]")));
    pipes.add(new RegexMatches("DOUBLEDIGIT", Pattern.compile("[0-9][0-9]")));
    pipes.add(new RegexMatches("NUMBER", Pattern.compile("[0-9,]+")));
    pipes.add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*")));
    // Two patterns share the ALPHANUMERIC name: digit-before-letter and letter-before-digit.
    pipes.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[0-9].*[A-Za-z].*")));
    pipes.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[A-Za-z].*[0-9].*")));
    pipes.add(new RegexMatches("LETTERS_NUMBERS", Pattern.compile("[0-9]+[A-Za-z]+")));
    pipes.add(new RegexMatches("NUMBERS_LETTERS", Pattern.compile("[A-Za-z]+[0-9]+")));

    pipes.add(new RegexMatches("HAS_DASH", Pattern.compile(".*-.*")));
    pipes.add(new RegexMatches("HAS_QUOTE", Pattern.compile(".*'.*")));
    pipes.add(new RegexMatches("HAS_SLASH", Pattern.compile(".*/.*")));

    // Start second set of new features (to handle improvements in
    // BaseTokenizer)
    // Two patterns share the REALNUMBER name: one requires digits before the
    // decimal point, the other requires digits after it when a point is present.
    pipes.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]+(\\.[0-9]*)?%?")));
    pipes.add(new RegexMatches("REALNUMBER", Pattern.compile("(-|\\+)?[0-9,]*(\\.[0-9]+)?%?")));
    pipes.add(new RegexMatches("START_MINUS", Pattern.compile("-.*")));
    pipes.add(new RegexMatches("START_PLUS", Pattern.compile("\\+.*")));
    pipes.add(new RegexMatches("END_PERCENT", Pattern.compile(".*%")));
    // End second set

    pipes.add(new TokenTextCharPrefix("2PREFIX=", 2));
    pipes.add(new TokenTextCharPrefix("3PREFIX=", 3));
    pipes.add(new TokenTextCharPrefix("4PREFIX=", 4));
    pipes.add(new TokenTextCharSuffix("2SUFFIX=", 2));
    pipes.add(new TokenTextCharSuffix("3SUFFIX=", 3));
    pipes.add(new TokenTextCharSuffix("4SUFFIX=", 4));
    pipes.add(new TokenTextCharNGrams("CHARNGRAM=", new int[] {2, 3}, true));
    // pipes.add(new LexiconMembership()); // Use this for determining
    // whether word in a lexicon
    pipes.add(new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+", Pattern.CASE_INSENSITIVE)));
    pipes.add(new RegexMatches("GREEK", Pattern.compile(GREEK, Pattern.CASE_INSENSITIVE)));
    // The class starts with the literal characters `~!@#. A web e-mail
    // obfuscation placeholder had replaced "~!@", which turned a dozen letters
    // into "punctuation" and dropped '~', '!' and '@' from the class.
    // NOTE(review): ")-=" inside the class is a range from ')' to '=', so it
    // also covers the digits 0-9; left unchanged to keep the feature
    // definition -- confirm whether that is intended.
    pipes.add(new RegexMatches("ISPUNCT", Pattern.compile("[`~!@#$%^&*()-=_+\\[\\]\\\\{}|;\':\\\",./<>?]+")));
    pipes.add(new OffsetConjunctions(new int[][] { {-2}, {2}}));
    pipes.add(new TokenSequence2FeatureVectorSequence(true, true));
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:47,代码来源:CRFTagger.java


注:本文中的edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。