当前位置: 首页>>代码示例>>Java>>正文


Java TregexPattern.compile方法代码示例

本文整理汇总了Java中edu.stanford.nlp.trees.tregex.TregexPattern.compile方法的典型用法代码示例。如果您正苦于以下问题:Java TregexPattern.compile方法的具体用法?Java TregexPattern.compile怎么用?Java TregexPattern.compile使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在edu.stanford.nlp.trees.tregex.TregexPattern的用法示例。


在下文中一共展示了TregexPattern.compile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: compile

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Calls {@code super.compile()} and then assigns the six tregex patterns
 * {@code by1} .. {@code by6}. Each pattern binds one node to the name {@code tr}.
 */
@Override
protected void compile() {
  super.compile();

  // Patterns in which the node named "tr" carries the VBG label.
  final String vbgInClause =
      "VP <-1 (PP <<, /by|via|through/ <2 (S <1 (VP <1 VBG=tr)))";
  final String vbgAfterClauseAdverb =
      "VP <-1 (PP <<, /by|via|through/ <2 (S <1 ADVP <2 (VP <1 VBG=tr)))";
  final String vbgAfterVerbAdverb =
      "VP <-1 (PP <<, /by|via|through/ <2 (S <1 (VP <1 ADVP <2 VBG=tr)))";

  // Patterns in which the node named "tr" is an NN whose first child matches /ing$/.
  final String ingNounUnderNp =
      "VP <-1 (NP <- (PP <<, /by|via|through/ <2 (NP <+(NP) (NN=tr <, /ing$/))))";
  final String ingNounUnderPp =
      "VP <-1  (PP <<, /by|via|through/ <2 (NP <+(NP) (NN=tr <, /ing$/)))";

  // PP whose children are ADVP, then the IN, then the clause holding the VBG.
  final String vbgAfterPpAdverb =
      "VP <-1 (PP <1 ADVP <2 (IN <<, /by|via|through/) <3 (S <1 (VP <1 VBG=tr)))";

  by1 = TregexPattern.compile(vbgInClause);
  by2 = TregexPattern.compile(vbgAfterClauseAdverb);
  by3 = TregexPattern.compile(vbgAfterVerbAdverb);
  by4 = TregexPattern.compile(ingNounUnderNp);
  by5 = TregexPattern.compile(ingNounUnderPp);
  by6 = TregexPattern.compile(vbgAfterPpAdverb);
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:22,代码来源:ByVbg.java

示例2: compile

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Calls {@code super.compile()}, then fills {@code vadjs} with three compiled tregex
 * patterns in a fixed order. {@code vadj} is reassigned for each pattern and ends up
 * holding the last one.
 */
@Override
protected void compile() {
  super.compile();

  // a is JJ
  // Previously tried and currently disabled patterns:
  //   "VP <1 /VB.*/ <2 (ADJP=tr)"
  //   "VP <1 AUX < ADVP < ADJP|VP=tr"
  final String[] patternSources = {
      "VP <1 AUX < ADJP|VP=tr",
      "VP=tr <1 VBN",
      "VBN|JJ=tr"
  };

  vadjs = new ArrayList<TregexPattern>();
  for (String patternSource : patternSources) {
    vadj = TregexPattern.compile(patternSource);
    vadjs.add(vadj);
  }
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:17,代码来源:ArgVadj.java

示例3: extractNPorPRP

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Creates a {@link Mention} for every node of the sentence's parse tree whose label matches
 * {@code /^(?:NP|PRP)/}, unless its span is already in {@code mentionSpanSet} or
 * {@code insideNE} reports it as lying inside a named-entity span.
 *
 * @param s the sentence; its tokens, parse tree and collapsed dependencies are read
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans already turned into mentions; new spans are added to it
 * @param namedEntitySpanSet spans passed to {@code insideNE} for the exclusion check
 */
protected void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  parse.indexLeaves();
  final SemanticGraph dependencies = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

  final TregexMatcher nodes = TregexPattern.compile("/^(?:NP|PRP)/").matcher(parse);
  while (nodes.find()) {
    final Tree node = nodes.getMatch();
    final List<Tree> leaves = node.getLeaves();

    // Span is [index of first leaf - 1, index of last leaf), as used by subList below.
    final CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
    final int beginIdx = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
    final CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
    final int endIdx = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);

    final IntPair span = new IntPair(beginIdx, endIdx);
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }

    final int mentionID;
    if (assignIds) {
      mentionID = ++maxID;
    } else {
      mentionID = -1;
    }
    final List<CoreLabel> spanTokens = new ArrayList<CoreLabel>(tokens.subList(beginIdx, endIdx));
    mentions.add(new Mention(mentionID, beginIdx, endIdx, dependencies, spanTokens, node));
    mentionSpanSet.add(span);
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:24,代码来源:RuleBasedCorefMentionFinder.java

示例4: findTreePattern

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Runs a tregex pattern over a tree and reports the named nodes of every match.
 * For each match the nodes named {@code m1} and {@code m2} are passed to
 * {@code addFoundPair}; when the pattern text contains {@code m3} and that node is bound,
 * the pair ({@code m2}, {@code m3}) is passed as well.
 *
 * @param tree tree to search
 * @param pattern tregex pattern source
 * @param foundPairs collection handed on to {@code addFoundPair}
 * @throws RuntimeException wrapping any exception raised while compiling or matching
 */
private void findTreePattern(Tree tree, String pattern, Set<Pair<Integer, Integer>> foundPairs) {
  try {
    final TregexMatcher matcher = TregexPattern.compile(pattern).matcher(tree);
    while (matcher.find()) {
      final Tree matched = matcher.getMatch();
      final Tree first = matcher.getNode("m1");
      final Tree second = matcher.getNode("m2");
      final Tree third = pattern.contains("m3") ? matcher.getNode("m3") : null;

      addFoundPair(first, second, matched, foundPairs);
      if (third != null) {
        addFoundPair(second, third, matched, foundPairs);
      }
    }
  } catch (Exception e) {
    // shouldn't happen....
    throw new RuntimeException(e);
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:20,代码来源:MentionExtractor.java

示例5: ArabicTreeNormalizer

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb, boolean changeNoLabels,
    boolean retainNPSbj, boolean retainPPClr) {
  super(new ArabicTreebankLanguagePack());
  this.retainNPTmp = retainNPTmp;
  this.retainNPSbj = retainNPSbj;
  this.markPRDverb = markPRDverb;
  this.changeNoLabels = changeNoLabels;
  this.retainPPClr = retainPPClr;

  rootLabel = tlp.startSymbol();

  prdVerbPattern  = TregexPattern.compile("/^V[^P]/ > VP $ /-PRD$/=prd");

  prdPattern = Pattern.compile("^[A-Z]+-PRD");

  //Marks NP subjects that *do not* occur in verb-initial clauses
  npSbjPattern = TregexPattern.compile("/^NP-SBJ/ !> @VP");

  emptyFilter = new ArabicEmptyFilter();
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:21,代码来源:ArabicTreeNormalizer.java

示例6: evaluateTregexPattern

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Matches a tregex pattern against a parse tree given in string form and returns the
 * Penn-treebank rendering of every match.
 *
 * <p>For each match the matched subtree is added first, followed by the subtree bound to
 * each node name reported by the matcher. A name that is not bound for a particular match
 * (for example one declared in an untaken branch of a disjunction) is skipped rather than
 * causing a {@link NullPointerException}.
 *
 * @param parseTree parse tree text, read with {@code Tree.valueOf}
 * @param tregexPattern tregex pattern source
 * @return the {@code pennString()} of each match and of its bound named nodes, in match order
 */
public List<String> evaluateTregexPattern(String parseTree, String tregexPattern)
{
    List<String> foundMatches = new ArrayList<String>();

    TregexPattern pattern = TregexPattern.compile(tregexPattern);
    TregexMatcher matches = pattern.matcher(Tree.valueOf(parseTree));
    Set<String> nodes = matches.getNodeNames();
    while (matches.find())
    {
        foundMatches.add(matches.getMatch().pennString());
        for (String node : nodes)
        {
            // getNode yields null when this name took no part in the current match.
            Tree named = matches.getNode(node);
            if (named != null)
            {
                foundMatches.add(named.pennString());
            }
        }
    }

    return foundMatches;
}
 
开发者ID:dmnapolitano,项目名称:stanford-thrift,代码行数:19,代码来源:StanfordTregexThrift.java

示例7: ArabicTreeNormalizer

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Builds the normalizer: hands a new {@code ArabicTreebankLanguagePack} to the superclass,
 * records the option flags, reads the root label from {@code tlp.startSymbol()}, compiles
 * the label patterns and installs an {@code ArabicEmptyFilter}.
 *
 * @param retainNPTmp value for the {@code retainNPTmp} field
 * @param markPRDverb value for the {@code markPRDverb} field
 * @param changeNoLabels value for the {@code changeNoLabels} field
 * @param retainNPSbj value for the {@code retainNPSbj} field
 * @param retainPPClr value for the {@code retainPPClr} field
 */
public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb, boolean changeNoLabels,
    boolean retainNPSbj, boolean retainPPClr) {
  super(new ArabicTreebankLanguagePack());

  // "retain" switches
  this.retainNPTmp = retainNPTmp;
  this.retainNPSbj = retainNPSbj;
  this.retainPPClr = retainPPClr;
  // remaining switches
  this.markPRDverb = markPRDverb;
  this.changeNoLabels = changeNoLabels;

  rootLabel = tlp.startSymbol();

  // Tregex pattern binding "prd" plus the plain regex for -PRD labels.
  prdVerbPattern = TregexPattern.compile(
      "/^V[^P]/ > VP $ /-PRD$/=prd");
  prdPattern = Pattern.compile(
      "^[A-Z]+-PRD");

  //Marks NP subjects that *do not* occur in verb-initial clauses
  npSbjPattern = TregexPattern.compile(
      "/^NP-SBJ/ !> @VP");

  emptyFilter = new ArabicEmptyFilter();
}
 
开发者ID:amark-india,项目名称:eventspotter,代码行数:21,代码来源:ArabicTreeNormalizer.java

示例8: findHeadVerb

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Returns the first word of the lowest VP found by the head-verb pattern, when matched at
 * the root of the given parse tree.
 *
 * <p>The pattern binds {@code lowestvp} to a VP that has no VP child. The result is empty
 * when the pattern does not match at {@code parseTree}, when that VP contains no words, or
 * when the first word's text is {@code null}.
 *
 * @param parseTree parse tree to inspect; the match is attempted at this node only
 * @return the text of the first word under the lowest VP, or an empty {@link Optional}
 */
public Optional<String> findHeadVerb(Tree parseTree) {
	TregexPattern pattern = TregexPattern.compile("ROOT <<: (__ < (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))");
	TregexMatcher matcher = pattern.matcher(parseTree);

	// Only the first match is ever used, so a single test replaces the former loop.
	if (!matcher.findAt(parseTree)) {
		return Optional.empty();
	}

	Tree lowestvp = matcher.getNode("lowestvp");

	// findFirst() copes with an empty word list instead of failing on get(0).
	return ParseTreeExtractionUtils.getContainingWords(lowestvp).stream()
			.findFirst()
			.map(word -> word.word());
}
 
开发者ID:Lambda-3,项目名称:Graphene,代码行数:11,代码来源:HeadVerbFinder.java

示例9: getChunkVector

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Extracts chunks from a tree as an index vector.
 *
 * <p>A tag array with one slot per element of {@code tree.yield()} is initialised to
 * {@code "O"}, then passed to {@code CoreNLPToJSON.fillVectorWithYield} once for each of
 * three tregex patterns, and finally converted with {@code CoreNLPToJSON.iobToIndices}.
 *
 * @param tree the tree to chunk
 * @return the result of {@code CoreNLPToJSON.iobToIndices} on the filled tag array
 */
static int[] getChunkVector(Tree tree) {
  final String[] iobTags = new String[tree.yield().size()];
  Arrays.fill(iobTags, "O");

  // NOTE: The order in which these patterns are applied is important.
  final TregexPattern[] patternsInOrder = {
      // Base XPs
      TregexPattern.compile("__ < (__ < (__ !< __)) !< (__ < (__ < __))"),
      // Non-recursive NPs
      TregexPattern.compile("@NP < (__ $ __) !<< (@NP < (__ $ __)) !<< @PP"),
      // Non-recursive PPs
      TregexPattern.compile("@PP !<< @PP")
  };

  for (TregexPattern chunkPattern : patternsInOrder) {
    CoreNLPToJSON.fillVectorWithYield(iobTags, chunkPattern.matcher(tree));
  }

  return CoreNLPToJSON.iobToIndices(iobTags);
}
 
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:34,代码来源:RawFrenchToJSON.java

示例10: compile

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Calls {@code super.compile()} and then assigns the tregex patterns {@code through1},
 * {@code through2} and {@code through3}. Each pattern binds an NN node to the name
 * {@code tr}.
 */
@Override
protected void compile() {
  super.compile();

  // trigger extraction is done in tree regular expression
  // Q: how to leverage work between tregex and procedure?

  // NOTE(review): in /^through|via|by$/ the anchors bind tighter than '|', so the regex
  // reads as "^through" OR "via" OR "by$" rather than an exact match of one of the three
  // words; /ion/ and /VB.*/ are unanchored as well. Confirm this is the intended matching.
  final String nestedNpSource =
      "VP <1 /VB.*/ <-1 (PP <<, /^through|via|by$/ <2 (NP <1 (NP << (NN=tr < /ion/)) ! << /PRP/ ))";
  final String directNpSource =
      "VP <1 /VB.*/ <-1 (PP <<, /^through|via|by$/ <2 (NP << (NN=tr < /ion/)) ! << /PRP/ )";
  final String beforePunctuationSource =
      "VP <1 /VB.*/ < ((PP <<, /^through|via|by$/ <2 (NP << (NN=tr < /ion/)) !<< /PRP/ ) $+ /,|\\./)";

  through1 = TregexPattern.compile(nestedNpSource);
  through2 = TregexPattern.compile(directNpSource);
  through3 = TregexPattern.compile(beforePunctuationSource);
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:15,代码来源:ThroughVion.java

示例11: getCandidates

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Collects candidate entities from every tree of a treebank.
 *
 * <p>Each non-leaf node labelled {@code NP}, {@code NNP}, {@code NNPS}, {@code NN} or
 * {@code NNS} becomes a candidate. A candidate is kept only when {@code entity.from()} is
 * greater than the candidate's {@code to()}. With {@code intraSentence} set, the entity
 * must additionally lie strictly between the begin position of the tree's first leaf and
 * the end position of its last leaf.
 *
 * @param entity the entity for which candidates are collected
 * @param treebank trees to search
 * @param intraSentence whether to apply the additional offset check described above
 * @return the accepted candidates, in tree and match order; never {@code null}
 */
protected List<Entity> getCandidates(Entity entity, Treebank treebank, boolean intraSentence)
{
  List<Entity> entityList = new ArrayList<Entity>();

  // The pattern is constant, so compile it once rather than once per tree.
  TregexPattern np = TregexPattern.compile("NP|NNP|NNPS|NN|NNS");

  for (Tree tree : treebank)
  {
    // Character offsets covered by this tree, taken from its first and last leaf.
    List<Tree> leaves = tree.getLeaves();
    OffsetLabel first = (OffsetLabel) leaves.get(0).label();
    OffsetLabel last = (OffsetLabel) leaves.get(leaves.size() - 1).label();
    int start = first.beginPosition();
    int end = last.endPosition();

    TregexMatcher m = np.matcher(tree);
    while (m.find())
    {
      Tree npTree = m.getMatch();
      List<Token> tokens = Utils.getTokens(tree, npTree);

      if (npTree.isLeaf())
      {
        continue;
      }

      Entity candidate = new Entity("", npTree.nodeString(), tokens);

      if (entity.from() > candidate.to())
      {
        boolean entityInsideTree = (entity.from() > start) && (entity.to() < end);
        if (!intraSentence || entityInsideTree)
        {
          entityList.add(candidate);
        }
      }
    }
  }
  return entityList;
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:36,代码来源:ResoluteAnaphora.java

示例12: ArabicTreeNormalizer

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Creates a normalizer on top of a new {@code ArabicTreebankLanguagePack}, stores the five
 * option flags, reads the root label from {@code tlp.startSymbol()}, compiles the
 * PRD/NP-SBJ patterns and installs the empty filter and lexical mapper.
 *
 * @param retainNPTmp stored in {@code this.retainNPTmp}
 * @param markPRDverb stored in {@code this.markPRDverb}
 * @param changeNoLabels stored in {@code this.changeNoLabels}
 * @param retainNPSbj stored in {@code this.retainNPSbj}
 * @param retainPPClr stored in {@code this.retainPPClr}
 * @throws RuntimeException if one of the built-in tregex patterns fails to parse; the
 *         {@code ParseException} is attached as the cause
 */
public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb, boolean changeNoLabels,
    boolean retainNPSbj, boolean retainPPClr) {
  super(new ArabicTreebankLanguagePack());
  this.retainNPTmp = retainNPTmp;
  this.retainNPSbj = retainNPSbj;
  this.markPRDverb = markPRDverb;
  this.changeNoLabels = changeNoLabels;
  this.retainPPClr = retainPPClr;

  rootLabel = tlp.startSymbol();

  try {
    prdVerbPattern  = TregexPattern.compile("/^V[^P]/ > VP $ /-PRD$/=prd");

    prdPattern = Pattern.compile("^[A-Z]+-PRD");

    //Marks NP subjects that *do not* occur in verb-initial clauses
    npSbjPattern = TregexPattern.compile("/^NP-SBJ/ !> @VP");

  } catch(ParseException e) {
    // Chain the cause so the parse failure stays visible to whoever handles the exception.
    throw new RuntimeException("Failed to compile ArabicTreeNormalizer tregex patterns", e);
  }

  emptyFilter = new ArabicEmptyFilter();
  lexMapper = new DefaultLexicalMapper();
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:28,代码来源:ArabicTreeNormalizer.java

示例13: getOperationFromReader

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Reads one tregex pattern followed by its tsurgeon operations from a script and returns
 * them as a pair.
 *
 * <p>The pattern text comes from {@code getPatternFromFile}; the operations come from
 * {@code getTsurgeonOperationsFromReader}, called on the same reader afterwards.
 *
 * @param reader File to read patterns from
 * @return A pair of a tregex and tsurgeon pattern read from a file
 * @throws IOException If any IO problem
 * @throws RuntimeException wrapping the tregex {@code ParseException} when the pattern
 *         text cannot be compiled (the offending text is also printed to stderr)
 */
public static Pair<TregexPattern, TsurgeonPattern> getOperationFromReader(BufferedReader reader) throws IOException {
  final String tregexSource = getPatternFromFile(reader);

  final TregexPattern compiledMatch;
  try {
    compiledMatch = TregexPattern.compile(tregexSource);
  } catch (edu.stanford.nlp.trees.tregex.ParseException parseFailure) {
    System.err.println("Error parsing your tregex pattern:\n" + tregexSource);
    throw new RuntimeException(parseFailure);
  }

  final TsurgeonPattern operations = getTsurgeonOperationsFromReader(reader);
  return new Pair<TregexPattern,TsurgeonPattern>(compiledMatch, operations);
}
 
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:22,代码来源:Tsurgeon.java

示例14: extractEnumerations

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Extract enumerations (A, B, and C).
 *
 * <p>Every match of the enumeration pattern contributes the subtrees bound to {@code m1}
 * and {@code m2}, keyed by their span. Afterwards a {@link Mention} is created for each
 * collected span that is not yet in {@code mentionSpanSet} and for which {@code insideNE}
 * returns false.
 *
 * @param s the sentence; its tokens, parse tree and collapsed dependencies are read
 * @param mentions list that receives the newly created mentions
 * @param mentionSpanSet spans already turned into mentions; new spans are added to it
 * @param namedEntitySpanSet spans passed to {@code insideNE} for the exclusion check
 */
protected void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet){
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final SemanticGraph dependencies = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

  final TregexPattern enumeration =
      TregexPattern.compile("NP < (/^(?:NP|NNP|NML)/=m1 $.. (/^CC|,/ $.. /^(?:NP|NNP|NML)/=m2))");
  final TregexMatcher matcher = enumeration.matcher(parse);

  // Phase 1: gather the span of each named conjunct; a repeated span overwrites its entry.
  final Map<IntPair, Tree> subtreeBySpan = Generics.newHashMap();
  while (matcher.find()) {
    matcher.getMatch(); // return value unused
    final Tree[] conjuncts = { matcher.getNode("m1"), matcher.getNode("m2") };
    for (Tree conjunct : conjuncts) {
      final List<Tree> leaves = conjunct.getLeaves();
      final CoreLabel firstLeaf = (CoreLabel) leaves.get(0).label();
      final int beginIdx = firstLeaf.get(CoreAnnotations.IndexAnnotation.class) - 1;
      final CoreLabel lastLeaf = (CoreLabel) leaves.get(leaves.size() - 1).label();
      final int endIdx = lastLeaf.get(CoreAnnotations.IndexAnnotation.class);
      subtreeBySpan.put(new IntPair(beginIdx, endIdx), conjunct);
    }
  }

  // Phase 2: turn every new span into a mention.
  for (Map.Entry<IntPair, Tree> spanAndSubtree : subtreeBySpan.entrySet()) {
    final IntPair span = spanAndSubtree.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }

    final int mentionID = assignIds ? ++maxID : -1;
    final int begin = span.get(0);
    final int end = span.get(1);
    final List<CoreLabel> spanTokens = new ArrayList<CoreLabel>(tokens.subList(begin, end));
    mentions.add(new Mention(mentionID, begin, end, dependencies, spanTokens, spanAndSubtree.getValue()));
    mentionSpanSet.add(span);
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:37,代码来源:RuleBasedCorefMentionFinder.java

示例15: checkPleonastic

import edu.stanford.nlp.trees.tregex.TregexPattern; //导入方法依赖的package包/类
/**
 * Tests whether a tregex pattern matches the tree with its {@code m1} node located at the
 * mention's head word.
 *
 * <p>A match counts when the {@code BeginIndexAnnotation} of the {@code m1} node plus one
 * equals the {@code IndexAnnotation} of {@code m.headWord}. Any exception raised while
 * compiling or matching is printed and treated as "no match".
 *
 * @param m mention whose head word is compared
 * @param tree tree to search
 * @param pattern tregex pattern source; expected to name a node {@code m1}
 * @return {@code true} on the first qualifying match, otherwise {@code false}
 */
private static boolean checkPleonastic(Mention m, Tree tree, String pattern) {
  try {
    final TregexMatcher matcher = TregexPattern.compile(pattern).matcher(tree);
    while (matcher.find()) {
      final CoreLabel candidate = (CoreLabel) matcher.getNode("m1").label();
      final int candidateIndex = candidate.get(CoreAnnotations.BeginIndexAnnotation.class) + 1;
      if (candidateIndex == m.headWord.get(CoreAnnotations.IndexAnnotation.class)) {
        return true;
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return false;
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:16,代码来源:RuleBasedCorefMentionFinder.java


注:本文中的edu.stanford.nlp.trees.tregex.TregexPattern.compile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。