本文整理汇总了Python中Core.ExampleUtils.appendExamples方法的典型用法代码示例。如果您正苦于以下问题:Python ExampleUtils.appendExamples方法的具体用法?Python ExampleUtils.appendExamples怎么用?Python ExampleUtils.appendExamples使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Core.ExampleUtils
的用法示例。
在下文中一共展示了ExampleUtils.appendExamples方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: buildExamplesForSentences
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
def buildExamplesForSentences(self, sentences, goldSentences, output, idFileTag=None, append=False):
    """
    Build classification examples for a list of sentences and write them to a file.

    sentences -- list of sentence containers; sentence[0] provides getSentenceId()
                 (assumed from usage -- confirm against caller)
    goldSentences -- optional parallel list of gold-annotated sentences, or None
    output -- path of the example file to write
    idFileTag -- if given, class names and feature names are saved to
                 idFileTag + ".class_names" and idFileTag + ".feature_names"
    append -- if True, append to an existing example file instead of overwriting
    """
    counter = ProgressCounter(len(sentences), "Build examples")
    # single open with a conditional mode instead of duplicated branches
    outfile = open(output, "at" if append else "wt")
    exampleCount = 0
    try:
        for i in range(len(sentences)):
            sentence = sentences[i]
            # placeholder so goldSentence[0] is None when there is no gold data
            goldSentence = [None]
            if goldSentences != None:
                goldSentence = goldSentences[i]
            counter.update(1, "Building examples (" + sentence[0].getSentenceId() + "): ")
            examples = self.buildExamples(sentence[0], goldSentence[0], append=append)
            exampleCount += len(examples)
            examples = self.preProcessExamples(examples)
            ExampleUtils.appendExamples(examples, outfile)
    finally:
        # close the output file even if example building raises
        # (the original leaked the handle on error)
        outfile.close()
    print >>sys.stderr, "Examples built:", exampleCount
    print >>sys.stderr, "Features:", len(self.featureSet.getNames())
    # IF LOCAL
    if self.exampleStats.getExampleCount() > 0:
        self.exampleStats.printStats()
    # ENDIF
    # Save Ids
    if idFileTag != None:
        print >>sys.stderr, "Saving class names to", idFileTag + ".class_names"
        self.classSet.write(idFileTag + ".class_names")
        print >>sys.stderr, "Saving feature names to", idFileTag + ".feature_names"
        self.featureSet.write(idFileTag + ".feature_names")
示例2: polynomizeExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
def polynomizeExamples(exampleFile, outFile, weightFeatures, idSet):
outFile = open(outFile, "wt")
addCount = 0
f = open(exampleFile)
numExamples = sum([1 for line in f])
f.close()
counter = ProgressCounter(numExamples, "Polynomize examples", step=0)
weightFeatureIds = {}
for weightFeature in weightFeatures:
wId = idSet.getId(weightFeature, False)
if wId == None:
sys.exit("Weight vector feature", weightFeature, "not in id file")
weightFeatureIds[weightFeature] = wId
print "Polynomizing", exampleFile
exampleCache = []
for example in ExampleUtils.readExamples(exampleFile):
counter.update(1, "Processing example ("+example[0]+"): ")
features = example[2]
for i in range(len(weightFeatures)-1):
wI = weightFeatures[i]
wIid = weightFeatureIds[wI]
if not features.has_key(wIid):
continue
for j in range(i + 1, len(weightFeatures)):
wJ = weightFeatures[j]
wJid = weightFeatureIds[wJ]
if not features.has_key(wJid):
continue
# Make polynomial feature
features[idSet.getId(wI + "_AND_" + wJ)] = 1
addCount += 1
exampleCache.append(example)
if len(exampleCache) > 50:
ExampleUtils.appendExamples(exampleCache, outFile)
exampleCache = []
ExampleUtils.appendExamples(exampleCache, outFile)
outFile.close()
print "Added", addCount, "polynomial features"
示例3: buildExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
def buildExamples(exampleBuilder, sentences, outfilename):
    """
    Build examples for every sentence with the given example builder and
    write them to outfilename. Reports example/feature counts and elapsed
    time on stderr when finished.
    """
    clock = Timer()
    # the graph kernel style wants a ProgressCounter with an explicit 0 step
    if "graph_kernel" in exampleBuilder.styles:
        progress = ProgressCounter(len(sentences), "Build examples", 0)
    else:
        progress = ProgressCounter(len(sentences), "Build examples")
    calculatePredictedRange(exampleBuilder, sentences)
    out = open(outfilename, "wt")
    total = 0
    for sentence in sentences:
        progress.update(1, "Building examples ("+sentence[0].getSentenceId()+"): ")
        built = exampleBuilder.buildExamples(sentence[0])
        total += len(built)
        built = exampleBuilder.preProcessExamples(built)
        Example.appendExamples(built, out)
    out.close()
    print >> sys.stderr, "Examples built:", str(total)
    print >> sys.stderr, "Features:", len(exampleBuilder.featureSet.getNames())
    print >> sys.stderr, "Elapsed", clock.toString()
示例4: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
#.........这里部分代码省略.........
makeExample = False
self.exampleStats.filter("genia_task1")
if self.styles["rel_limits"] and not self.isPotentialRELInteraction(eI, eJ):
makeExample = False
self.exampleStats.filter("rel_limits")
if self.styles["co_limits"] and not self.isPotentialCOInteraction(eI, eJ, sentenceGraph):
makeExample = False
self.exampleStats.filter("co_limits")
if self.styles["bb_limits"] and not self.isPotentialBBInteraction(eI, eJ, sentenceGraph):
makeExample = False
self.exampleStats.filter("bb_limits")
if categoryName != "neg":
self.exampleStats.filter("bb_limits(" + categoryName + ":" + eI.get("type") + "/" + eJ.get("type") + ")")
if self.styles["bi_limits"] and not self.isPotentialBIInteraction(eI, eJ, sentenceGraph, self.exampleStats):
makeExample = False
#self.exampleStats.filter("bi_limits")
if self.styles["epi_limits"] and not self.isPotentialEPIInteraction(eI, eJ, sentenceGraph):
makeExample = False
self.exampleStats.filter("epi_limits")
if self.styles["id_limits"] and not self.isPotentialIDInteraction(eI, eJ, sentenceGraph):
makeExample = False
self.exampleStats.filter("id_limits")
# if self.styles["selftrain_limits"] and (eI.get("selftrain") == "False" or eJ.get("selftrain") == "False"):
# makeExample = False
# self.exampleStats.filter("selftrain_limits")
# if self.styles["selftrain_group"] and (eI.get("selftraingroup") not in self.selfTrainGroups or eJ.get("selftraingroup") not in self.selfTrainGroups):
# makeExample = False
# self.exampleStats.filter("selftrain_group")
if self.styles["pos_only"] and categoryName == "neg":
makeExample = False
self.exampleStats.filter("pos_only")
if makeExample:
#examples.append( self.buildExample(tI, tJ, paths, sentenceGraph, categoryName, exampleIndex, eI, eJ) )
ExampleUtils.appendExamples([self.buildExample(tI, tJ, paths, sentenceGraph, categoryName, exampleIndex, eI, eJ)], outfile)
exampleIndex += 1
self.exampleStats.endExample()
# define reverse
if self.styles["entities"]:
categoryName = self.getCategoryName(sentenceGraph, eJ, eI, True)
if goldGraph != None:
categoryName = self.getGoldCategoryName(goldGraph, entityToGold, eJ, eI, True)
else:
categoryName = self.getCategoryNameFromTokens(sentenceGraph, tJ, tI, True)
# make reverse
self.exampleStats.beginExample(categoryName)
makeExample = True
if self.styles["genia_limits"] and not self.isPotentialGeniaInteraction(eJ, eI):
makeExample = False
self.exampleStats.filter("genia_limits")
if self.styles["genia_task1"] and (eI.get("type") == "Entity" or eJ.get("type") == "Entity"):
makeExample = False
self.exampleStats.filter("genia_task1")
if self.styles["rel_limits"] and not self.isPotentialRELInteraction(eJ, eI):
makeExample = False
self.exampleStats.filter("rel_limits")
if self.styles["co_limits"] and not self.isPotentialCOInteraction(eJ, eI, sentenceGraph):
makeExample = False
self.exampleStats.filter("co_limits")
if self.styles["bb_limits"] and not self.isPotentialBBInteraction(eJ, eI, sentenceGraph):
makeExample = False
self.exampleStats.filter("bb_limits")
if categoryName != "neg":
self.exampleStats.filter("bb_limits(" + categoryName + ":" + eJ.get("type") + "/" + eI.get("type") + ")")
if self.styles["bi_limits"] and not self.isPotentialBIInteraction(eJ, eI, sentenceGraph, self.exampleStats):
makeExample = False
示例5: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
#.........这里部分代码省略.........
elif text[j] == "/":
features[self.featureSet.getId("has_fslash")] = 1
elif text[j] == "\\":
features[self.featureSet.getId("has_bslash")] = 1
# duplets
if j > 0:
features[self.featureSet.getId("dt_"+text[j-1:j+1].lower())] = 1
# triplets
if j > 1:
features[self.featureSet.getId("tt_"+text[j-2:j+1].lower())] = 1
# quadruplets (don't work, slight decrease (0.5 pp) on f-score
#if j > 2:
# features[self.featureSet.getId("qt_"+text[j-3:j+1].lower())] = 1
# Attached edges (Hanging in and out edges)
t1InEdges = self.inEdgesByToken[token]
for edge in t1InEdges:
edgeType = edge[2].get("type")
features[self.featureSet.getId("t1HIn_"+edgeType)] = 1
features[self.featureSet.getId("t1HIn_"+edge[0].get("POS"))] = 1
features[self.featureSet.getId("t1HIn_"+edgeType+"_"+edge[0].get("POS"))] = 1
tokenText = sentenceGraph.getTokenText(edge[0])
features[self.featureSet.getId("t1HIn_"+tokenText)] = 1
features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenText)] = 1
tokenStem = PorterStemmer.stem(tokenText)
features[self.featureSet.getId("t1HIn_"+tokenStem)] = 1
features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenStem)] = 1
features[self.featureSet.getId("t1HIn_"+norStem+"_"+edgeType+"_"+tokenStem)] = 1
t1OutEdges = self.outEdgesByToken[token]
for edge in t1OutEdges:
edgeType = edge[2].get("type")
features[self.featureSet.getId("t1HOut_"+edgeType)] = 1
features[self.featureSet.getId("t1HOut_"+edge[1].get("POS"))] = 1
features[self.featureSet.getId("t1HOut_"+edgeType+"_"+edge[1].get("POS"))] = 1
tokenText = sentenceGraph.getTokenText(edge[1])
features[self.featureSet.getId("t1HOut_"+tokenText)] = 1
features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenText)] = 1
tokenStem = PorterStemmer.stem(tokenText)
features[self.featureSet.getId("t1HOut_"+tokenStem)] = 1
features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenStem)] = 1
features[self.featureSet.getId("t1HOut_"+norStem+"_"+edgeType+"_"+tokenStem)] = 1
# REL features
if self.styles["rel_features"]:
self.relFeatureBuilder.setFeatureVector(features)
self.relFeatureBuilder.buildAllFeatures(sentenceGraph.tokens, i)
self.relFeatureBuilder.setFeatureVector(None)
# DDI13 features
if self.styles["ddi13_features"]:
for index in range(len(normalizedText)):
features[self.featureSet.getId("ddi13_fromstart" + str(index) + "_" + normalizedText[:index+1])] = 1
features[self.featureSet.getId("ddi13_fromend" + str(index) + "_" + normalizedText[index:])] = 1
if self.styles["drugbank_features"]:
self.drugFeatureBuilder.setFeatureVector(features)
self.drugFeatureBuilder.tag = "ddi_"
self.drugFeatureBuilder.buildDrugFeatures(token)
self.drugFeatureBuilder.setFeatureVector(None)
#self.wordNetFeatureBuilder.getTokenFeatures("show", "VBP")
#tokTxt = token.get("text")
#tokPOS = token.get("POS")
#wordNetFeatures = []
#wordNetFeatures = self.wordNetFeatureBuilder.getTokenFeatures(tokTxt, tokPOS)
#self.wordNetFeatureBuilder.getTokenFeatures(tokTxt, tokPOS)
if self.styles["wordnet"]:
tokTxt = token.get("text")
tokPOS = token.get("POS")
wordNetFeatures = self.wordNetFeatureBuilder.getTokenFeatures(tokTxt, tokPOS)
for wordNetFeature in wordNetFeatures:
#print wordNetFeature,
features[self.featureSet.getId("WN_"+wordNetFeature)] = 1
#print
if self.styles["giuliano"]:
self.giulianoFeatureBuilder.setFeatureVector(features)
self.giulianoFeatureBuilder.buildTriggerFeatures(token, sentenceGraph)
self.giulianoFeatureBuilder.setFeatureVector(None)
extra = {"xtype":"token","t":token.get("id")}
if self.styles["bb_features"]:
extra["trigex"] = "bb" # Request trigger extension in ExampleWriter
if self.styles["epi_merge_negated"]:
extra["unmergeneg"] = "epi" # Request trigger type unmerging
if entityIds != None:
extra["goldIds"] = entityIds # The entities to which this example corresponds
#examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
# chains
self.buildChains(token, sentenceGraph, features)
if self.styles["pos_pairs"]:
self.buildPOSPairs(token, namedEntityHeadTokens, features)
example = (sentenceGraph.getSentenceId()+".x"+str(exampleIndex), category, features, extra)
ExampleUtils.appendExamples([example], outfile)
exampleIndex += 1
self.exampleStats.endExample()
#return examples
return exampleIndex
示例6: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph = None, structureAnalyzer=None):
    """
    Build examples for a single sentence and append them to outfile.
    Returns the number of examples built. One example is generated for
    each pair of entities (or tokens, with the token_nodes style).
    See Core/ExampleUtils for example format.
    """
    #examples = []
    exampleIndex = 0
    # example directionality
    if self.styles["directed"] == None and self.styles["undirected"] == None: # determine directedness from corpus
        examplesAreDirected = structureAnalyzer.hasDirectedTargets() if structureAnalyzer != None else True
    elif self.styles["directed"]:
        assert self.styles["undirected"] in [None, False]
        examplesAreDirected = True
    elif self.styles["undirected"]:
        assert self.styles["directed"] in [None, False]
        examplesAreDirected = False
    # NOTE(review): if both styles were somehow truthy-then-falsy in an
    # unexpected combination, examplesAreDirected could stay unbound; the
    # asserts above appear to rule that out -- confirm against style parsing
    if not self.styles["no_trigger_features"]:
        self.triggerFeatureBuilder.initSentence(sentenceGraph)
    if self.styles["evex"]:
        self.evexFeatureBuilder.initSentence(sentenceGraph)
    # if self.styles["sdb_merge"]:
    #     self.determineNonOverlappingTypes(structureAnalyzer)
    # Filter entities, if needed: merge duplicate entities so each pair is
    # considered once; track how many duplicates were skipped
    sentenceGraph.mergeInteractionGraph(True)
    entities = sentenceGraph.mergedEntities
    entityToDuplicates = sentenceGraph.mergedEntityToDuplicates
    self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))
    # Connect to optional gold graph
    entityToGold = None
    if goldGraph != None:
        entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities)
    paths = None
    if not self.styles["no_path"]:
        # shortest paths are computed on the undirected dependency graph
        undirected = sentenceGraph.dependencyGraph.toUndirected()
        paths = undirected
        if self.styles["filter_shortest_path"] != None: # For DDI use filter_shortest_path=conj_and
            paths.resetAnalyses() # just in case
            paths.FloydWarshall(self.filterEdge, {"edgeTypes":self.styles["filter_shortest_path"]})
    # Generate examples based on interactions between entities or interactions between tokens
    if self.styles["token_nodes"]:
        loopRange = len(sentenceGraph.tokens)
    else:
        loopRange = len(entities)
    # all unordered pairs (i, j) with i < j
    for i in range(loopRange-1):
        for j in range(i+1,loopRange):
            eI = None
            eJ = None
            if self.styles["token_nodes"]:
                tI = sentenceGraph.tokens[i]
                tJ = sentenceGraph.tokens[j]
            else:
                eI = entities[i]
                eJ = entities[j]
                tI = sentenceGraph.entityHeadTokenByEntity[eI]
                tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
                if eI.get("type") == "neg" or eJ.get("type") == "neg":
                    continue
                if self.styles["skip_extra_triggers"]:
                    # skip entities that came from an external source
                    if eI.get("source") != None or eJ.get("source") != None:
                        continue
            # only consider paths between entities (NOTE! entities, not only named entities)
            if self.styles["headsOnly"]:
                if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
                    continue
            # delegate per-pair example construction (both directions when directed)
            examples = self.buildExamplesForPair(tI, tJ, paths, sentenceGraph, goldGraph, entityToGold, eI, eJ, structureAnalyzer, examplesAreDirected)
            for categoryName, features, extra in examples:
                # make example
                if self.styles["binary"]:
                    # collapse all non-negative categories into a single positive class
                    if categoryName != "neg":
                        category = 1
                    else:
                        category = -1
                    extra["categoryName"] = "i"
                else:
                    category = self.classSet.getId(categoryName)
                example = [sentenceGraph.getSentenceId()+".x"+str(exampleIndex), category, features, extra]
                ExampleUtils.appendExamples([example], outfile)
                exampleIndex += 1
    return exampleIndex
示例7: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
#.........这里部分代码省略.........
# release.
#
# Besides, using the classSet here generates an unneeded
# additional class, that shows up in evaluations etc. However, to be
# able to publish the exact models used for the publication experiments,
# this can't be fixed so it breaks feature id consistency. Therefore I'll
# now just remove the redundant class id from the classSet.
#ENDIF
#features[self.featureSet.getId(entityType)] = 1
features[self.featureSet.getId(namedEntityCountFeature)] = 1
features[self.featureSet.getId(entityCountFeature)] = 1
#for k,v in bagOfWords.iteritems():
# features[self.featureSet.getId(k)] = v
# pre-calculate bow _features_
features.update(bowFeatures)
# for j in range(len(sentenceGraph.tokens)):
# text = "bow_" + sentenceGraph.tokens[j].get("text")
# if j < i:
# features[self.featureSet.getId("bf_" + text)] = 1
# elif j > i:
# features[self.featureSet.getId("af_" + text)] = 1
# Main features
text = token.get("text")
features[self.featureSet.getId("txt_"+text)] = 1
features[self.featureSet.getId("POS_"+token.get("POS"))] = 1
stem = PorterStemmer.stem(text)
features[self.featureSet.getId("stem_"+stem)] = 1
features[self.featureSet.getId("nonstem_"+text[len(stem):])] = 1
if self.styles["speculation_words"]:
if text in self.specWords:
features[self.featureSet.getId("ent_spec")] = 1
if stem in self.specWordStems:
features[self.featureSet.getId("ent_spec_stem")] = 1
# Linear order features
for i in range(len(sentenceGraph.tokens)):
if token == sentenceGraph.tokens[i]:
break
for index in [-3,-2,-1,1,2,3]:
if i + index > 0 and i + index < len(sentenceGraph.tokens):
self.buildLinearOrderFeatures(sentenceGraph, i + index, str(index), features)
# Content
if i > 0 and text[0].isalpha() and text[0].isupper():
features[self.featureSet.getId("upper_case_start")] = 1
for j in range(len(text)):
if j > 0 and text[j].isalpha() and text[j].isupper():
features[self.featureSet.getId("upper_case_middle")] = 1
# numbers and special characters
if text[j].isdigit():
features[self.featureSet.getId("has_digits")] = 1
if j > 0 and text[j-1] == "-":
features[self.featureSet.getId("has_hyphenated_digit")] = 1
elif text[j] == "-":
features[self.featureSet.getId("has_hyphen")] = 1
elif text[j] == "/":
features[self.featureSet.getId("has_fslash")] = 1
elif text[j] == "\\":
features[self.featureSet.getId("has_bslash")] = 1
# duplets
if j > 0:
features[self.featureSet.getId("dt_"+text[j-1:j+1].lower())] = 1
# triplets
if j > 1:
features[self.featureSet.getId("tt_"+text[j-2:j+1].lower())] = 1
# Attached edges (Hanging in and out edges)
t1InEdges = self.inEdgesByToken[token]
for edge in t1InEdges:
edgeType = edge[2].get("type")
features[self.featureSet.getId("t1HIn_"+edgeType)] = 1
features[self.featureSet.getId("t1HIn_"+edge[0].get("POS"))] = 1
features[self.featureSet.getId("t1HIn_"+edgeType+"_"+edge[0].get("POS"))] = 1
tokenText = sentenceGraph.getTokenText(edge[0])
features[self.featureSet.getId("t1HIn_"+tokenText)] = 1
features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenText)] = 1
t1OutEdges = self.outEdgesByToken[token]
for edge in t1OutEdges:
edgeType = edge[2].get("type")
features[self.featureSet.getId("t1HOut_"+edgeType)] = 1
features[self.featureSet.getId("t1HOut_"+edge[1].get("POS"))] = 1
features[self.featureSet.getId("t1HOut_"+edgeType+"_"+edge[1].get("POS"))] = 1
tokenText = sentenceGraph.getTokenText(edge[1])
features[self.featureSet.getId("t1HOut_"+tokenText)] = 1
features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenText)] = 1
self.buildChains(token, sentenceGraph, features)
extra = {"xtype":"task3","t3type":task3Type,"t":token.get("id"),"entity":entity.get("id")}
#examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
example = (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra)
ExampleUtils.appendExamples([example], outfile)
exampleIndex += 1
self.exampleStats.endExample()
#return examples
return exampleIndex
示例8: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None, structureAnalyzer=None):
"""
Build one example for each phrase in the sentence
"""
self.triggerFeatureBuilder.initSentence(sentenceGraph)
# examples = []
exampleIndex = 0
# Prepare phrases, create subphrases
# filter = set(["NP", "TOK-IN", "WHADVP", "WHNP", "TOK-WP$", "TOK-PRP$", "NP-IN"])
phrases = MapPhrases.getPhrases(sentenceGraph.parseElement, sentenceGraph.tokens, set(["NP", "WHADVP", "WHNP"]))
phraseDict = MapPhrases.getPhraseDict(phrases)
phrases.extend(MapPhrases.makeINSubPhrases(phrases, sentenceGraph.tokens, phraseDict, ["NP"]))
phrases.extend(MapPhrases.makeTokenSubPhrases(sentenceGraph.tokens, phraseDict))
phraseToEntity = MapPhrases.getPhraseEntityMapping(sentenceGraph.entities, phraseDict)
# Make counts
phraseTypeCounts = MapPhrases.getPhraseTypeCounts(phrases)
for key in phraseTypeCounts.keys():
if not self.phraseTypeCounts.has_key(key):
self.phraseTypeCounts[key] = 0
self.phraseTypeCounts[key] += phraseTypeCounts[key]
self.exampleStats.addVariable(
"Phrase type counts", self.phraseTypeCounts
) # can be added on each loop, will always point to the same thing
# Build one example for each phrase
for phrase in phrases:
features = {}
self.triggerFeatureBuilder.setFeatureVector(features)
categoryName = self.getCategoryName(phrase, phraseToEntity)
category = self.classSet.getId(categoryName)
phraseTokens = self.getPhraseTokens(phrase, sentenceGraph)
phraseHeadToken = self.getPhraseHeadToken(phrase, phraseTokens)
self.exampleStats.beginExample(categoryName)
if self.styles["co_limits"] and not self.isPotentialCOTrigger(phrase, phraseTokens, sentenceGraph):
self.exampleStats.filter("co_limits")
self.exampleStats.endExample()
continue
# Sentence level features
features.update(self.triggerFeatureBuilder.bowFeatures)
# Whole phrase features
self.buildLinearNGram(phraseTokens, sentenceGraph, features)
features[self.featureSet.getId("pType_" + phrase.get("type"))] = 1
for split in phrase.get("type").split("-"):
features[self.featureSet.getId("pSubType_" + split)] = 1
# Check named entities
nameCount = 0
for token in phraseTokens:
if sentenceGraph.tokenIsName[token]:
nameCount += 1
features[self.featureSet.getId("phraseNames_" + str(nameCount))] = 1
features[self.featureSet.getId("phraseNameCount")] = nameCount
# Head token features
self.triggerFeatureBuilder.setTag("head_")
self.triggerFeatureBuilder.buildFeatures(phraseHeadToken)
self.triggerFeatureBuilder.buildAttachedEdgeFeatures(phraseHeadToken, sentenceGraph)
self.triggerFeatureBuilder.setTag()
# Features for all phrase tokens
self.triggerFeatureBuilder.setTag("ptok_")
phraseTokenPos = 0
# print len(phraseTokens)
for token in phraseTokens:
self.triggerFeatureBuilder.setTag("ptok_")
self.triggerFeatureBuilder.buildFeatures(phraseHeadToken, linear=False, chains=False)
self.triggerFeatureBuilder.setTag("ptok_" + str(phraseTokenPos) + "_")
self.triggerFeatureBuilder.buildFeatures(phraseHeadToken, linear=False, chains=False)
self.triggerFeatureBuilder.setTag("ptok_" + str(phraseTokenPos - len(phraseTokens)) + "_")
self.triggerFeatureBuilder.buildFeatures(phraseHeadToken, linear=False, chains=False)
# self.triggerFeatureBuilder.buildAttachedEdgeFeatures(phraseHeadToken)
phraseTokenPos += 1
self.triggerFeatureBuilder.setTag()
extra = {
"xtype": "phrase",
"t": phraseHeadToken.get("id"),
"p": phrase.get("id"),
"ptype": phrase.get("type"),
}
extra["charOffset"] = phrase.get("charOffset")
if phrase not in phraseToEntity:
extra["eids"] = "neg"
else:
extra["eids"] = ",".join([x.get("id") for x in phraseToEntity[phrase]])
example = (sentenceGraph.getSentenceId() + ".x" + str(exampleIndex), category, features, extra)
ExampleUtils.appendExamples([example], outfile)
self.exampleStats.endExample()
exampleIndex += 1
# Mark missed entities in exampleStats
linkedEntities = set(sum(phraseToEntity.values(), []))
for entity in sentenceGraph.entities:
if entity.get("given") != "True" and entity not in linkedEntities:
self.exampleStats.addValue("Entities with no phrase", 1)
#.........这里部分代码省略.........
示例9: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
#.........这里部分代码省略.........
#undirected = self.nxMultiDiGraphToUndirected(sentenceGraph.dependencyGraph)
#paths = NX10.all_pairs_shortest_path(undirected, cutoff=999)
undirected = sentenceGraph.dependencyGraph.toUndirected()
paths = undirected
# Get argument order
self.interactionLenghts = self.getInteractionEdgeLengths(sentenceGraph, paths)
# Map tokens to character offsets
tokenByOffset = {}
for i in range(len(sentenceGraph.tokens)):
token = sentenceGraph.tokens[i]
if goldGraph != None: # check that the tokenizations match
goldToken = goldGraph.tokens[i]
assert token.get("id") == goldToken.get("id") and token.get("charOffset") == goldToken.get("charOffset")
tokenByOffset[token.get("charOffset")] = token.get("id")
# Map gold entities to their head offsets
goldEntitiesByOffset = {}
if goldGraph != None:
for entity in goldGraph.entities:
offset = entity.get("headOffset")
assert offset != None
if not goldEntitiesByOffset.has_key(offset):
goldEntitiesByOffset[offset] = []
goldEntitiesByOffset[offset].append(entity)
# Generate examples based on interactions between entities or interactions between tokens
# interactionsByEntityId = {}
# for entity in sentenceGraph.entities:
# interactionsByEntityId[entity.get("id")] = []
# for interaction in sentenceGraph.interactions:
# if interaction.get("type") == "neg":
# continue
# e1Id = interaction.get("e1")
# interactionsByEntityId[e1Id].append(interaction)
if self.styles["no_merge"]:
mergeInput = False
entities = sentenceGraph.entities
else:
mergeInput = True
sentenceGraph.mergeInteractionGraph(True)
entities = sentenceGraph.mergedEntities
exampleIndex = 0
for entity in entities: # sentenceGraph.entities:
eType = entity.get("type")
assert eType != None, entity.attrib
eType = str(eType)
#if eType not in ["Binding", "Positive_regulation", "Negative_regulation", "Regulation"]:
# continue
#if not goldEntitiesByOffset.has_key(entity.get("headOffset")):
# continue
#interactions = interactionsByEntityId[entity.get("id")]
interactions = [x[2] for x in sentenceGraph.getOutInteractions(entity, mergeInput)]
argCombinations = self.getArgumentCombinations(eType, interactions, entity.get("id"))
#if len(argCombinations) <= 1:
# continue
assert argCombinations != None, (entity.get("id"), entity.get("type"))
for argCombination in argCombinations:
if eType != "Process":
assert len(argCombination) > 0, eType + ": " + str(argCombinations)
# Originally binary classification
if goldGraph != None:
isGoldEvent = self.eventIsGold(entity, argCombination, sentenceGraph, goldGraph, goldEntitiesByOffset)
#if eType == "Binding":
# print argCombination[0].get("e1"), len(argCombination), isGoldEvent
else:
isGoldEvent = False
# Named (multi-)class
if isGoldEvent:
#category = "event"
category = eType
if category.find("egulation") != -1:
category = "All_regulation"
elif category != "Binding":
category = "Other" #"simple6"
else:
category = "neg"
features = {}
argString = ""
for arg in argCombination:
argString += "," + arg.get("id")
extra = {"xtype":"um","e":entity.get("id"),"i":argString[1:],"etype":eType,"class":category}
assert type(extra["etype"]) == types.StringType, extra
self.exampleStats.addExample(category)
example = self.buildExample(sentenceGraph, paths, entity, argCombination, interactions)
example[0] = sentenceGraph.getSentenceId()+".x"+str(exampleIndex)
example[1] = self.classSet.getId(category)
example[3] = extra
#examples.append( example )
ExampleUtils.appendExamples([example], outfile)
exampleIndex += 1
#return examples
return exampleIndex
示例10: buildExamplesFromGraph
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import appendExamples [as 别名]
#.........这里部分代码省略.........
else: # intersentence
validInteractionsByType[interaction.get("type")].append(interaction)
interactionCounts[interaction.get("type")] += 1
interactionCountString = ",".join([key + "=" + str(interactionCounts[key]) for key in sorted(interactionCounts.keys())])
#argCombinations = self.getArgumentCombinations(eType, interactions, entity.get("id"))
intCombinations = []
validIntTypeCount = 0
maxArgCount = 0
if self.debug:
print >> sys.stderr, entity.get("id"), entity.get("type"), "int:" + interactionCountString, "validInt:" + str(validInteractionsByType)
for intType in sorted(validInteractionsByType.keys()): # for each argument type the event can have
validIntTypeCount += 1
intCombinations.append([])
minArgs, maxArgs = structureAnalyzer.getArgLimits(entity.get("type"), intType)
if maxArgs > maxArgCount:
maxArgCount = maxArgs
#if maxArgs > 1: # allow any number of arguments for cases like Binding
# maxArgs = len(validInteractionsByType[intType])
for combLen in range(minArgs, maxArgs+1): # for each valid argument count, get all possible combinations. note that there may be zero-lenght combination
for singleTypeArgCombination in combinations(validInteractionsByType[intType], combLen):
intCombinations[-1].append(singleTypeArgCombination)
# e.g. theme:[a,b], cause:[d] = [[
# intCombinations now contains a list of lists, each of which has a tuple for each valid combination
# of one argument type. Next, we'll make all valid combinations of multiple argument types
if self.debug:
print >> sys.stderr, " ", "intCombinations", intCombinations
argCombinations = combine.combine(*intCombinations)
if self.debug:
print >> sys.stderr, " ", "argCombinations", argCombinations
for i in range(len(argCombinations)):
argCombinations[i] = sum(argCombinations[i], ())
#sum(argCombinations, []) # flatten nested list
if self.debug:
print >> sys.stderr, " ", "argCombinations flat", argCombinations
for argCombination in argCombinations:
# Originally binary classification
if goldGraph != None:
isGoldEvent = self.eventIsGold(entity, argCombination, sentenceGraph, goldGraph, goldEntitiesByOffset, goldGraph.interactions)
#if eType == "Binding":
# print argCombination[0].get("e1"), len(argCombination), isGoldEvent
else:
isGoldEvent = False
# Named (multi-)class
if isGoldEvent:
# category = "zeroArg"
# if validIntTypeCount == 1:
# category = "singleArg" # event has 0-1 arguments (old simple6)
# if validIntTypeCount > 1:
# category = "multiType" # event has arguments of several types, 0-1 of each (old Regulation)
# if maxArgCount > 1:
# category = "multiArg" # event can have 2-n of at least one argument type (old Binding)
if self.styles["binary"]:
category = "pos"
else:
category = entity.get("type")
assert category != None
else:
category = "neg"
self.exampleStats.beginExample(category)
issues = defaultdict(int)
# early out for proteins etc.
if validIntTypeCount == 0 and entity.get("given") == "True":
self.exampleStats.filter("given-leaf:" + entity.get("type"))
if self.debug:
print >> sys.stderr, " ", category +"("+eType+")", "arg combination", argCombination, "LEAF"
elif structureAnalyzer.isValidEntity(entity) or structureAnalyzer.isValidEvent(entity, argCombination, self.documentEntitiesById, noUpperLimitBeyondOne=self.styles["no_arg_count_upper_limit"], issues=issues):
if self.debug:
print >> sys.stderr, " ", category, "arg combination", argCombination, "VALID"
argString = ""
for arg in argCombination:
argString += "," + arg.get("type") + "=" + arg.get("id")
extra = {"xtype":"um","e":entity.get("id"),"i":argString[1:],"etype":eType,"class":category}
extra["allInt"] = interactionCountString
assert type(extra["etype"]) in types.StringTypes, extra
assert type(extra["class"]) in types.StringTypes, category
assert type(extra["i"]) in types.StringTypes, argString
example = self.buildExample(sentenceGraph, paths, entity, argCombination, interactions)
example[0] = sentenceGraph.getSentenceId()+".x"+str(exampleIndex)
example[1] = self.classSet.getId(category)
example[3] = extra
#examples.append( example )
ExampleUtils.appendExamples([example], outfile)
exampleIndex += 1
else: # not a valid event or valid entity
if len(issues) == 0: # must be > 0 so that it gets filtered
if not structureAnalyzer.isValidEntity(entity):
issues["INVALID_ENTITY:"+eType] += 1
else:
issues["UNKNOWN_ISSUE_FOR:"+eType] += 1
for key in issues:
self.exampleStats.filter(key)
if self.debug:
print >> sys.stderr, " ", category, "arg combination", argCombination, "INVALID", issues
self.exampleStats.endExample()
#return examples
return exampleIndex