当前位置: 首页>>代码示例>>Python>>正文


Python ParentedTree.convert方法代码示例

本文整理汇总了Python中nltk.tree.ParentedTree.convert方法的典型用法代码示例。如果您正苦于以下问题:Python ParentedTree.convert方法的具体用法?Python ParentedTree.convert怎么用?Python ParentedTree.convert使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.tree.ParentedTree的用法示例。


在下文中一共展示了ParentedTree.convert方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: assign_slots

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def assign_slots(tokens, tag_tree, word_tree):
    """Map query tokens onto named slots.

    Every token receives a tag from ``finalize_tags`` (which is given the
    category-annotated versions of both trees).  The tags are then
    post-processed:

    * the first token tagged ``USER``, ``MEDIA`` or ``NETWORK`` is re-tagged
      ``SEARCH``;
    * tokens whose tag equals ``UNK`` are re-tagged ``KEYWORD``.

    Returns a dict: ``'KEYWORDS'`` maps to a list of keyword tokens that are
    neither English stopwords nor in ``PUNCTUATION``; every other tag maps to
    its tokens joined by single spaces.  Tokens tagged ``SKIP`` are dropped.
    """
    stopword_list = stopwords.words('english')
    word_tree = ParentedTree.convert(word_tree)
    tag_tree = ParentedTree.convert(tag_tree)
    word_tree_with_cats = tag_words_with_categories(word_tree)
    tag_tree_with_cats = tag_words_with_categories(tag_tree)

    # Initial tagging, one (word, tag) pair per token.
    tagged = [
        (word, finalize_tags(position, word, tag_tree_with_cats, word_tree_with_cats))
        for position, word in enumerate(tokens)
    ]

    # Promote the first focus candidate to SEARCH; unknowns become keywords.
    focus_tags = ('USER', 'MEDIA', 'NETWORK')
    focus_assigned = False
    for position, (word, tag) in enumerate(tagged):
        if tag in focus_tags and not focus_assigned:
            tagged[position] = (word, 'SEARCH')
            focus_assigned = True
        elif tag == UNK:
            tagged[position] = (word, 'KEYWORD')

    # Fold the tagged tokens into the slot dictionary.
    slots = {}
    for word, tag in tagged:
        if tag == 'SKIP':
            continue
        if tag == 'KEYWORD':
            # The list is created even when this particular word is filtered out.
            keywords = slots.setdefault('KEYWORDS', [])
            if not (word in stopword_list or word in PUNCTUATION):
                keywords.append(word)
        elif tag in slots:
            slots[tag] = ' '.join([slots[tag], word])
        else:
            slots[tag] = word
    return slots
开发者ID:sophiavanvalkenburg,项目名称:queryparser,代码行数:36,代码来源:queryparser.py

示例2: syntax_similarity_two_documents

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
 def syntax_similarity_two_documents(self, doc1, doc2, average=False):
     """Score the syntactic similarity of two single documents.

     Both documents are split into sentences and parsed.  Every sentence
     tree of ``doc1`` is compared with every sentence tree of ``doc2``: the
     tree edit distance is divided by the sum of the node counts recorded
     for the two trees, giving one cell of a cost matrix.

     Returns the string ``"NA"`` when any sentence has more than 100
     whitespace-separated tokens, or when the parser raises.  When
     ``average == True`` the result is ``1 - mean(cost matrix)``; otherwise
     it is ``1 - total / max(sentence counts)``, where ``total`` sums the
     assigned cells plus the minimum cost of every unassigned row/column.
     """
     # NOTE(review): numnodes is presumably updated by convert_mytree while it
     # walks a tree -- confirm against that helper.
     global numnodes

     doc1sents = self.sent_detector.tokenize(doc1.strip())
     doc2sents = self.sent_detector.tokenize(doc2.strip())

     # Unusually long sentences are not parsed at all.
     for sentences in (doc1sents, doc2sents):
         for sentence in sentences:
             if len(sentence.split())>100:
                 return "NA"

     # Parser errors might happen, e.g. for an unusually long word.
     try:
         doc1parsed = self.parser.raw_parse_sents((doc1sents))
         doc2parsed = self.parser.raw_parse_sents((doc2sents))
     except Exception as e:
         sys.stderr.write(str(e))
         return "NA"

     # Keep only the first parse of every sentence.
     doc1parsed = [list(parses)[0] for parses in list(doc1parsed)]
     doc2parsed = [list(parses)[0] for parses in list(doc2parsed)]

     costMatrix = []
     for tree1 in doc1parsed:
         numnodes = 0
         sentencedoc1 = ParentedTree.convert(tree1)
         root1 = Node(sentencedoc1.root().label())
         new_sentencedoc1 = self.convert_mytree(sentencedoc1,root1)
         sen1nodes = numnodes
         row_costs = []
         for tree2 in doc2parsed:
             numnodes=0.0
             sentencedoc2 = ParentedTree.convert(tree2)
             root2 = Node(sentencedoc2.root().label())
             new_sentencedoc2 = self.convert_mytree(sentencedoc2,root2)
             ED = simple_distance(new_sentencedoc1, new_sentencedoc2)
             row_costs.append(ED / (numnodes + sen1nodes))
         costMatrix.append(row_costs)
     costMatrix = np.array(costMatrix)

     if average==True:
         return 1-np.mean(costMatrix)

     indexes = su.linear_assignment(costMatrix)
     total = 0
     rowMarked = [False] * len(doc1parsed)
     colMarked = [False] * len(doc2parsed)
     for row, column in indexes:
         total += costMatrix[row][column]
         rowMarked[row] = True
         colMarked[column] = True
     # Sentences left without a partner contribute their cheapest match.
     for k, marked in enumerate(rowMarked):
         if not marked:
             total+= np.min(costMatrix[k])
     for c, marked in enumerate(colMarked):
         if not marked:
             total+= np.min(costMatrix[:,c])
     maxlengraph = max(len(doc1parsed),len(doc2parsed))
     return 1-(total/maxlengraph)
开发者ID:USC-CSSL,项目名称:CASSIM,代码行数:61,代码来源:Cassim.py

示例3: add_tree

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
    def add_tree(self, datum):
        """Binarize one parse tree and append it to the output files.

        ``datum["raw_tree"]`` is parsed as a bracketed tree, converted to
        Chomsky normal form, has its unary chains collapsed (including
        part-of-speech nodes) and is turned into a ``ParentedTree``.

        One line is written to ``self.parents_file`` holding, for every
        non-root subtree in traversal order, the traversal index of its
        parent.  One line is written to ``self.sents_file`` holding the
        string found at position 0 of every subtree that has one.
        ``datum`` is appended to ``self.trees``.

        Returns the index of ``datum`` in ``self.trees`` (its ID).
        """
        # ``unicode`` exists only on Python 2.  Referencing it bare raised
        # NameError on Python 3 whenever a subtree's first child was not a str.
        try:
            text_types = (str, unicode)  # noqa: F821
        except NameError:
            text_types = (str,)

        # parse tree and binarize
        tree = Tree.fromstring(datum["raw_tree"])
        tree.chomsky_normal_form()
        tree.collapse_unary(collapsePOS=True)
        tree = ParentedTree.convert(tree)

        # assign indices to subtrees
        indices = {}
        for index, subtree in enumerate(tree.subtrees()):
            indices[subtree.treeposition()] = index

        # generate parent pointers and labels
        # (labels = one instance of sent in sents by treelstm terminology)
        parents = []
        labels = []
        for subtree in tree.subtrees():
            parent = subtree.parent()
            if parent is not None:
                parents.append(indices[parent.treeposition()])
            if isinstance(subtree[0], text_types):
                labels.append(subtree[0])

        self.parents_file.write(" ".join(map(str, parents)) + "\n")
        self.sents_file.write(" ".join(labels) + "\n")
        self.trees.append(datum)
        return len(self.trees) - 1 # ID
开发者ID:Jasmeet107,项目名称:serapis,代码行数:32,代码来源:predict.py

示例4: syntax_similarity_conversation

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
 def syntax_similarity_conversation(self, documents1, average=False):
     """Score the syntactic similarity of each document with the next one.

     Every document is split into sentences and parsed; a document with a
     sentence longer than 100 whitespace-separated tokens is stored as the
     string ``"NA"`` and every pair involving it is skipped.  For each
     remaining pair of consecutive documents a cost matrix of normalised
     tree edit distances is built.

     Returns an ``OrderedDict`` mapping ``(d1, d2)`` index pairs to
     ``1 - total / max(sentence counts)``.

     NOTE(review): when ``average == True`` the function returns
     ``1 - mean(cost matrix)`` for the FIRST comparable pair and stops.
     That looks unintended, but it is kept as-is -- confirm with callers.
     """
     # NOTE(review): numnodes is presumably updated by convert_mytree while it
     # walks a tree -- confirm against that helper.
     global numnodes

     documents1parsed = []
     for d1 in range(len(documents1)):
         sys.stderr.write(str(d1)+"\n")
         tempsents = (self.sent_detector.tokenize(documents1[d1].strip()))
         if any(len(s.split())>100 for s in tempsents):
             documents1parsed.append("NA")
             continue
         parsed = list(self.parser.raw_parse_sents((tempsents)))
         # Keep the first parse of each sentence, as a ParentedTree.
         documents1parsed.append(
             [ParentedTree.convert(list(candidates)[0]) for candidates in parsed]
         )

     results = OrderedDict()
     for d1 in range(len(documents1parsed) - 1):
         d2 = d1+1
         current = documents1parsed[d1]
         following = documents1parsed[d2]
         if current == "NA" or following == "NA":
             continue

         costMatrix = []
         for tree1 in current:
             numnodes = 0
             root1 = Node(tree1.root().label())
             new_sentencedoc1 = self.convert_mytree(tree1,root1)
             sen1nodes = numnodes
             row_costs = []
             for tree2 in following:
                 numnodes=0.0
                 root2 = Node(tree2.root().label())
                 new_sentencedoc2 = self.convert_mytree(tree2,root2)
                 ED = simple_distance(new_sentencedoc1, new_sentencedoc2)
                 row_costs.append(ED / (numnodes + sen1nodes))
             costMatrix.append(row_costs)
         costMatrix = np.array(costMatrix)

         if average==True:
             return 1-np.mean(costMatrix)

         indexes = su.linear_assignment(costMatrix)
         total = 0
         rowMarked = [False] * len(current)
         colMarked = [False] * len(following)
         for row, column in indexes:
             total += costMatrix[row][column]
             rowMarked[row] = True
             colMarked[column] = True
         # Sentences left without a partner contribute their cheapest match.
         for k, marked in enumerate(rowMarked):
             if not marked:
                 total+= np.min(costMatrix[k])
         for c, marked in enumerate(colMarked):
             if not marked:
                 total+= np.min(costMatrix[:,c])
         maxlengraph = max(len(current),len(following))
         results[(d1,d2)] = 1-total/maxlengraph
     return results
开发者ID:USC-CSSL,项目名称:CASSIM,代码行数:62,代码来源:Cassim.py

示例5: merge_tree_nnps

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def merge_tree_nnps(tree):
    """
    Rebuild every height-3 subtree of a parse tree with its NNP run merged.

    Each subtree of height 3 (a phrase whose children are POS nodes over
    words) is flattened to ``[(POS, word), ...]``, handed to
    ``merge_tagged_nnps``, and replaced by a subtree built from the result.
    Intended outcome, for a segment such as:
        (NP
            (JJ old)
            (NNP Pierre)
            (NNP Vinken)
        )
    Returns:
        (NP
            (JJ old)
            (NNP PierreVinken)
        )
    The result is converted back to a plain ``Tree``.
    """

    # Only a parented tree can report the position of one of its subtrees.
    parented = ParentedTree.convert(tree)

    def _has_height_three(subtree):
        return subtree.height() == 3

    for phrase in parented.subtrees(filter=_has_height_three):
        # List form of the phrase's children: [(POS, word), ...]
        tagged_words = [(child.label(), child[0]) for child in phrase]
        merged_words = merge_tagged_nnps(tagged_words)

        # Serialise the merged children and parse them into a fresh subtree.
        pieces = ["(%s %s)" % (pos, word) for pos, word in merged_words]
        bracketed = "(%s %s)" % (phrase.label(), " ".join(pieces))
        replacement = ParentedTree.fromstring(bracketed)

        # Swap the new subtree in at the old subtree's position.
        parented[phrase.treeposition()] = replacement
    return Tree.convert(parented)
开发者ID:jonpiffle,项目名称:ltag_parser,代码行数:32,代码来源:preprocess.py

示例6: is_pred_nominal

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def is_pred_nominal(feats):
    """Feature string ``is_pred_nominal=<bool>`` for a mention pair.

    The value is ``False`` when the two mentions are in different sentences.
    Otherwise the sentence tree is searched for the parent tree of
    ``feats.token`` whose right sibling is a ``VP`` that starts with a
    copula and has, among its direct children, the maximal projection
    obtained for ``feats.token_ref``.
    """
    if feats.sentence != feats.sentence_ref:
        return "is_pred_nominal={}".format(False)

    article_trees = TREES_DICTIONARY[feats.article+".raw"]
    s_tree = ParentedTree.convert(article_trees[int(feats.sentence)])
    NP_i = __get_parent_tree__(feats.token, s_tree)
    NP_j = __get_parent_tree__(feats.token_ref,s_tree)
    nominal = __get_max_projection__(s_tree,NP_j)
    copula_verbs = ["is","are","were","was","am"]

    def check_nominal_construction(tree):
        # Depth-first search; leaves (non-tree children) are ignored.
        for child in tree:
            if not isinstance(child, ParentedTree):
                continue
            if child == NP_i:
                sibling = child.right_sibling()
                if isinstance(sibling,ParentedTree) and sibling.node == "VP":
                    if sibling.leaves()[0] in copula_verbs:
                        for constituent in sibling:
                            if constituent == nominal:
                                return True
            elif check_nominal_construction(child):
                return True
        return False

    return "is_pred_nominal={}".format(check_nominal_construction(s_tree))
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:32,代码来源:feature_functions.py

示例7: j_is_subject

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def j_is_subject(feats):
    """Feature string ``j_is_subject=<value>`` for the referent mention.

    Looks up the parse tree of the referent's sentence, finds the parent
    tree of ``feats.token_ref`` and reports what ``__is_subject__`` returns.
    """
    article_trees = TREES_DICTIONARY[feats.article+".raw"]
    ptree = ParentedTree.convert(article_trees[int(feats.sentence_ref)])
    parent = __get_parent_tree__(feats.token_ref, ptree)
    return "j_is_subject={}".format(
        __is_subject__(ptree, feats.token_ref, parent, ptree)
    )
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:9,代码来源:feature_functions.py

示例8: parse_sentences

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
 def parse_sentences(self, filename, num_sentences):
     """Parse each one-line sentence of a file into syntax trees.

     ``filename`` is read as text with one sentence per line.
     ``num_sentences`` is either the string ``'all'`` (use every line) or a
     value used as the upper slice bound on the list of lines.

     For every parse returned by ``self.parser.raw_parse`` for the
     lower-cased sentence, the ``'ROOT'`` entry of
     ``self.nonterminal_counts`` is incremented, rules are extracted when
     the tokenized sentence has more than 8 tokens, and the bigram counts
     are updated from the ``ParentedTree`` form of the parse.
     """
     # A ``None`` slice bound keeps every line.  The former sentinel of -1
     # silently dropped the last sentence of the file.
     limit = None if num_sentences == 'all' else num_sentences
     # The context manager closes the handle, which used to be leaked.
     with open(filename, 'r') as f:
         sentences = f.readlines()[:limit]
     for count, sentence in enumerate(sentences):
         if count%10==0:
             print("Number of sentences trained: ",count)
         # Get possible parse trees
         trees = self.parser.raw_parse(sentence.lower())
         for tree in trees:
             self.nonterminal_counts['ROOT'] += 1
             tokenized_sentence = self.tokenize_sentence(sentence)
             # Only extract rules from sentences with greater than 8 tokens,
             # to avoid adding rules that generate short, ungrammatical sentences
             if len(tokenized_sentence) > 8:
                 self.extract_rules(tree)
             # Convert the tree into a ParentedTree,
             # which is an NLTK tree that keeps pointers to each node's parent
             ptree = ParentedTree.convert(tree)
             # Calculate the bigram counts for this sentence
             self.get_bigram(ptree, tokenized_sentence)
开发者ID:phuongkhdinh,项目名称:NaturalLanguageProcessing_F2015,代码行数:27,代码来源:training.py

示例9: apposition

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def apposition(feats):
    """Feature string ``apposition=<bool>`` for a mention pair.

    The value is ``False`` when the two mentions are in different sentences.
    Otherwise the sentence tree is searched for a child of an ``NP`` that
    shares a word with ``feats.token_ref``, is directly preceded by a ``,``
    node, and whose comma is in turn preceded by a tree sharing a word with
    ``feats.token``.  Mention words are obtained by splitting on ``_``.
    """
    if feats.sentence!=feats.sentence_ref:
        return "apposition={}".format(False)

    sentence_tree = TREES_DICTIONARY[feats.article+".raw"][int(feats.sentence_ref)]
    ptree = ParentedTree.convert(sentence_tree)
    token_ref = set(feats.token_ref.split("_"))
    token = set(feats.token.split("_"))

    def is_j_apposition(curr_tree):
        # Depth-first search; leaves (non-tree children) are ignored.
        for child in curr_tree:
            if not isinstance(child, ParentedTree):
                continue
            shares_referent_word = bool(token_ref & set(child.leaves()))
            if shares_referent_word and curr_tree.node == "NP":
                comma = child.left_sibling()
                if isinstance(comma, ParentedTree) and comma.node == ",":
                    antecedent = comma.left_sibling()
                    if isinstance(antecedent,ParentedTree):
                        if token & set(antecedent.leaves()):
                            return True
            elif is_j_apposition(child):
                return True
        return False

    return "apposition={}".format(is_j_apposition(ptree))
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:32,代码来源:feature_functions.py

示例10: span

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def span(feats):
    """Feature string ``span=<bool>`` for a mention pair.

    The value is ``False`` when the mentions are in different sentences;
    otherwise it tells whether ``__get_parent_tree__`` yields equal trees
    for both mentions within the shared sentence tree.
    """
    if feats.sentence != feats.sentence_ref:
        same_parent = False
    else:
        article_trees = TREES_DICTIONARY[feats.article+".raw"]
        s_tree = ParentedTree.convert(article_trees[int(feats.sentence)])
        i_parent = __get_parent_tree__(feats.token, s_tree)
        j_parent = __get_parent_tree__(feats.token_ref,s_tree)
        same_parent = i_parent==j_parent
    return "span={}".format(same_parent)
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:11,代码来源:feature_functions.py

示例11: update

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
	def update(self,syntax_tree):
		"""Fold one parsed sentence into the word graph.

		Three passes are made over the leaves of the sentence tree, skipping
		the punctuation words listed in ``bad_words``:

		1. every word gets a node (created with ``num=0`` and its POS tag
		   when missing) whose ``num`` counter is incremented;
		2. the first leaf equal to ``self.target_word`` is located, and an
		   error is printed when there is none;
		3. edge weights between distinct words are increased by an inverse
		   distance term: ``1/sqrt(distance)`` for edges leaving the target
		   word, and the inverse cube root of the product of the three
		   pairwise distances (leaf, other leaf, target leaf) otherwise.

		Finally the cache is invalidated.
		"""
		ptree = ParentedTree.convert(syntax_tree)
		bad_words = [":",",",".","?",";"]

		# Pass 1: node occurrence counts.
		for leaf in get_leaves(ptree):
			word = leaf[0]
			if word in bad_words:
				continue
			if word not in self:
				self.add_node(word,num=0,pos=leaf.pos()[0][1])
			self.node[word]["num"] += 1

		# Pass 2: find the leaf holding the target word.
		central_leaf = next(
			(leaf for leaf in get_leaves(ptree) if leaf[0] == self.target_word),
			None
		)
		if not central_leaf:
			print("Error: target word not in sentence")

		# Pass 3: edge weights.
		for leaf in get_leaves(ptree):
			word = leaf[0]
			if word in bad_words:
				continue
			leaf_is_target = word == self.target_word
			for other_leaf in get_leaves(ptree):
				other_word = other_leaf[0]
				if word == other_word:
					continue
				# Edges pointing at the target are only built from the target itself.
				if not leaf_is_target and other_word == self.target_word:
					continue
				if other_word in bad_words:
					continue
				if (word,other_word) not in self.edges():
					self.add_edge(word,other_word,weight=0)
				if leaf_is_target:
					self.edge[word][other_word]["weight"] += 1.0/math.sqrt(get_distance(leaf,other_leaf))
				else:
					self.edge[word][other_word]["weight"] += 1.0/math.pow(
						get_distance(leaf,other_leaf)*
						get_distance(leaf,central_leaf)*
						get_distance(other_leaf,central_leaf),1/float(3)
						)
		self.invalidate_cache()
开发者ID:daniel-bulger,项目名称:word-sense-disambiguator,代码行数:50,代码来源:graph.py

示例12: same_max_NP

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def same_max_NP(feats):
    """Feature string ``same_max_NP=<bool>`` for a mention pair.

    The value is ``False`` when the mentions are in different sentences.
    Otherwise it is true only when the maximal projections obtained for
    both mentions exist, are both labelled ``NP`` and compare equal.
    """
    if feats.sentence !=  feats.sentence_ref:
        return "same_max_NP={}".format(False)

    article_trees = TREES_DICTIONARY[feats.article+".raw"]
    ptree = ParentedTree.convert(article_trees[int(feats.sentence)])
    parent_i = __get_parent_tree__(feats.token, ptree)
    parent_j = __get_parent_tree__(feats.token_ref, ptree)
    max_p_i = __get_max_projection__(ptree,parent_i)
    max_p_j = __get_max_projection__(ptree, parent_j)
    both_NPs = (
        max_p_i is not None
        and max_p_j is not None
        and max_p_i.node == "NP"
        and max_p_j.node == "NP"
    )
    return "same_max_NP={}".format(max_p_i == max_p_j and both_NPs)
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:20,代码来源:feature_functions.py

示例13: read_treebank_files

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
def read_treebank_files(files, extractor,fe):
    """Collect function-tagging examples from the listed treebank files.

    Every parsed sentence of every file is converted to a ``ParentedTree``,
    post-processed by ``treebank_helper.postprocess`` and handed to
    ``find_examples_in_tree`` together with the two output lists, the
    extractor arguments, the file name and the sentence's index in its file.

    Returns the two lists ``(X, Y)`` that ``find_examples_in_tree`` was
    given to fill.
    """
    X, Y = [], []
    for filename in files:
        for scount, sentence in enumerate(treebank.parsed_sents(filename)):
            tree = ParentedTree.convert(sentence)
            treebank_helper.postprocess(tree)
            find_examples_in_tree(tree, X, Y, extractor,fe, filename, scount, 0)
    return X, Y
开发者ID:EddieNejadi,项目名称:Machine_Learning,代码行数:20,代码来源:funtag.py

示例14: parse_sentences

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
 def parse_sentences(self, filename, num_sentences):
     """Parse each sentence of a file into trees.

     ``filename`` is read as text with one sentence per line.
     ``num_sentences`` is either the string ``'all'`` (use every line) or a
     value used as the upper slice bound on the list of lines.

     For every parse returned by ``self.parser.raw_parse`` for the
     lower-cased sentence, the ``'ROOT'`` entry of
     ``self.nonterminal_counts`` is incremented, rules are extracted when
     the tokenized sentence has more than 5 tokens, and the bigram counts
     are updated from the ``ParentedTree`` form of the parse.
     """
     # A ``None`` slice bound keeps every line.  The former sentinel of -1
     # silently dropped the last sentence of the file.
     limit = None if num_sentences == 'all' else num_sentences
     # The context manager closes the handle, which used to be leaked.
     with open(filename, 'r') as f:
         sentences = f.readlines()[:limit]
     for count, sentence in enumerate(sentences):
         # Progress output every ten sentences.
         if count%10==0:
             print(count)
         trees = self.parser.raw_parse(sentence.lower())
         for tree in trees:
             self.nonterminal_counts['ROOT'] += 1
             tokenized_sentence = self.tokenize_sentence(sentence)
             if len(tokenized_sentence) > 5:
                 self.extract_rules(tree)
             ptree = ParentedTree.convert(tree)
             self.get_bigram(ptree, tokenized_sentence)
开发者ID:phuongkhdinh,项目名称:NaturalLanguageProcessing_F2015,代码行数:21,代码来源:parse.py

示例15: get_arginstances

# 需要导入模块: from nltk.tree import ParentedTree [as 别名]
# 或者: from nltk.tree.ParentedTree import convert [as 别名]
	def get_arginstances(self, _pbi) :
		'''
			Build the feature dictionary of every argument of a PropbankInstance.

			Only the features named in self.features are computed. Each feature
			is normalized according to the rules in its if-block.

			NOTE: this excerpt is truncated; the code that turns argfeatures into
			ARGInstances and returns them is not shown here.

			parameters :
				_pbi PropBankInstance
					a PropbankInstance that contains the arguments to be extracted
			return value :
				list of ARGInstances
		'''
		res = []
		for arg in _pbi.arguments : # iterate through all arguments in _pbi
			argfeatures = {}
			
			# predicate feature: the roleset with everything from the first '.' removed
			if 'predicate' in self.features :
				argfeatures['predicate'] = re.sub(r'(\w+)\..+', r'\1', _pbi.roleset) # lemmatize the predicate and then set
				# argfeatures['predicate'] = self.wnl.lemmatize(_pbi.predicate.select(_pbi.tree).leaves()[0], "v")
				# argfeatures['predicate'] = _pbi.predicate.select(_pbi.tree).leaves()[0]
			
			# path feature: node labels from the argument up to the joint node
			# (joined by "^"), then from the joint node down towards the predicate
			# (joined by "!")
			if 'path' in self.features :
				# a ParentedTree is needed for the parent() calls below
				senTree = ParentedTree.convert(_pbi.tree)
				argTree = arg[0].select(senTree)
				predTree = _pbi.predicate.select(senTree)
				# descend through *CHAIN* / *SPLIT* wrapper nodes to their first child
				while argTree.label() == "*CHAIN*" or argTree.label() == "*SPLIT*":
					argTree = argTree[0]
				while predTree.label() == "*CHAIN*" or predTree.label() == "*SPLIT*":					
					predTree = predTree[0]
				
				# ancestor chains, starting at the node itself and ending at the root
				argParents = []
				predParents = []
				while predTree != None:
					predParents.append(predTree)					
					predTree = predTree.parent()
					
				while argTree!= None:
					argParents.append(argTree)
					argTree = argTree.parent()
					
				# NOTE(review): there is no break, so jointNode ends up as the LAST
				# entry of argParents that is also in predParents (the one closest to
				# the root), and membership is tested with ==, not identity -- confirm
				# that the lowest common ancestor was not intended.
				jointNode = None
				for node in argParents:
					if node in predParents:
						jointNode = node
							
				stringPath = ""
				# upward part: argument node up to, but excluding, the joint node
				for i in range(0, argParents.index(jointNode), 1):	 
					node = argParents[i]
					stringPath += re.sub(r"(\w+)-.+", r"\1", node.label()) + "^"
				
				# downward part: joint node down to index 1, excluding the predicate node itself
				for i in range(predParents.index(jointNode) , 0, -1):
					node = predParents[i]
					stringPath += re.sub(r"(\w+)-.+", r"\1", node.label()) + "!"
				# drop the trailing separator
				argfeatures['path'] = stringPath[:-1]
			
			# phraseType feature
			if 'phraseType' in self.features :
				argTree = arg[0].select(_pbi.tree)
				while argTree.label() == "*CHAIN*" or argTree.label() == "*SPLIT*": # traverse tree until a real constituent is found
					argTree = argTree[0]
				argfeatures['phraseType'] = re.sub(r"(\w+)[-=$\|].+", r"\1", argTree.label()) # normalize (e.g. NP-SUBJ -> NP) and set
			
			# position feature
			if 'position' in self.features :
				predTreePointer = _pbi.predicate
				while not type(predTreePointer) is PropbankTreePointer: # traverse tree while the pointer is not a real constituent
					predTreePointer = predTreePointer.pieces[0] 
				pred_wordnum = predTreePointer.wordnum # set predicate wordnumber
				# NOTE(review): stays None when arg[0] is none of the three pointer
				# types checked below, and is then compared with pred_wordnum -- confirm
				# that this cannot happen.
				arg_wordnum = None
				if type(arg[0]) is PropbankTreePointer :
					arg_wordnum = arg[0].wordnum
				# PropChainTreePointer and PropSplitTreePointer don't have wordnums and must be traversed
				elif (type(arg[0]) is PropbankChainTreePointer) or (type(arg[0]) is PropbankSplitTreePointer) :
					arg_pieces = arg[0].pieces
					# traverse the tree (always take the left-most subtree) until a PropbankTreePointer is found
					while type(arg_pieces[0]) is not PropbankTreePointer :
						arg_pieces = arg_pieces[0].pieces
					# then get the wordnum
					arg_wordnum = arg_pieces[0].wordnum
				# compare wordnumbers and normalize to 'before' or 'after'
				if arg_wordnum < pred_wordnum :
					argfeatures['position'] = 'before'
				else :
					argfeatures['position'] = 'after'
					
			# voice feature
			if 'voice' in self.features :
				# extract voice from PropBankInstance-inflection and normalize to 'active', 'passive' and 'NONE'
				if _pbi.inflection.voice == 'a' :
					argfeatures['voice'] = 'active'
				elif _pbi.inflection.voice == 'p' :
					argfeatures['voice'] = 'passive'
				else:
					argfeatures['voice'] = 'NONE'
			
			# class feature: the part of the argument label before the first "-"
			if 'class' in self.features :
				argfeatures['class'] = arg[1].split("-")[0]
#.........这里部分代码省略.........
开发者ID:julianbaumann,项目名称:SemanticArgumentClassification,代码行数:103,代码来源:argext.py


注:本文中的nltk.tree.ParentedTree.convert方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。