本文整理汇总了Python中nltk.tree.Tree.extend方法的典型用法代码示例。如果您正苦于以下问题：Python Tree.extend方法的具体用法？Python Tree.extend怎么用？Python Tree.extend使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tree.Tree的用法示例。
在下文中一共展示了Tree.extend方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_ace_file
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import extend [as 别名]
def load_ace_file(textfile, fmt):
    """Yield a single ``Tree`` marking the named entities of one ACE document.

    The annotations are read from ``textfile + '.tmx.rdc.xml'``; only entity
    mentions whose ``TYPE`` attribute is ``'NAME'`` are used.  The text is
    read from ``textfile`` and tokenized with ``word_tokenize``; each entity
    span becomes a subtree whose leaves are the whitespace-split span.

    This is a generator: nothing is printed, read, or validated until the
    first item is requested.

    :param textfile: path of the source text file.
    :param fmt: ``'binary'`` to label every entity subtree ``'NE'``, or
        ``'multiclass'`` to label it with the text of its ``entity_type``
        element.
    :return: a generator yielding exactly one ``Tree`` labelled ``'S'``.
    :raises ValueError: if ``fmt`` is neither ``'binary'`` nor
        ``'multiclass'`` (raised after both files have been read).
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(' - %s' % os.path.split(textfile)[1])
    annfile = textfile + '.tmx.rdc.xml'

    # Read the xml file, and get a list of (start, end, type) entities.
    entities = []
    with open(annfile) as ann_fp:
        xml = ET.parse(ann_fp).getroot()
    for entity in xml.findall('document/entity'):
        typ = entity.find('entity_type').text
        for mention in entity.findall('entity_mention'):
            if mention.get('TYPE') != 'NAME':
                continue  # only NEs
            start = int(mention.find('head/charseq/start').text)
            # +1 so the value can be used directly as a slice upper bound.
            end = int(mention.find('head/charseq/end').text) + 1
            entities.append((start, end, typ))

    # Read the text file, and mark the entities.
    with open(textfile) as text_fp:
        text = text_fp.read()

    # Strip XML tags (except <TEXT>/</TEXT>), since they don't count
    # towards the indices.
    text = re.sub(r'<(?!/?TEXT)[^>]+>', '', text)

    # Blank out anything before/after <TEXT>.  The prefix is replaced by
    # spaces rather than removed so later offsets still line up; the 6
    # subtracted characters are the length of the '<TEXT>' tag itself.
    def subfunc(m):
        return ' ' * (m.end() - m.start() - 6)

    text = re.sub(r'[\s\S]*<TEXT>', subfunc, text)
    text = re.sub(r'</TEXT>[\s\S]*', '', text)

    # Simplify quotes (each replacement keeps the two-character width).
    text = re.sub("``", ' "', text)
    text = re.sub("''", '" ', text)

    # Binary distinction (NE or not NE) vs. multiclass distinction (NE type):
    # the two modes differ only in the label given to each entity subtree.
    if fmt not in ('binary', 'multiclass'):
        raise ValueError('bad fmt value')

    pos = 0
    toks = Tree('S', [])
    for (start, end, typ) in sorted(entities):
        if start < pos:
            start = pos  # Overlapping! Deal with this better?
        if end <= start:
            continue
        toks.extend(word_tokenize(text[pos:start]))
        label = 'NE' if fmt == 'binary' else typ
        toks.append(Tree(label, text[start:end].split()))
        pos = end
    toks.extend(word_tokenize(text[pos:]))
    yield toks
示例2: load_ace_file
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import extend [as 别名]
def load_ace_file(textfile, fmt):
    """Yield a single ``Tree`` marking the named entities of one ACE document.

    The annotations are read from ``textfile + ".tmx.rdc.xml"``; only entity
    mentions whose ``TYPE`` attribute is ``"NAME"`` are used.  The text is
    read from ``textfile`` and tokenized with ``word_tokenize``; each entity
    span becomes a subtree whose leaves are the whitespace-split span.

    This is a generator: nothing is printed, read, or validated until the
    first item is requested.

    :param textfile: path of the source text file.
    :param fmt: ``"binary"`` to label every entity subtree ``"NE"``, or
        ``"multiclass"`` to label it with the text of its ``entity_type``
        element.
    :return: a generator yielding exactly one ``Tree`` labelled ``"S"``.
    :raises ValueError: if ``fmt`` is neither ``"binary"`` nor
        ``"multiclass"`` (raised after both files have been read).
    """
    print(" - %s" % os.path.split(textfile)[1])
    annfile = textfile + ".tmx.rdc.xml"

    # Read the xml file, and get a list of (start, end, type) entities.
    # The handle is opened in a ``with`` block so it is closed after parsing.
    entities = []
    with open(annfile) as ann_fp:
        xml = ET.parse(ann_fp).getroot()
    for entity in xml.findall("document/entity"):
        typ = entity.find("entity_type").text
        for mention in entity.findall("entity_mention"):
            if mention.get("TYPE") != "NAME":
                continue  # only NEs
            start = int(mention.find("head/charseq/start").text)
            # +1 so the value can be used directly as a slice upper bound.
            end = int(mention.find("head/charseq/end").text) + 1
            entities.append((start, end, typ))

    # Read the text file, and mark the entities.
    with open(textfile) as fp:
        text = fp.read()

    # Strip XML tags (except <TEXT>/</TEXT>), since they don't count
    # towards the indices.
    text = re.sub(r"<(?!/?TEXT)[^>]+>", "", text)

    # Blank out anything before/after <TEXT>.  The prefix is replaced by
    # spaces rather than removed so later offsets still line up; the 6
    # subtracted characters are the length of the "<TEXT>" tag itself.
    def subfunc(m):
        return " " * (m.end() - m.start() - 6)

    text = re.sub(r"[\s\S]*<TEXT>", subfunc, text)
    text = re.sub(r"</TEXT>[\s\S]*", "", text)

    # Simplify quotes (each replacement keeps the two-character width).
    text = re.sub("``", ' "', text)
    text = re.sub("''", '" ', text)

    # Binary distinction (NE or not NE) vs. multiclass distinction (NE type):
    # the two modes differ only in the label given to each entity subtree.
    if fmt not in ("binary", "multiclass"):
        raise ValueError("bad fmt value")

    pos = 0
    toks = Tree("S", [])
    for (start, end, typ) in sorted(entities):
        if start < pos:
            start = pos  # Overlapping! Deal with this better?
        if end <= start:
            continue
        toks.extend(word_tokenize(text[pos:start]))
        label = "NE" if fmt == "binary" else typ
        toks.append(Tree(label, text[start:end].split()))
        pos = end
    toks.extend(word_tokenize(text[pos:]))
    yield toks