本文整理汇总了Python中xml.etree.ElementTree.SubElement.len方法的典型用法代码示例。如果您正苦于以下问题:Python SubElement.len方法的具体用法?Python SubElement.len怎么用?Python SubElement.len使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类xml.etree.ElementTree.SubElement的用法示例。
在下文中一共展示了SubElement.len方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: return_mods
# 需要导入模块: from xml.etree.ElementTree import SubElement [as 别名]
# 或者: from xml.etree.ElementTree.SubElement import len [as 别名]
def return_mods(words_found, path_to_db):
    """
    Find the given words and their modifiers using Ruben's terminology
    extractor. For now this function only works with the first words
    found in WordNet by search_in_dwn.

    :param words_found: list of words that are added to a xml pattern file.
    :type words_found: list
    :param path_to_db: path to the database passed to the extractor script.
    :return container: a container of words mapping to the output of the
        terminology extractor ('modifiers') and the PPMI model search
        ('similar') for the words in words_found.
    :rtype: dictionary
    """
    global my_space
    top = Element('patterns')
    top.append(Comment('Pattern file for terminology extractor'))
    # Pre-build one container entry per input word so extractor hits can be
    # attached below; unknown terms from the extractor raise KeyError instead.
    container = {}
    for word in words_found:
        container[word] = defaultdict(list)  # holds 'modifiers' / 'similar' lists
        # The pattern length lives in the XML attribute dict; setting a Python
        # attribute on the Element (child.len = "2") would never be serialized.
        child = SubElement(top, 'pattern', {'len': "2"})
        # ONLY SEARCHES FOR A N PATTERNS — this is why not every term shows up
        # as an entry in the returned dict. Additional patterns can go here.
        SubElement(child, 'p', {
            "key": "pos",
            "position": "0",
            "values": "a"
        })
        SubElement(child, 'p', {
            "key": "tokens",
            "position": "1",
            "values": word
        })
    # Persist the pattern file for the extractor to read.
    if not os.path.isdir('patterns'):
        os.mkdir('patterns')
    # NOTE(review): ':' in the timestamp makes the filename invalid on Windows —
    # presumably this only ever runs on POSIX; confirm before porting.
    file_name = os.path.join(
        os.path.abspath('.'), 'patterns',
        'xml_pattern-{}.xml'.format(time.strftime('%d-%m-%y-%H:%M:%S')))
    with open(file_name, 'wb') as f:  # binary mode: we write utf8-encoded bytes
        f.write(prettify(top).encode('utf8'))
    # Call the terminology extractor with the newly created patterns.
    # An argument list with shell=False avoids shell injection via path_to_db.
    cmd = ['python', CMD_EXTRACTOR_SCRIPT, '-d', path_to_db, '-p', file_name]
    process = Popen(cmd, stdout=PIPE)
    output, err = process.communicate()
    # Store all the terms and their modifiers in the container.
    # Extractor output format: one "<freq> <modifier> <term>" triple per line.
    for term_element in [line.split() for line in output.split('\n') if line]:
        freq, mod, term = term_element
        try:
            container[term]['modifiers'].append((mod, freq))
        except KeyError:
            print("not found in container: {}".format(term))
    # Augment every entry with its 10 nearest neighbours in the PPMI space.
    for entry_term in container.keys():
        try:
            most_similar_words = my_space.get_neighbours(entry_term, 10, CosSimilarity())
        except KeyError:
            print("not found in model: {}".format(entry_term))
            continue
        container[entry_term]['similar'].extend(most_similar_words)
    return container