本文整理匯總了Python中philologic.OHCOVector.Record.attrib["pos"]方法的典型用法代碼示例。如果您正苦於以下問題:Python Record.attrib["pos"]方法的具體用法?Python Record.attrib["pos"]怎麼用?Python Record.attrib["pos"]使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類philologic.OHCOVector.Record的用法示例。
在下文中一共展示了Record.attrib["pos"]方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: tag_words
# 需要導入模塊: from philologic.OHCOVector import Record [as 別名]
# 或者: from philologic.OHCOVector.Record import attrib["pos"] [as 別名]
def tag_words(loader_obj, text):
    """Add TreeTagger "pos" and "lemma" attributes to the word records of a file.

    The object file at text["raw"] is read twice.  First, every "word"
    token is piped to TreeTagger, whose output is collected in
    text["raw"] + ".ttout".  Then the file is rewritten through
    text["raw"] + ".tmp", with "pos" and "lemma" set on each word record.
    On success the rewritten file replaces text["raw"] and the ".ttout"
    scratch file is removed.

    If a TreeTagger output line does not match the word being processed,
    an error is printed to stderr and the function returns early: the
    original text["raw"] is left untouched and both scratch files stay on
    disk.

    Arguments:
    loader_obj -- not used by this function.
    text -- mapping whose "raw" entry is the path of the object file.  Each
        line must hold exactly four tab-separated fields: type, word, id
        and an attribute string that is parsed with loads().

    Relies on these names from the enclosing module: tt_path, param_file,
    maxlines, Popen, PIPE, Record, loads, os and sys.
    """
    raw_path = text["raw"]
    ttout_path = raw_path + ".ttout"
    tmp_path = raw_path + ".tmp"

    _run_treetagger(raw_path, ttout_path)

    # All files are closed (and therefore flushed) by the helpers before the
    # rewritten copy is moved over the original.
    if not _merge_tags(raw_path, ttout_path, tmp_path):
        return
    os.remove(raw_path)
    os.rename(tmp_path, raw_path)
    os.remove(ttout_path)


def _tagger_form(word):
    # Form of a word as it is sent to TreeTagger and compared with its
    # output: undecodable UTF-8 bytes dropped, then lowercased.
    return word.decode('utf-8', 'ignore').lower().encode('utf-8')


def _run_treetagger(raw_path, ttout_path):
    # Pipe every "word" token of raw_path through TreeTagger into ttout_path.
    tt_args = [tt_path, "-token", "-lemma", "-prob", '-no-unknown', "-threshold", ".01", param_file]
    ttout_fh = open(ttout_path, "w")
    try:
        tt_worker = Popen(tt_args, stdin=PIPE, stdout=ttout_fh)
        line_count = 0
        # read through the object file, pass the words to treetagger
        with open(raw_path, "r") as raw_fh:
            for line in raw_fh:
                obj_type, word, _obj_id, _attrib = line.split('\t')
                if obj_type != "word":
                    continue
                # close and re-open the treetagger process to prevent garbage
                # output.
                if line_count > maxlines:
                    tt_worker.stdin.close()
                    tt_worker.wait()
                    # The finished worker no longer needs the old handle;
                    # reopen in append mode so earlier output is kept.
                    ttout_fh.close()
                    ttout_fh = open(ttout_path, "a")
                    tt_worker = Popen(tt_args, stdin=PIPE, stdout=ttout_fh)
                    line_count = 0
                print >> tt_worker.stdin, _tagger_form(word)
                line_count += 1
        # finish tagging
        tt_worker.stdin.close()
        tt_worker.wait()
    finally:
        ttout_fh.close()


def _merge_tags(raw_path, ttout_path, tmp_path):
    # Write tmp_path as a copy of raw_path with the TreeTagger results added
    # to each word record.  Returns False (after reporting on stderr) when
    # the tagger output does not line up with the words, True otherwise.
    with open(tmp_path, "w") as tmp_fh, \
            open(ttout_path, "r") as tag_fh, \
            open(raw_path, "r") as raw_fh:
        for line in raw_fh:
            obj_type, word, obj_id, attrib = line.split('\t')
            record = Record(obj_type, word, obj_id.split())
            record.attrib = loads(attrib)
            if obj_type == "word":
                tag_l = tag_fh.readline()
                # Only the first tab-separated tag on the line is used.
                next_word, tag = tag_l.split("\t")[0:2]
                # The probability is not stored, but unpacking three values
                # still enforces the "pos lemma prob" shape of the tag.
                pos, lem, _prob = tag.split(" ")
                if next_word != _tagger_form(word):
                    print >> sys.stderr, "TREETAGGER ERROR:", next_word, " != ", word, pos, lem
                    return False
                record.attrib["pos"] = pos
                record.attrib["lemma"] = lem
            print >> tmp_fh, record
    return True