本文整理汇总了Python中Utils.ElementTreeUtils.encodeNewlines方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTreeUtils.encodeNewlines方法的具体用法?Python ElementTreeUtils.encodeNewlines怎么用?Python ElementTreeUtils.encodeNewlines使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Utils.ElementTreeUtils的用法示例。
在下文中一共展示了ElementTreeUtils.encodeNewlines方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process
# 需要导入模块: from Utils import ElementTreeUtils [as 别名]
# 或者: from Utils.ElementTreeUtils import encodeNewlines [as 别名]
def process(input, output=None, preprocess=True, debug=False):
    """
    Stream an XML document and replace its "metamap" elements.

    Every element read from `input` is copied to the output writer, except
    for "metamap" elements and everything nested inside them. When a
    "metamap" element has been fully read it is (optionally) passed through
    convert() together with the most recently seen "sentence" element, and
    the result is written to the output in place of the original.

    @param input: the XML source handed to ETUtils.ETIteratorFromObj
    @param output: target handed to ETUtils.ETWriter; if None, nothing is
                   written and the input is only iterated
    @param preprocess: if True, each "metamap" element is replaced by
                       convert(element, sentence) before being written
    @param debug: if True, the temporary work directory is kept and its
                  path is reported on stderr
    @return: the `output` argument, unchanged
    """
    counter = ProgressCounter(id="MetaMap")
    # Create working directory
    workdir = tempfile.mkdtemp()
    try:
        outWriter = None
        if output is not None:
            outWriter = ETUtils.ETWriter(output)
        # Loop iteratively over elements
        skip = False
        for event, element in ETUtils.ETIteratorFromObj(input, ("start", "end")):
            if event == "start": # element start message, element may not be fully read yet
                if element.tag == "sentence":
                    sentence = element
                    counter.update(1, "Processing MetaMap (" + sentence.get("id") + "): ")
                elif element.tag == "metamap": # skip the metamap element to remove the original one
                    skip = True
                if not skip and outWriter is not None:
                    outWriter.begin(element)
            elif event == "end": # element is fully read in memory
                if not skip and outWriter is not None:
                    outWriter.end(element)
                if element.tag == "metamap":
                    skip = False # write elements again after this one
                    # NOTE(review): `sentence` is only bound once a "sentence"
                    # start event has been seen; a "metamap" element outside
                    # any sentence would fail here when preprocess is True.
                    if preprocess:
                        element = convert(element, sentence)
                    # Without an output target there is no writer to insert
                    # the new element into, so only write when one exists.
                    if outWriter is not None:
                        outWriter.write(element) # insert the new metamap element into the output stream
        if outWriter is not None:
            print >> sys.stderr, "Writing output to", output
            outWriter.close()
            ETUtils.encodeNewlines(output)
    finally:
        # Clean up the work directory even if processing failed part-way,
        # unless the caller asked to keep it for debugging.
        if debug:
            print >> sys.stderr, "Work directory preserved for debugging at", workdir
        else:
            shutil.rmtree(workdir)
    return output
示例2: getSubset
# 需要导入模块: from Utils import ElementTreeUtils [as 别名]
# 或者: from Utils.ElementTreeUtils import encodeNewlines [as 别名]
def getSubset(input, output=None, fraction=1.0, seed=0, ids=None, attributes=None, invert=False, targetElementTag="document"):
    """
    Copy a subset of the target elements of an XML document to `output`.

    The input is streamed element by element. For each element whose tag is
    `targetElementTag`, select() decides whether it (and everything nested
    inside it) is skipped. Elements that are not skipped are passed to the
    output writer. Per-tag counts of kept and removed elements are printed
    to stderr at the end.

    @param input: the XML source handed to ETUtils.ETIteratorFromObj
    @param output: target handed to ETUtils.ETWriter; if None, nothing is
                   written and only the counts are reported
    @param fraction: passed to getSample() when neither `ids` nor
                     `attributes` is given
    @param seed: passed to getSample() together with `fraction`
    @param ids: passed through to select()
    @param attributes: mapping passed through to select(); every value must
                       be a list or a tuple
    @param invert: passed through to select()
    @param targetElementTag: tag of the elements the selection applies to
    """
    distribution = None
    if ids is None and attributes is None:
        print >> sys.stderr, "No id-file, using pseudorandom distribution"
        targetTotal = getElementCounts(input, [targetElementTag])[targetElementTag]
        distribution = getSample(targetTotal, fraction, seed)
    elif attributes is not None:
        print >> sys.stderr, "Selecting subset with attributes:", attributes
        for key in attributes:
            assert isinstance(attributes[key], (list, tuple)), attributes
    counts = defaultdict(int)
    outWriter = None
    if output is not None:
        outWriter = ETUtils.ETWriter(output)
    targetElementCount = 0
    skip = False
    for event, element in ETUtils.ETIteratorFromObj(input, ("start", "end")):
        if event == "start":
            if element.tag == targetElementTag:
                # The decision made here stays in force for all nested
                # elements until the target element's "end" event.
                skip = select(targetElementCount, distribution, element, ids, attributes, invert)
                targetElementCount += 1
            if not skip:
                # With no output target there is no writer; still count.
                if outWriter is not None:
                    outWriter.begin(element)
                counts[element.tag + ":kept"] += 1
            else:
                counts[element.tag + ":removed"] += 1
        elif event == "end":
            if not skip and outWriter is not None:
                outWriter.end(element)
            if element.tag == targetElementTag:
                skip = False
    if outWriter is not None:
        outWriter.close()
        ETUtils.encodeNewlines(output)
    print >> sys.stderr, "Subset for " + str(input) + ": " + str(counts)