本文整理汇总了Python中cyordereddict.OrderedDict.items方法的典型用法代码示例。如果您正苦于以下问题：Python OrderedDict.items方法的具体用法？Python OrderedDict.items怎么用？Python OrderedDict.items使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cyordereddict.OrderedDict的用法示例。
在下文中一共展示了OrderedDict.items方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_iterators
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_iterators(self):
    """Check that keys, values and items iterate in insertion order.

    Both forward iteration and ``reversed()`` are covered, on the dict
    itself and on each of its three views.
    """
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    shuffle(pairs)  # insertion order differs from run to run
    od = OrderedDict(pairs)
    expected_keys = [key for key, _ in pairs]
    expected_values = [value for _, value in pairs]

    # Forward iteration.
    self.assertEqual(list(od), expected_keys)
    self.assertEqual(list(od.keys()), expected_keys)
    self.assertEqual(list(od.values()), expected_values)
    self.assertEqual(list(od.items()), pairs)

    # Reverse iteration.
    self.assertEqual(list(reversed(od)), expected_keys[::-1])
    self.assertEqual(list(reversed(od.keys())), expected_keys[::-1])
    self.assertEqual(list(reversed(od.values())), expected_values[::-1])
    self.assertEqual(list(reversed(od.items())), pairs[::-1])
示例2: test_delitem
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_delitem(self):
    """Check that ``del`` removes a key and keeps the remaining order.

    A second ``del`` of the same key must raise KeyError.
    """
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    od = OrderedDict(pairs)
    del od['a']
    self.assertNotIn('a', od)
    with self.assertRaises(KeyError):
        del od['a']
    # Everything except the deleted pair, in the original order.
    remaining = [pair for pair in pairs if pair[0] != 'a']
    self.assertEqual(list(od.items()), remaining)
示例3: test_setdefault
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_setdefault(self):
    """Check ``setdefault`` for existing keys, new keys and ``__missing__``."""
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    shuffle(pairs)
    od = OrderedDict(pairs)
    order_before = list(od.items())

    # Existing key: the stored value is returned and the order is unchanged.
    self.assertEqual(od.setdefault('a', 10), 3)
    self.assertEqual(list(od.items()), order_before)

    # New key: the default is returned and the pair is added at the end.
    self.assertEqual(od.setdefault('x', 10), 10)
    last_pair = list(od.items())[-1]
    self.assertEqual(last_pair, ('x', 10))

    # setdefault must still work when __missing__ is defined.
    class WithMissing(OrderedDict):
        def __missing__(self, key):
            return 0

    self.assertEqual(WithMissing().setdefault(5, 9), 9)
示例4: test_reinsert
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_reinsert(self):
    """Check that a deleted and re-inserted key moves to the end.

    Sequence: insert a, insert b, delete a, re-insert a; afterwards a must
    come later than b.
    """
    od = OrderedDict()
    for key, value in (('a', 1), ('b', 2)):
        od[key] = value
    del od['a']
    od['a'] = 1
    self.assertEqual(list(od.items()), [('b', 2), ('a', 1)])
示例5: _stats
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def _stats(self):
    """Return a summary string of ``key="value"`` pairs.

    The pairs are ``id_string``, ``versions`` (the number of versions) and
    ``row_count`` (the length of the ``survey`` list in the ``content`` of
    the last item's schema), joined by a newline followed by a tab.
    """
    summary = OrderedDict()
    summary['id_string'] = self.id_string
    summary['versions'] = len(self.versions)
    # summary['submissions'] = self.submissions_count()
    last_schema = self[-1].schema
    survey_rows = last_schema.get('content', {}).get('survey', [])
    summary['row_count'] = len(survey_rows)
    # returns stats in the format [ key="value" ]
    formatted = ['%s="%s"' % (key, value) for key, value in summary.items()]
    return '\n\t'.join(formatted)
示例6: test_update
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_update(self):
    """Exercise ``OrderedDict.update`` with each accepted argument form."""
    with self.assertRaises(TypeError):
        OrderedDict().update([('a', 1), ('b', 2)], None)  # too many args
    pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]

    # dict input (compared after sorting)
    od = OrderedDict()
    od.update(dict(pairs))
    self.assertEqual(sorted(od.items()), pairs)

    # kwds input (compared after sorting)
    od = OrderedDict()
    od.update(**dict(pairs))
    self.assertEqual(sorted(od.items()), pairs)

    # pairs input
    od = OrderedDict()
    od.update(pairs)
    self.assertEqual(list(od.items()), pairs)

    # mixed input: keyword arguments override and extend the positional pairs
    od = OrderedDict()
    od.update([('a', 1), ('b', 2), ('c', 9), ('d', 4)], c=3, e=5)
    self.assertEqual(list(od.items()), pairs)

    # Issue 9137: Named argument called 'other' or 'self'
    # shouldn't be treated specially.
    od = OrderedDict()
    od.update(self=23)
    self.assertEqual(list(od.items()), [('self', 23)])
    od = OrderedDict()
    od.update(other={})
    self.assertEqual(list(od.items()), [('other', {})])
    od = OrderedDict()
    od.update(red=5, blue=6, other=7, self=8)
    self.assertEqual(sorted(od.items()),
                     [('blue', 6), ('other', 7), ('red', 5), ('self', 8)])

    # Make sure that direct calls to update do not clear previous contents
    # and that updated items are not moved to the end.
    d = OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 44), ('e', 55)])
    d.update([('e', 5), ('f', 6)], g=7, d=4)
    expected = [('a', 1), ('b', 2), ('c', 3), ('d', 4),
                ('e', 5), ('f', 6), ('g', 7)]
    self.assertEqual(list(d.items()), expected)

    # Invalid calls.
    with self.assertRaises(TypeError):
        OrderedDict().update(42)
    with self.assertRaises(TypeError):
        OrderedDict().update((), ())
    with self.assertRaises(TypeError):
        OrderedDict.update()
示例7: test_init
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_init(self):
    """Check the argument forms accepted by the OrderedDict constructor."""
    with self.assertRaises(TypeError):
        OrderedDict([('a', 1), ('b', 2)], None)  # too many args
    pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]

    from_dict = OrderedDict(dict(pairs))  # dict input
    self.assertEqual(sorted(from_dict.items()), pairs)
    from_kwds = OrderedDict(**dict(pairs))  # kwds input
    self.assertEqual(sorted(from_kwds.items()), pairs)
    from_pairs = OrderedDict(pairs)  # pairs input
    self.assertEqual(list(from_pairs.items()), pairs)
    mixed = OrderedDict([('a', 1), ('b', 2), ('c', 9), ('d', 4)],
                        c=3, e=5)  # mixed input
    self.assertEqual(list(mixed.items()), pairs)

    # cyordereddict: the check that __init__ takes no positional argument
    # besides 'self' (via inspect.getargspec) was removed here because slot
    # wrappers (on extension types) cannot be inspected.

    # Make sure that direct calls to __init__ do not clear previous contents
    d = OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 44), ('e', 55)])
    d.__init__([('e', 5), ('f', 6)], g=7, d=4)
    expected = [('a', 1), ('b', 2), ('c', 3), ('d', 4),
                ('e', 5), ('f', 6), ('g', 7)]
    self.assertEqual(list(d.items()), expected)
示例8: test_init
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_init(self):
    """Check the argument forms accepted and rejected by the constructor."""
    with self.assertRaises(TypeError):
        OrderedDict([('a', 1), ('b', 2)], None)  # too many args
    pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]

    from_dict = OrderedDict(dict(pairs))  # dict input
    self.assertEqual(sorted(from_dict.items()), pairs)
    from_kwds = OrderedDict(**dict(pairs))  # kwds input
    self.assertEqual(sorted(from_kwds.items()), pairs)
    from_pairs = OrderedDict(pairs)  # pairs input
    self.assertEqual(list(from_pairs.items()), pairs)
    mixed = OrderedDict([('a', 1), ('b', 2), ('c', 9), ('d', 4)],
                        c=3, e=5)  # mixed input
    self.assertEqual(list(mixed.items()), pairs)

    # make sure no positional args conflict with possible kwdargs
    self.assertEqual(list(OrderedDict(self=42).items()), [('self', 42)])
    self.assertEqual(list(OrderedDict(other=42).items()), [('other', 42)])
    with self.assertRaises(TypeError):
        OrderedDict(42)
    with self.assertRaises(TypeError):
        OrderedDict((), ())
    with self.assertRaises(TypeError):
        OrderedDict.__init__()

    # Make sure that direct calls to __init__ do not clear previous contents
    d = OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 44), ('e', 55)])
    d.__init__([('e', 5), ('f', 6)], g=7, d=4)
    expected = [('a', 1), ('b', 2), ('c', 3), ('d', 4),
                ('e', 5), ('f', 6), ('g', 7)]
    self.assertEqual(list(d.items()), expected)
示例9: read_config
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def read_config(config_file, default_config=None):
    """
    Parse an INI-style configuration file into nested ordered dictionaries.

    This function is from tonic (author: Joe Hamman).

    Parameters
    ----------
    config_file : str
        File name passed to the parser's ``read`` method.
    default_config : mapping, optional
        ``{section: {option: value}}`` defaults. For each section found in
        the file, options missing from that section are filled in from
        here. Sections that appear only in ``default_config`` are not added.

    Returns
    -------
    OrderedDict
        ``{section: OrderedDict(option -> value)}`` in the order reported by
        the parser, with every raw value passed through ``config_type``.
    """
    # Prefer the Cython implementation when it is installed. Only a missing
    # module should trigger the fallback, so catch ImportError specifically.
    try:
        from cyordereddict import OrderedDict
    except ImportError:
        from collections import OrderedDict
    # SafeConfigParser no longer exists in Python 3.12+ (on Python 3 it was a
    # deprecated alias of ConfigParser); keep it only for Python 2.
    try:
        from configparser import ConfigParser
    except ImportError:  # Python 2
        from ConfigParser import SafeConfigParser as ConfigParser

    config = ConfigParser()
    config.optionxform = str  # do not lower-case option names
    config.read(config_file)

    dict1 = OrderedDict()
    for section in config.sections():
        dict1[section] = OrderedDict(
            (option, config_type(config.get(section, option)))
            for option in config.options(section))

    if default_config is not None:
        for name, section in dict1.items():
            if name in default_config:
                for option, value in default_config[name].items():
                    # Values read from the file take precedence.
                    section.setdefault(option, value)
    return dict1
示例10: test_copying
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_copying(self):
    """Check that ordered dicts survive every supported way of copying.

    Covers ``copy()``, the ``copy`` module, pickling with protocols 0, 1, 2
    and -1, a repr/eval round-trip, ``update`` into an empty dict and the
    copy constructor.
    """
    pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
    od = OrderedDict(pairs)
    update_test = OrderedDict()
    update_test.update(od)

    duplicates = [od.copy(), copy.copy(od), copy.deepcopy(od)]
    duplicates.extend(pickle.loads(pickle.dumps(od, protocol))
                      for protocol in (0, 1, 2, -1))
    duplicates.extend([eval(repr(od)), update_test, OrderedDict(od)])

    for dup in duplicates:
        self.assertIsNot(dup, od)
        self.assertEqual(dup, od)
        self.assertEqual(list(dup.items()), list(od.items()))
        self.assertEqual(len(dup), len(od))
        self.assertEqual(type(dup), type(od))
示例11: startexp
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def startexp(
prm, # A DictObj with the structure of parser.DEFAULTS
resultdir='results',
rerun=False):
"""Execute an experiment."""
if rerun:
if not os.path.exists(resultdir):
raise ValueError('Directory %r does not exist.\n--rerun requires a'
' directory with the grammar(s) of a previous experiment.'
% resultdir)
else:
if os.path.exists(resultdir):
raise ValueError('Directory %r exists.\n'
'Use --rerun to parse with existing grammar '
'and overwrite previous results.' % resultdir)
os.mkdir(resultdir)
# Log everything, and send it to stderr, in a format with just the message.
formatstr = '%(message)s'
if prm.verbosity == 0:
logging.basicConfig(level=logging.WARNING, format=formatstr)
elif prm.verbosity == 1:
logging.basicConfig(level=logging.INFO, format=formatstr)
elif prm.verbosity == 2:
logging.basicConfig(level=logging.DEBUG, format=formatstr)
elif 3 <= prm.verbosity <= 4:
logging.basicConfig(level=5, format=formatstr)
else:
raise ValueError('verbosity should be >= 0 and <= 4. ')
# also log to a file
fileobj = logging.FileHandler(filename='%s/output.log' % resultdir)
fileobj.setLevel(logging.DEBUG)
fileobj.setFormatter(logging.Formatter(formatstr))
logging.getLogger('').addHandler(fileobj)
logging.info('Disco-DOP %s, running on Python %s',
__version__, sys.version.split()[0])
if not rerun:
trees, sents, train_tagged_sents = loadtraincorpus(
prm.corpusfmt, prm.traincorpus, prm.binarization, prm.punct,
prm.functions, prm.morphology, prm.removeempty, prm.ensureroot,
prm.transformations, prm.relationalrealizational)
elif isinstance(prm.traincorpus.numsents, float):
raise ValueError('need to specify number of training set sentences, '
'not fraction, in rerun mode.')
testsettb = treebank.READERS[prm.corpusfmt](
prm.testcorpus.path, encoding=prm.testcorpus.encoding,
headrules=prm.binarization.headrules,
removeempty=prm.removeempty, morphology=prm.morphology,
functions=prm.functions, ensureroot=prm.ensureroot)
if isinstance(prm.testcorpus.numsents, float):
prm.testcorpus.numsents = int(prm.testcorpus.numsents
* len(testsettb.blocks()))
if prm.testcorpus.skiptrain:
prm.testcorpus.skip += ( # pylint: disable=maybe-no-member
prm.traincorpus.numsents) # pylint: disable=maybe-no-member
test_blocks = OrderedDict()
test_trees = OrderedDict()
test_tagged_sents = OrderedDict()
for n, item in testsettb.itertrees(
prm.testcorpus.skip,
prm.testcorpus.skip # pylint: disable=no-member
+ prm.testcorpus.numsents):
if 1 <= len(item.sent) <= prm.testcorpus.maxwords:
test_blocks[n] = item.block
test_trees[n] = item.tree
test_tagged_sents[n] = [(word, tag) for word, (_, tag)
in zip(item.sent, sorted(item.tree.pos()))]
logging.info('%d test sentences after length restriction <= %d',
len(test_trees), prm.testcorpus.maxwords)
lexmodel = None
simplelexsmooth = False
test_tagged_sents_mangled = test_tagged_sents
if prm.postagging and prm.postagging.method in (
'treetagger', 'stanford', 'frog'):
if prm.postagging.method == 'treetagger':
# these two tags are never given by tree-tagger,
# so collect words whose tag needs to be overriden
overridetags = ('PTKANT', 'PIDAT')
elif prm.postagging.method == 'stanford':
overridetags = ('PTKANT', )
elif prm.postagging.method == 'frog':
overridetags = ()
taglex = defaultdict(set)
for sent in train_tagged_sents:
for word, tag in sent:
taglex[word].add(tag)
overridetagdict = {tag:
{word for word, tags in taglex.items() if tags == {tag}}
for tag in overridetags}
tagmap = {'$(': '$[', 'PAV': 'PROAV'}
test_tagged_sents_mangled = lexicon.externaltagging(
prm.postagging.method, prm.postagging.model, test_tagged_sents,
overridetagdict, tagmap)
if prm.postagging.retag and not rerun:
logging.info('re-tagging training corpus')
sents_to_tag = OrderedDict(enumerate(train_tagged_sents))
train_tagged_sents = lexicon.externaltagging(prm.postagging.method,
#.........这里部分代码省略.........
示例12: FormVersion
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
#.........这里部分代码省略.........
if 'label' in _f:
if not isinstance(_f['label'], list):
_f['label'] = [_f['label']]
_labels = LabelStruct(labels=_f['label'],
translations=self.translations)
field.labels = _labels
assert 'labels' not in _f
def __repr__(self):
    """Return ``<FormVersion ...>`` wrapping the string from ``_stats()``."""
    summary = self._stats()
    return '<FormVersion %s>' % summary
def _stats(self):
    """Return a summary string of ``key="value"`` pairs.

    The pairs are ``id_string``, ``version`` (this object's ``id``) and
    ``row_count`` (the length of the ``survey`` list in the schema's
    ``content``), joined by a newline followed by a tab.
    """
    summary = OrderedDict()
    summary['id_string'] = self._get_id_string()
    summary['version'] = self.id
    survey_rows = self.schema.get('content', {}).get('survey', [])
    summary['row_count'] = len(survey_rows)
    # returns stats in the format [ key="value" ]
    return '\n\t'.join('%s="%s"' % (key, str(value))
                       for key, value in summary.items())
def to_dict(self, **opts):
    """Return ``flatten_content`` applied to ``schema['content']``.

    All keyword arguments are forwarded to ``flatten_content`` unchanged.
    """
    content = self.schema['content']
    return flatten_content(content, **opts)
# TODO: find where to move that
def _load_submission_xml(self, xml):
    """Not implemented: submissions no longer live on this class.

    Always raises NotImplementedError. The code that used to follow the
    raise was unreachable and has been removed. For whoever ports this to
    Export, it did the following:

    * parsed ``xml`` with ``parse_xml_to_xmljson``;
    * read ``id_string`` and ``version`` from the root ``attributes``;
    * raised ValueError when ``id_string`` differed from
      ``self._get_id_string()`` or when ``version`` differed from
      ``self.form_pack.id_string``;
    * appended ``FormSubmission.from_xml(parsed, self)`` to
      ``self.submissions``.

    :param xml: the submission XML (unused).
    :raises NotImplementedError: always.
    """
    raise NotImplementedError("This doesn't work now that submissions "
                              "are out of the class. Port it to Export.")
def lookup(self, prop, default=None):
    """Return attribute *prop*, deferring to the form pack when unset.

    If this object has no attribute named *prop*, or its value is None, the
    result of ``self.form_pack.lookup(prop, default=default)`` is returned.
    """
    own_value = getattr(self, prop, None)
    if own_value is not None:
        return own_value
    return self.form_pack.lookup(prop, default=default)
def _get_root_node_name(self):
    """Return ``lookup('root_node_name')``, with ``'data'`` as the default."""
    root_node_name = self.lookup('root_node_name', default='data')
    return root_node_name
def _get_id_string(self):
    """Return ``lookup('id_string')``."""
    id_string = self.lookup('id_string')
    return id_string
def _get_title(self):
    """Return this version's title, or the form pack's when it is None.

    Only None triggers the fallback; an empty string is returned as is.
    """
    own_title = self.title
    return self.form_pack.title if own_title is None else own_title
def get_labels(self, lang=UNTRANSLATED, group_sep=None):
    """Return a mapping of labels: ``{section_label: [field_label, ...]}``.

    Section and field labels can be set to use their slug name, their lone
    label, or one of the translated labels. A section is keyed by
    ``section.labels.get(lang)``, falling back to the section name when
    that is falsy.

    If a field is part of a group and a group separator is passed, the
    group label is retrieved, possibly translated, and prepended to the
    field label itself (this is delegated to ``field.get_labels``).
    """
    all_labels = OrderedDict()
    for section_name, section in self.sections.items():
        section_label = section.labels.get(lang) or section_name
        all_labels[section_label] = [
            label
            for _field_name, field in section.fields.items()
            for label in field.get_labels(lang, group_sep)
        ]
    return all_labels
def to_xml(self, warnings=None):
    """Return this form version as UTF-8 encoded, pretty-printed XML.

    The content (without the ``translations`` and ``translated`` sheets) is
    converted with ``formversion_pyxform``; name, id_string, title and
    version are then set on the resulting survey.

    :param warnings: currently unused.
    :raises ValueError: if ``_get_title()`` returns None.
    """
    # todo: collect warnings from pyxform compilation when a list is passed
    content = self.to_dict(remove_sheets=['translations', 'translated'])
    survey = formversion_pyxform(content)

    if self._get_title() is None:
        raise ValueError('cannot create xml on a survey with no title.')

    settings = {
        'name': self.lookup('root_node_name', 'data'),
        'id_string': self.lookup('id_string'),
        'title': self.lookup('title'),
        'version': self.lookup('id'),
    }
    survey.update(settings)
    return survey._to_pretty_xml().encode('utf-8')
示例13: test_setitem
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def test_setitem(self):
od = OrderedDict([('d', 1), ('b', 2), ('c', 3), ('a', 4), ('e', 5)])
od['c'] = 10 # existing element
od['f'] = 20 # new element
self.assertEqual(list(od.items()),
[('d', 1), ('b', 2), ('c', 10), ('a', 4), ('e', 5), ('f', 20)])
示例14: AttrTree
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
class AttrTree(object):
"""
An AttrTree offers convenient, multi-level attribute access for
collections of objects. AttrTree objects may also be combined
together using the update method or merge classmethod. Here is an
example of adding a ViewableElement to an AttrTree and accessing it:
>>> t = AttrTree()
>>> t.Example.Path = 1
>>> t.Example.Path #doctest: +ELLIPSIS
1
"""
_disabled_prefixes = [] # Underscore attributes that should be
_sanitizer = util.sanitize_identifier
@classmethod
def merge(cls, trees):
    """
    Merge a collection of AttrTree objects.

    The first tree is updated in place with each of the remaining trees,
    in order, and is then returned. ``trees`` must be a non-empty sequence
    that supports indexing and slicing; an empty one raises IndexError.
    """
    first = trees[0]
    # Start from the second tree: updating the first tree with itself adds
    # nothing and only calls update() on each of its own entries.
    for tree in trees[1:]:
        first.update(tree)
    return first
def __dir__(self):
    """
    List the attribute names of this node.

    When ``_dir_mode`` is 'user', only the ``children`` list (the child
    nodes added by the user) is returned. Otherwise the names defined on
    the type are followed by the keys of the instance ``__dict__``.
    """
    state = self.__dict__
    if state['_dir_mode'] == 'user':
        return state['children']
    return dir(type(self)) + list(state.keys())
def __init__(self, items=None, identifier=None, parent=None, dir_mode='default'):
    """
    identifier: A string identifier for the current node (if any)
    parent:     The parent node (if any)
    items:      Items as (path, value) pairs to construct
                (sub)tree down to given leaf values. An OrderedDict is
                also accepted, in which case its items are used.
    dir_mode:   Either 'default' or 'user'; stored as ``_dir_mode``.

    Note that the root node does not have a parent and does not
    require an identifier.
    """
    # State is written straight into __dict__ (presumably to bypass a
    # custom __setattr__ defined outside this view -- TODO confirm).
    state = self.__dict__
    state['parent'] = parent
    state['identifier'] = type(self)._sanitizer(identifier, escape=False)
    state['children'] = []
    state['_fixed'] = False
    state['_dir_mode'] = dir_mode  # Either 'default' or 'user'
    state['_fixed_error'] = ('No attribute %r in this AttrTree, and none '
                             'can be added because fixed=True')
    state['data'] = OrderedDict()

    if isinstance(items, OrderedDict):
        items = items.items()
    # Materialise once (needed for Python 3 views); None or anything else
    # falsy means there is nothing to add.
    pairs = list(items) if items else []
    for path, item in pairs:
        self.set_path(path, item)
@property
def path(self):
    """Return the dotted path from the root down to the current node.

    A node without a parent yields its identifier, or the class name when
    the identifier is falsy.
    """
    if not self.parent:
        return self.identifier or self.__class__.__name__
    return '.'.join([self.parent.path, str(self.identifier)])
@property
def fixed(self):
    """If fixed, no new paths can be created via attribute access."""
    return vars(self)['_fixed']

@fixed.setter
def fixed(self, val):
    # Stored under '_fixed' directly in the instance dictionary.
    vars(self)['_fixed'] = val
def update(self, other):
    """
    Update the contents of the current AttrTree with the
    contents of a second AttrTree.

    Entries of ``other`` whose identifier is not yet in ``self.data`` are
    assigned directly; for identifiers already present, the existing
    entry's own ``update`` is called with the new element.

    Raises Exception if ``other`` is not an AttrTree.
    """
    if not isinstance(other, AttrTree):
        raise Exception('Can only update with another AttrTree type.')
    # Both trees are temporarily unfixed for the duration of the update.
    fixed_status = (self.fixed, other.fixed)
    (self.fixed, other.fixed) = (False, False)
    try:
        for identifier, element in other.items():
            if identifier not in self.data:
                self[identifier] = element
            else:
                self[identifier].update(element)
    finally:
        # Restore the original flags even if the update fails part-way,
        # so an error cannot leave either tree permanently unfixed.
        (self.fixed, other.fixed) = fixed_status
def set_path(self, path, val):
#.........这里部分代码省略.........
示例15: externaltagging
# 需要导入模块: from cyordereddict import OrderedDict [as 别名]
# 或者: from cyordereddict.OrderedDict import items [as 别名]
def externaltagging(usetagger, model, sents, overridetag, tagmap):
    """Use an external tool to tag a list of sentences.

    :param usetagger: the tagger to run: 'treetagger', 'stanford' or 'frog'.
    :param model: model file for treetagger / stanford; when falsy, a
        default German model path is used. Not used for frog.
    :param sents: mapping of sentence id to a list of (word, tag) pairs;
        its tags serve as the gold standard in the logged accuracy.
    :param overridetag: passed through to ``tagmangle``.
    :param tagmap: passed through to ``tagmangle``.
    :returns: an OrderedDict mapping each sentence id to the list of
        (word, tag) pairs produced by the tagger.
    :raises ValueError: if ``usetagger`` is not recognised, if the tagger
        is not installed at the expected location, or if the number of
        sentences or tokens changed during tagging.
    """
    logging.info('Start tagging.')
    goldtags = [t for sent in sents.values() for _, t in sent]
    if usetagger == 'treetagger':  # Tree-tagger
        if not os.path.exists('tree-tagger/bin/tree-tagger'):
            raise ValueError(TREETAGGERHELP)
        infile, inname = tempfile.mkstemp(text=True)
        try:
            with os.fdopen(infile, 'w') as infile:
                for tagsent in sents.values():
                    sent = map(itemgetter(0), tagsent)
                    # NOTE(review): joining encoded words with a str
                    # separator looks Python 2 specific -- confirm the
                    # supported interpreter before changing it.
                    infile.write('\n'.join(w.encode('utf-8')
                            for w in sent) + '\n<S>\n')
            filtertags = ''
            if not model:
                model = 'tree-tagger/lib/german-par-linux-3.2-utf8.bin'
                filtertags = '| tree-tagger/cmd/filter-german-tags'
            # NOTE(review): the command line is built by interpolation and
            # run through the shell (needed for the optional filter pipe);
            # `model` must come from trusted configuration.
            tagger = Popen('tree-tagger/bin/tree-tagger -token -sgml'
                    ' %s %s %s' % (model, inname, filtertags),
                    stdout=PIPE, shell=True)
            # communicate() reads all output and waits for the child.
            tagout = tagger.communicate()[0].decode(
                    'utf-8').split('<S>')[:-1]
        finally:
            # Remove the temporary file even when the tagger fails.
            os.unlink(inname)
        taggedsents = OrderedDict((n, [tagmangle(a, None, overridetag, tagmap)
                for a in tags.splitlines() if a.strip()])
                for n, tags in zip(sents, tagout))
    elif usetagger == 'stanford':  # Stanford Tagger
        if not os.path.exists('stanford-postagger-full-2012-07-09'):
            raise ValueError(STANFORDTAGGERHELP)
        infile, inname = tempfile.mkstemp(text=True)
        try:
            with os.fdopen(infile, 'w') as infile:
                for tagsent in sents.values():
                    sent = map(itemgetter(0), tagsent)
                    infile.write(' '.join(w.encode('utf-8')
                            for w in sent) + '\n')
            if not model:
                model = 'models/german-hgc.tagger'
            tagger = Popen(args=(
                    '/usr/bin/java -mx2G -classpath stanford-postagger.jar'
                    ' edu.stanford.nlp.tagger.maxent.MaxentTagger'
                    ' -tokenize false -encoding utf-8'
                    ' -model %s -textFile %s' % (model, inname)).split(),
                    cwd='stanford-postagger-full-2012-07-09',
                    shell=False, stdout=PIPE)
            tagout = tagger.communicate()[0].decode('utf-8').splitlines()
        finally:
            os.unlink(inname)
        taggedsents = OrderedDict((n, [tagmangle(a, '_', overridetag, tagmap)
                for a in tags.split()]) for n, tags in zip(sents, tagout))
    elif usetagger == 'frog':  # Dutch 'frog' tagger
        tagger = Popen(args=[which('frog')] +
                '-n --skip=tacmnp -t /dev/stdin'.split(),
                shell=False, stdin=PIPE, stdout=PIPE)
        tagout, stderr = tagger.communicate(''.join(
                ' '.join(w for w in map(itemgetter(0), tagsent)) + '\n'
                for tagsent in sents.values()).encode('utf8'))
        logging.info(stderr)
        # lines consist of: 'idx token lemma POS score'
        taggedsents = OrderedDict((n,
                [(line.split()[1],
                    line.split()[3].replace('(', '[').replace(')', ']'))
                    for line in lines.splitlines()]) for n, lines
                in zip(sents, tagout.decode('utf-8').split('\n\n')))
    else:
        # Without this branch an unknown tagger name would surface later
        # as an unbound local variable error on `taggedsents`.
        raise ValueError('unknown tagger: %r' % (usetagger, ))
    if len(taggedsents) != len(sents):
        raise ValueError('mismatch in number of sentences after tagging.')
    for n, tags in taggedsents.items():
        if len(sents[n]) != len(tags):
            raise ValueError('mismatch in number of tokens after tagging.\n'
                'before: %r\nafter: %r' % (sents[n], tags))
    newtags = [t for sent in taggedsents.values() for _, t in sent]
    logging.info('Tag accuracy: %5.2f\ngold - cand: %r\ncand - gold %r',
        (100 * accuracy(goldtags, newtags)),
        set(goldtags) - set(newtags), set(newtags) - set(goldtags))
    return taggedsents