本文整理汇总了Python中ijson.items方法的典型用法代码示例。如果您正苦于以下问题:Python ijson.items方法的具体用法?Python ijson.items怎么用?Python ijson.items使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块ijson的用法示例。
在下文中一共展示了ijson.items方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_data
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def _get_data(self) -> list:
    """Return the language-modeling samples, building the cache on first use.

    If ``self.root / self.out_filename`` already exists, it is loaded with
    ``load_language_modeling`` and returned as is.  Otherwise the JSON array
    stored at ``self.root / self.dirname`` is streamed with ``ijson``; the
    ``'text'`` field of every item is passed through ``self._normalize``,
    stripped and split on newlines, and each non-empty line becomes one
    sample.  The samples are then written with ``save_language_modeling``
    and the raw JSON file is deleted.

    Returns:
        list: the loaded or freshly built samples.
    """
    out_path_train = self.root / self.out_filename
    if out_path_train.exists():
        return load_language_modeling(out_path_train)

    raw_path = self.root / self.dirname
    dataset = []
    with open(raw_path, 'r', encoding='utf-8') as jfile:
        for item in tqdm(ijson.items(jfile, 'item')):
            text = self._normalize(item['text']).strip()
            # One sample per non-empty line.  To keep a whole document as a
            # single sample instead, append '\n'.join(<non-empty lines>).
            dataset.extend(line for line in text.split('\n') if line)

    # Persist the processed samples *before* removing the raw file, so that a
    # failure while saving does not leave us with neither of the two.
    save_language_modeling(dataset, to_path=out_path_train)
    raw_path.unlink()
    return dataset
示例2: etree2dict
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def etree2dict(element):
    """Turn an element tree into a dict, imitating how Yahoo Pipes does it.

    The element's attributes seed the result; the element's own text and the
    recursively converted children are then merged in through
    ``_make_content``.  When the element has text and the result holds no key
    other than ``'content'``, the bare ``'content'`` value is returned instead
    of the dict.
    """
    node = dict(element.items())
    node.update(_make_content(node, element.text, strip=True))

    for child in element:
        converted = etree2dict(child)
        node.update(_make_content(node, converted, child.tag))

    # A leaf node without attributes collapses to its content.
    if element.text and set(node) <= {'content'}:
        return node.get('content')

    return node
示例3: load_data_into_grakn
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def load_data_into_grakn(input, session):
    """Insert every item of a JSON data file into Grakn, one transaction each.

    The items are obtained from ``parse_data_to_dictionaries(input)``.  For
    each of them a write transaction is opened on ``session``, the Graql
    insert query is built by calling ``input["template"]`` with the item, the
    query is printed and executed, and the transaction is committed.  A
    summary line is printed at the end.

    :param input: dictionary with the details required to parse the data;
        the keys read here are ``"template"`` and ``"file"``
    :param session: session off of which the transactions are created
    """
    records = parse_data_to_dictionaries(input)

    for record in records:
        with session.transaction().write() as tx:
            query = input["template"](record)
            print("Executing Graql Query: " + query)
            tx.query(query)
            tx.commit()

    summary = ("\nInserted " + str(len(records)) +
               " items from [ " + input["file"] + ".json] into Grakn.\n")
    print(summary)
示例4: create_test_answers_file
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def create_test_answers_file(test_file, test_answers_file):
    """Collect all candidate answers of a test set and dump them to a file.

    ``test_file`` is streamed with ``ijson`` as a JSON array.  For every
    entry, each element of ``entry['options-for-next']`` contributes one
    answer keyed by its ``'candidate-id'``; the stored text is the
    ``'utterance'`` followed by ``" __eou__ "``.  A ``"NONE"`` answer is
    always added.  The answers are written to ``test_answers_file`` as
    ``<id>\\t<answer>`` lines, with newlines removed from the answer text.

    Args:
        test_file: path of the JSON test file.
        test_answers_file: path of the tab-separated file to write.

    Returns:
        dict: mapping of candidate id to answer text.
    """
    answers = {}
    with open(test_file, 'rb') as f:
        for entry in ijson.items(f, 'item'):
            for utterance in entry['options-for-next']:
                answers[utterance['candidate-id']] = utterance['utterance'] + " __eou__ "
    answers["NONE"] = "None __eou__ "

    # Write as UTF-8 explicitly: the utterances come from JSON and may hold
    # characters the platform's default encoding cannot represent.
    with open(test_answers_file, "w", encoding="utf-8") as vocabfile:
        for answer_id, answer in answers.items():
            vocabfile.write(str(answer_id) + "\t" + answer.replace("\n", "") + "\n")

    print("Saved test answers to {}".format(test_answers_file))
    return answers
示例5: _decode
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def _decode(entity):
    """
    Recursively encode unicode text to UTF-8 byte strings on Python 2.

    This keeps the `u` prefix from proliferating in complex data structures.
    JSON encoding everything would probably be better, but for now this is
    fine.  On Python 3 the entity is returned untouched.

    :param entity: The entity to decode.
    :return: The entity with tuples, lists and dicts rebuilt and every text
        value encoded; anything else is returned as is.
    """
    if six.PY3:
        # Nothing to do: the work below only matters on Python 2.
        return entity

    if isinstance(entity, dict):
        return {_decode(key): _decode(value) for key, value in entity.items()}
    if isinstance(entity, list):
        return [_decode(member) for member in entity]
    if isinstance(entity, tuple):
        return tuple(_decode(member) for member in entity)
    if isinstance(entity, six.text_type):
        return entity.encode('utf8')
    return entity
示例6: _validate_people_params
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def _validate_people_params(self, params):
    """Validate ``people_params`` and return them serialized as JSON.

    Falsy ``params`` yield the string ``"{}"``.  Otherwise ``params`` must be
    a dict whose only allowed key is ``'user_selectors'``; its value must be
    iterable, and every element may only carry a ``'selector'`` key holding a
    string.

    :param params: the people parameters to validate (dict or falsy value)
    :return: ``"{}"`` for falsy input, else ``json.dumps(params)``
    :raises JQLSyntaxError: if any of the rules above is violated
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``.  Fall back to the old location on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable

    if not params:
        return "{}"
    if not isinstance(params, dict):
        raise JQLSyntaxError("people_params must be a dict")
    for k, v in params.items():
        if k != 'user_selectors':
            raise JQLSyntaxError('"%s" is not a valid key in people_params' % k)
        if not isinstance(v, Iterable):
            raise JQLSyntaxError("people_params['user_selectors'] must be iterable")
        for i, e in enumerate(v):
            for ek, ev in e.items():
                if ek not in ('selector',):
                    raise JQLSyntaxError(
                        "'%s' is not a valid key in "
                        "people_params['user_selectors'][%s]" % (ek, i))
                elif not isinstance(ev, six.string_types):
                    raise JQLSyntaxError(
                        "people_params['user_selectors'][%s].%s "
                        "must be a string" % (i, ek))
    return json.dumps(params)
示例7: rank
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def rank(src, tgt):
    """
    Compute, for every target, the rank of the first correct prediction.

    Args:
        src (dict): predictions by the model, keyed like ``tgt``
        tgt (dict): ground truth / targets

    Returns:
        ranks (list): one entry per item of ``tgt``, in iteration order: the
            1-based position of the first prediction contained in the target,
            or 0 when there is none or when ``src`` lacks the key (a warning
            is logged in the latter case)
    """
    ranks = []
    for dialog_id, expected in tgt.items():
        try:
            position = next(
                (pos
                 for pos, candidate in enumerate(src[dialog_id], start=1)
                 if candidate in expected),
                0,
            )
        except KeyError:
            msg = "No matching entry found for test case with dialog-id {}".format(dialog_id)
            logging.warning(msg)
            position = 0
        ranks.append(position)
    return ranks
示例8: __iter_extended_rows
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def __iter_extended_rows(self):
    """Yield ``(row_number, keys, values)`` for each item of the JSON source.

    Items are streamed with ``ijson`` from ``self.__bytes`` at the prefix
    ``'item'``, or ``'<property>.item'`` when ``self.__property`` is set.
    Row numbers start at 1.  Lists and tuples yield ``None`` keys and the
    values as a list; dicts yield their keys in sorted order with the
    matching values.  Any other item raises ``exceptions.SourceError`` unless
    ``self.__force_parse`` is truthy, in which case an empty row is yielded.
    """
    if self.__property is not None:
        prefix = '%s.item' % self.__property
    else:
        prefix = 'item'

    for row_number, item in enumerate(ijson.items(self.__bytes, prefix), start=1):
        if isinstance(item, (tuple, list)):
            yield (row_number, None, list(item))
            continue

        if isinstance(item, dict):
            keys = sorted(item)
            values = [item[key] for key in keys]
            yield (row_number, keys, values)
            continue

        # Neither a sequence nor a mapping.
        if not self.__force_parse:
            message = 'JSON item has to be list or dict'
            raise exceptions.SourceError(message)
        yield (row_number, None, [])
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def _extract_response_data(self, response_text):
    """Extract the next-page token and the record count from a response body.

    ``response_text`` is wrapped in a ``BytesIO`` and parsed twice with
    ``ijson``: once at ``FEED_DATA_NEXT_TOKEN_PATH`` to find the token and
    once at ``FEED_DATA_ITEMS_PATH`` to count the records.

    :param response_text: raw response body (passed to ``BytesIO``)
    :return: tuple ``(next_token, response_text, count)``; ``next_token`` is
        ``None`` unless exactly one non-empty token was found
    """
    # The context manager releases the buffer even when ijson raises on
    # malformed input.
    with BytesIO(response_text) as sio:
        # Get the next token
        tokens = list(ijson.items(sio, FEED_DATA_NEXT_TOKEN_PATH))
        next_token = tokens[0] if len(tokens) == 1 else None
        # Be explicit, no empty strings
        if not next_token:
            next_token = None

        # Get the record count.  The special parser for handling decimals is
        # not used here because this isn't on the return path, it only
        # counts records.
        sio.seek(0)
        count = sum(1 for _ in ijson.items(sio, FEED_DATA_ITEMS_PATH))

    logger.debug('Found {} records in data chunk'.format(count))
    return next_token, response_text, count
示例10: _get_structure_info
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def _get_structure_info(self, view):
    """Run the structure-info command for ``view`` and return its parsed JSON.

    The whole text of the view is handed to ``self.get_structure_info_cmd``
    to build the command, which is executed through the shell with stderr
    merged into stdout.  The command's output is parsed with ``ijson`` and
    the first top-level JSON value is returned.

    :param view: the view whose full contents are analysed
    :return: the first top-level JSON value printed by the command
    :raises IndexError: if the command's output contains no JSON value
    """
    # get structure info command
    text = view.substr(Region(0, view.size()))
    cmd = self.get_structure_info_cmd(view, text)
    # NOTE(review): this setting is read but never applied to the subprocess
    # below - confirm whether a timeout was meant to be enforced.
    timeout = self.get_settings(view, "sourcekitten_timeout", 1.0)

    # run structure info command
    p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
    try:
        structure_info = list(ijson.items(p.stdout, ''))[0]
    finally:
        # Close our end of the pipe and reap the child so that no file
        # descriptor or zombie process is left behind.
        p.stdout.close()
        p.wait()
    return structure_info
示例11: handle
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def handle(self, *args: Any, **options: Any) -> None:
    """Compare two message dumps pairwise and report changed contents.

    The files named by ``options['dump1']`` and ``options['dump2']`` are
    streamed in lockstep as JSON arrays.  For every pair whose ``'content'``
    differs the message id is written to stdout.  If the ``'id'`` of a pair
    differs, an error is written to stderr and the comparison stops.  The
    totals are written to stdout at the end.
    """
    total_count = 0
    changed_count = 0
    # Binary mode: ijson expects bytes, and text mode would decode the dumps
    # with the platform's default encoding.
    with open(options['dump1'], 'rb') as dump1, open(options['dump2'], 'rb') as dump2:
        # NOTE: zip() stops at the shorter dump, so trailing messages present
        # in only one of the files are not reported.
        for m1, m2 in zip(ijson.items(dump1, 'item'), ijson.items(dump2, 'item')):
            total_count += 1
            if m1['id'] != m2['id']:
                self.stderr.write('Inconsistent messages dump')
                break
            if m1['content'] != m2['content']:
                changed_count += 1
                self.stdout.write('Changed message id: {id}'.format(id=m1['id']))
    self.stdout.write(f'Total messages: {total_count}')
    self.stdout.write(f'Changed messages: {changed_count}')
示例12: parse_response
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def parse_response(json):
    """Convert the ``'rates'`` mapping of a decoded response to ``Decimal``.

    Falsy rate values (``0``, ``None``, ``''``) are dropped.  When ``json``
    has no ``'rates'`` entry, the payload is logged as a warning and an empty
    dict is returned.

    :param json: the decoded response (a mapping)
    :return: dict mapping each rate key to a ``Decimal``
    """
    if 'rates' in json:
        # Go through str(): Decimal(1.1) would reproduce the binary
        # floating-point error (1.100000000000000088...), whereas
        # Decimal('1.1') is exact.  Strings, ints and Decimals are
        # unaffected by the round trip.
        resp = {k: Decimal(str(v)) for k, v in json['rates'].items() if v}
    else:
        logger.warning('invalid json response:')
        logger.warning(json)
        resp = {}
    return resp
示例13: any2dict
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def any2dict(f, ext='xml', html5=False, path=None):
    """Parse ``f`` as XML, HTML or JSON and return the content found at ``path``.

    For ``'xml'`` and ``'html'`` the document is parsed with ``xml2etree``;
    the dotted ``path`` is turned into a slash-separated expression, the
    first ``xpath`` match (or the root when ``path`` is empty) is converted
    with ``etree2dict``.  For ``'json'`` the first value produced by
    ``items(f, path)`` is returned.

    Raises:
        TypeError: if ``ext`` is not ``'xml'``, ``'html'`` or ``'json'``.
    """
    path = path or ''

    if ext in {'xml', 'html'}:
        root = xml2etree(f, ext == 'xml', html5).getroot()
        expression = path.replace('.', '/')
        subtree = next(xpath(root, expression)) if expression else root
        return etree2dict(subtree)

    if ext == 'json':
        return next(items(f, path))

    raise TypeError("Invalid file type: '%s'" % ext)
示例14: parse_data_to_dictionaries
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def parse_data_to_dictionaries(input):
    """Stream a JSON array file and return its items as a list.

    :param input: dictionary whose ``"file"`` entry is the path to the data
        file, minus the ``.json`` extension
    :returns: list with one entry per item of the top-level JSON array
    """
    # Binary mode: ijson expects bytes, and text mode would decode the file
    # with the platform's default encoding.
    with open(input["file"] + ".json", "rb") as data:
        return list(ijson.items(data, "item"))
示例15: get_dialogs
# 需要导入模块: import ijson [as 别名]
# 或者: from ijson import items [as 别名]
def get_dialogs(filename):
    """Build the rows for every dialog stored in a JSON array file.

    Each entry is passed to ``process_dialog`` twice: the result of the
    positive call is appended as a single row, and the result of the negative
    call (``all_negative=True``) is spliced in element by element.

    :param filename: path of the JSON file to stream
    :return: list of rows in file order
    """
    rows = []
    with open(filename, 'rb') as stream:
        for dialog in ijson.items(stream, 'item'):
            positive_row = process_dialog(dialog, train=True, positive=True)
            rows.append(positive_row)
            negative_rows = process_dialog(dialog, train=True, positive=False, all_negative=True)
            rows.extend(negative_rows)
    return rows