本文整理汇总了Python中jsonlines.open方法的典型用法代码示例。如果您正苦于以下问题:Python jsonlines.open方法的具体用法?Python jsonlines.open怎么用?Python jsonlines.open使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块jsonlines的用法示例。
在下文中一共展示了jsonlines.open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: convert
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def convert(inpath, outpath):
    """Convert a tab-separated sentence file into a JSON Lines file.

    The input is read line by line:

    * blank lines are skipped;
    * a line starting with ``###`` is remembered as the current abstract id;
    * every other line is split on a tab into ``label`` and ``sent`` and
      written as ``{'label': ..., 'text': ..., 'metadata': <abstract id>}``.

    Args:
        inpath: path of the text file to read.
        outpath: path of the ``.jsonl`` file to write; missing parent
            directories are created.

    Raises:
        ValueError: if a data line does not split into exactly two
            tab-separated fields.
    """
    pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)
    with open(inpath) as f_in, jsonlines.open(outpath, 'w') as f_out:
        # Initialised once, before the loop, so that a '###' header stays
        # attached to all the sentence lines that follow it.
        abstract_id = ''
        for raw_line in f_in:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('###'):
                abstract_id = line
                continue
            label, sent = line.split('\t')
            f_out.write({'label': label, 'text': sent, 'metadata': abstract_id})
示例2: setup
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def setup(self):
    """Load the category mapping and make sure drawing categories exist.

    Reads the first record of the category-mapping JSON Lines file into
    ``self._category_mapping``, then loads the categories found under
    ``self._path``. When none are found the user is asked whether the
    dataset should be downloaded.

    Raises:
        IOError: re-raised (after logging) when the mapping file cannot be read.
        ValueError: when no drawings exist and the user declines the download.
    """
    try:
        with jsonlines.open(self._category_mapping_filepath, mode='r') as mapping_reader:
            self._category_mapping = mapping_reader.read()
    except IOError as err:
        self._logger.exception(err)
        print('label_mapping.jsonl not found')
        raise err

    self._categories = self.load_categories(self._path)
    if self._categories:
        return

    download_approved = click.confirm('no drawings available, would you like to download the dataset? '
                                      'download will take approx 5gb of space')
    if not download_approved:
        self._logger.error('no drawings available, and user declined to download dataset')
        raise ValueError('no drawings available, please download dataset')

    self.download_recurse(self._quickdraw_dataset_url, self._path)
    self._categories = self.load_categories(self._path)
示例3: _create
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _create(self):
    """Preprocess every paragraph of the input and store it in two formats.

    Paragraphs are read from ``self.input_path`` through ``SerialUnpickler``,
    passed to ``preprocess_paragraph_reanalyzed``, and the result is written
    both to ``self.output_path()`` (via ``SerialPickler``) and, serialized
    with ``serialize_sample_paragraph``, to ``self.output_path() + '.jsonl'``.

    All three files are managed by ``with`` blocks so they are flushed and
    closed even when processing fails part-way; in particular the JSON Lines
    writer is now closed, which it previously never was.
    """
    import jsonlines

    with open(self.input_path, 'rb') as source:
        su = SerialUnpickler(source)
        with open(self.output_path(), 'wb') as pickled_out:
            sp = SerialPickler(pickled_out)
            with jsonlines.open(self.output_path() + '.jsonl', mode='w') as jf:
                paragraph: Paragraph
                # total=18484 is only the progress-bar length hint passed to tqdm.
                for paragraph in tqdm(su, total=18484,
                                      desc='Processing %s' % str(self.__class__.__name__)):
                    paragraph_sequence = preprocess_paragraph_reanalyzed(paragraph)
                    jf.write(serialize_sample_paragraph(paragraph_sequence))
                    sp.add(paragraph_sequence)
示例4: _read
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _read(self, file_path):
    """Yield one instance per record of a JSON Lines citation file.

    Each record must provide the keys ``text``, ``section_name``,
    ``citing_paper_id`` and ``cited_paper_id``. When ``self._clean_citation``
    is truthy, matches of ``regex_find_citation`` are removed from the text.
    The intent passed to ``text_to_instance`` is always ``None``.

    Args:
        file_path: path of the JSON Lines file to read.

    Yields:
        Whatever ``self.text_to_instance`` returns for each record.
    """
    # The reader is opened in a ``with`` block so the file is closed once the
    # generator is exhausted or discarded, instead of being left open.
    with jsonlines.open(file_path) as reader:
        for obj in reader:
            citation_text = obj['text']
            if self._clean_citation:
                citation_text = regex_find_citation.sub("", citation_text)
            citation_intent = None
            section_name = obj['section_name']
            citing_paper_id = obj['citing_paper_id']
            cited_paper_id = obj['cited_paper_id']
            yield self.text_to_instance(
                citation_text=citation_text,
                intent=citation_intent,
                citing_paper_id=citing_paper_id,
                cited_paper_id=cited_paper_id,
                section_name=section_name
            )
示例5: __init__
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def __init__(self, word_vocab, data_path, label_vocab, max_length, lower, min_length=2):
    """Store the configuration and load all convertible records.

    Every record of the JSON Lines file at ``data_path`` is passed to
    ``self._convert_obj``. Truthy results are collected in ``self._data``;
    falsy results and records raising ``ValueError`` or ``AttributeError``
    are counted, and the count is printed at the end.
    """
    self.word_vocab = word_vocab
    self.label_vocab = label_vocab
    self.lower = lower
    self._max_length = max_length
    self._min_length = min_length
    self._data = []

    skipped = 0
    with jsonlines.open(data_path, 'r') as reader:
        for record in reader:
            try:
                converted = self._convert_obj(record)
            except (ValueError, AttributeError):
                skipped += 1
                continue
            if converted:
                self._data.append(converted)
            else:
                skipped += 1
    print('Failed to parse {:d} instances'.format(skipped))
示例6: _read
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _read(self, file_path):
    """Yield an instance for each record of the JSON Lines file.

    The ``text``, ``label`` and ``metadata`` entries of every record are
    forwarded to ``self.text_to_instance``; a missing entry is passed
    as ``None``.
    """
    wanted_keys = ('text', 'label', 'metadata')
    with jsonlines.open(file_path) as records:
        for record in records:
            fields = {key: record.get(key) for key in wanted_keys}
            yield self.text_to_instance(**fields)
示例7: convert
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def convert(inpath, outpath):
    """Convert a JSON Lines file to a line-oriented text file.

    All records of ``inpath`` are loaded, handed to ``_convert``, and each
    item it produces is written to ``outpath`` followed by a newline.
    Missing parent directories of ``outpath`` are created.
    """
    with jsonlines.open(inpath) as f_in:
        records = list(f_in)
    conll_lines = _convert(records)

    target = pathlib.Path(outpath)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(outpath, 'w') as f_out:
        f_out.writelines('{}\n'.format(line) for line in conll_lines)
示例8: main
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def main(inpath, outpath, with_entity_markers):
    """Flatten per-document relation annotations into one record per relation.

    Args:
        inpath: input JSON Lines file from sciie; each entry provides parallel
            ``sentences`` (token lists) and ``relations`` lists.
        outpath: output JSON Lines file with relations information; missing
            parent directories are created.
        with_entity_markers: ``True`` or the string ``'True'`` to highlight
            the entities in the text (``[[ ]]`` around the first entity,
            ``<< >>`` around the second). Any other value leaves the
            sentence unmarked.

    Each output record is ``{'text', 'label', 'metadata'}`` where ``metadata``
    holds the four entity offsets relative to the start of the sentence.
    """
    # The docstring always promised True/False, but only the string 'True'
    # was honoured; accept the real boolean as well.
    mark_entities = with_entity_markers is True or with_entity_markers == 'True'

    pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)
    with jsonlines.open(outpath, 'w') as fout, jsonlines.open(inpath) as fin:
        for entry in fin:
            # Relation offsets are shifted by the number of tokens in the
            # preceding sentences of the same entry.
            sent_start_index = 0
            for original_sent, rels in zip(entry['sentences'], entry['relations']):
                for rel in rels:
                    sent = list(original_sent)
                    e1_from, e1_to, e2_from, e2_to, rel_type = rel
                    e1_from -= sent_start_index
                    e1_to -= sent_start_index
                    e2_from -= sent_start_index
                    e2_to -= sent_start_index
                    if mark_entities:
                        # Insert the markers of the right-most entity first so
                        # the positions of the other entity remain valid.
                        if e2_to > e1_to:
                            markers = [(e2_to + 1, '>>'), (e2_from, '<<'),
                                       (e1_to + 1, ']]'), (e1_from, '[[')]
                        else:
                            markers = [(e1_to + 1, ']]'), (e1_from, '[['),
                                       (e2_to + 1, '>>'), (e2_from, '<<')]
                        for position, marker in markers:
                            sent.insert(position, marker)
                    fout.write({'text': ' '.join(sent),
                                'label': rel_type,
                                'metadata': [e1_from, e1_to, e2_from, e2_to]})
                sent_start_index += len(original_sent)
示例9: download_drawing_dataset
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_drawing_dataset():
    """Download any drawing files that are not yet present locally.

    The wanted categories are ``face``, ``t-shirt``, ``pants`` plus every
    value of the mapping stored as the first record of ``label_map_path``.
    A category counts as missing when ``<category>.bin`` does not exist in
    ``download_path / 'drawing_dataset'``; missing ones are listed and passed
    to ``download_recurse``.

    An ``IOError`` raised anywhere in this process is reported with a
    printed message instead of being propagated.
    """
    try:
        path = download_path / 'drawing_dataset'
        with jsonlines.open(str(label_map_path), mode='r') as reader:
            category_mapping = reader.read()
        print('checking whether drawing files already exist...')
        # dict.values() is a view object on Python 3 and cannot be
        # concatenated to a list directly, so materialise it first.
        drawing_categories = ['face', 't-shirt', 'pants'] + list(category_mapping.values())
        missing_files = [
            category for category in drawing_categories
            if not Path(path / Path(category).with_suffix('.bin')).exists()
        ]
        if missing_files:
            print('{} drawing files missing, downloading the following files: '.format(len(missing_files)))
            for f in missing_files:
                print(f)
            download_recurse(quickdraw_dataset_url, path, missing_files)
    except IOError:
        print('label_mapping.jsonl not found')
示例10: download_tensorflow_model
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_tensorflow_model():
    """Download and unpack the frozen TensorFlow graph if it is missing.

    Nothing is downloaded when ``model_path`` already exists. Otherwise
    ``<tensorflow_model_name>.tar.gz`` is fetched from
    ``tensorflow_model_download_url`` into the current working directory and
    every archive member whose base name contains
    ``frozen_inference_graph.pb`` is extracted below
    ``model_path.parents[1]``.
    """
    print('checking if tensorflow model exists...')
    if model_path.exists():
        return

    print('tensorflow model missing, downloading the following file: \n {}'.format(str(model_path)))
    filename = tensorflow_model_name + '.tar.gz'
    # urlretrieve replaces URLopener().retrieve: URLopener has been
    # deprecated since Python 3.3 and is not available in recent releases.
    urllib.request.urlretrieve(tensorflow_model_download_url + filename, filename)

    print('extracting model from tarfile...')
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(filename) as tar_file:
        for member in tar_file.getmembers():
            file_name = os.path.basename(member.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(member, path=str(model_path.parents[1]))
示例11: download_recurse
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_recurse(self, url, path):
    """Download the ``.bin`` file of every listed category from ``url``.

    Category names are read, one per line, from ``self._categories_filepath``.
    Spaces in a name are encoded as ``%20`` in the request URL, and each file
    is fetched through ``self.download`` into ``path``.
    """
    target_dir = Path(path)
    with open(str(self._categories_filepath)) as listing:
        names = [entry.strip() for entry in listing.readlines()]

    for name in names:
        bin_name = name + '.bin'
        site = url + name.replace(' ', '%20') + '.bin'
        saved_to = self.download(site, bin_name, target_dir)
        print('downloaded: {} from {}'.format(saved_to, site))
示例12: unpack_drawings
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def unpack_drawings(self, path):
    """Lazily yield every drawing stored in the binary file at ``path``.

    ``self._unpack_drawing`` is called repeatedly on the open file; the
    first ``struct.error`` it raises ends the iteration.
    """
    with open(path, 'rb') as stream:
        try:
            while True:
                yield self._unpack_drawing(stream)
        except struct.error:
            return
示例13: get
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def get(self):
    """Run ``self.load()`` and return the object pickled at ``self.output_path()``.

    The file is opened in a ``with`` block so its handle is closed as soon
    as unpickling finishes, rather than being left for garbage collection.
    """
    self.load()
    # NOTE(review): pickle.load executes arbitrary code from the file; this is
    # only safe while output_path() is produced by trusted code — confirm.
    with open(self.output_path(), 'rb') as pickled:
        return pickle.load(pickled)
示例14: learn
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def learn(self, path, stop=-1, start=0, ids=None):
    """Learn the most frequent lemma for every (token, label) pair.

    Paragraphs are read from ``path`` through ``SerialUnpickler``. For each
    sample that has a ``lemma`` feature, the lemma is counted under the key
    ``(token, label)``. The most frequent lemma of each key is then stored
    in ``self.lemmas``; ties are broken in favour of the lemma that sorts last.

    Args:
        path: file to read paragraphs from.
        stop: forwarded to ``SerialUnpickler``.
        start: forwarded to ``SerialUnpickler``.
        ids: forwarded to ``SerialUnpickler``; ``None`` is replaced by ``[]``.
    """
    lemma_count = collections.defaultdict(lambda: collections.defaultdict(int))
    if ids is None:
        ids = []
    # The input is read inside a ``with`` block so the handle is closed once
    # counting is done (it was previously never closed).
    with open(path, 'rb') as source:
        su = SerialUnpickler(source, stop=stop, start=start, ids=ids)
        for paragraph in su:
            for sentence, sentence_orig in paragraph:
                for sample in sentence_orig:
                    features = sample.features
                    # Some samples have no lemma because they are not on the
                    # gold segmentation.
                    if 'lemma' in features:
                        lemma_count[(features['token'], features['label'])][features['lemma']] += 1
    # Example of one entry, lemma_count[('Morawieckiego', 'subst:sg:gen:m1')]:
    # {'morawieckiego': 7, 'Morawiecki': 7, 'Morawieckiego': 1}
    for k, v in lemma_count.items():
        # TODO: lemmatize depending on the word's position in the sentence -
        # a word that is not first may legitimately start with a capital too.
        # TODO: several lemmas can share the same count.
        best = max(v.items(), key=lambda x: (x[1], x[0]))
        self.lemmas[k] = best[0]
示例15: save
# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def save(self, path):
    """Pickle ``self.lemmas`` to the file at ``path``.

    The file is written inside a ``with`` block so it is closed even when
    ``pickle.dump`` raises.
    """
    with open(path, 'wb') as f:
        pickle.dump(self.lemmas, f)