当前位置: 首页>>代码示例>>Python>>正文


Python jsonlines.open方法代码示例

本文整理汇总了Python中jsonlines.open方法的典型用法代码示例。如果您正苦于以下问题：Python jsonlines.open方法的具体用法？Python jsonlines.open怎么用？Python jsonlines.open使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块jsonlines的用法示例。


在下文中一共展示了jsonlines.open方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: convert

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def convert(inpath, outpath):
    """Convert a tab-separated labelled-sentence file to jsonlines.

    Every non-empty input line is either a header starting with ``###`` or
    a ``label<TAB>sentence`` pair. Each pair is written to ``outpath`` as
    ``{'label': ..., 'text': ..., 'metadata': ...}``, where ``metadata`` is
    the most recent ``###`` header line ('' if none has been seen yet).

    Args:
        inpath: path of the text file to read.
        outpath: path of the jsonlines file to write; missing parent
            directories are created.

    Raises:
        ValueError: if a data line does not contain exactly one tab.
    """
    pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)

    with open(inpath) as f_in, jsonlines.open(outpath, 'w') as f_out:
        # Initialised once, outside the loop: resetting it per line would
        # discard the header before the sentences that follow it are written.
        abstract_id = ''
        for line in f_in:
            line = line.strip()
            if not line:
                continue
            if line.startswith('###'):
                abstract_id = line
                continue
            label, sent = line.split('\t')
            f_out.write({'label': label, 'text': sent, 'metadata': abstract_id})
开发者ID:allenai,项目名称:scibert,代码行数:18,代码来源:rct_to_text.py

示例2: setup

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def setup(self):
        """Load the category mapping and make sure drawings are available.

        One record is read from the jsonlines file at
        ``self._category_mapping_filepath`` into ``self._category_mapping``.
        The categories under ``self._path`` are then loaded; when there are
        none, the user is asked whether the dataset should be downloaded.

        Raises:
            IOError: the mapping file could not be read (logged, then re-raised).
            ValueError: no drawings are available and the user declined the download.
        """
        try:
            with jsonlines.open(self._category_mapping_filepath, mode='r') as mapping_reader:
                self._category_mapping = mapping_reader.read()
        except IOError as err:
            self._logger.exception(err)
            print('label_mapping.jsonl not found')
            raise err

        self._categories = self.load_categories(self._path)
        if self._categories:
            # Drawings are already on disk, nothing more to do.
            return

        wants_download = click.confirm('no drawings available, would you like to download the dataset? '
                                       'download will take approx 5gb of space')
        if not wants_download:
            self._logger.error('no drawings available, and user declined to download dataset')
            raise ValueError('no drawings available, please download dataset')

        self.download_recurse(self._quickdraw_dataset_url, self._path)
        self._categories = self.load_categories(self._path)
开发者ID:danmacnish,项目名称:cartoonify,代码行数:19,代码来源:drawingdataset.py

示例3: _create

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _create(self):
        """Preprocess every paragraph of the input and store the result twice.

        Paragraphs are read from ``self.input_path`` through a
        ``SerialUnpickler``. Each preprocessed paragraph is added to a
        ``SerialPickler`` writing to ``self.output_path()`` and also written,
        after ``serialize_sample_paragraph``, as one record of the jsonlines
        file ``self.output_path() + '.jsonl'``.

        All three files are managed by context managers so they are flushed
        and closed even when processing raises part-way through.
        """
        import jsonlines

        with open(self.input_path, 'rb') as source, \
                open(self.output_path(), 'wb') as pickled_out, \
                jsonlines.open(self.output_path() + '.jsonl', mode='w') as jf:
            su = SerialUnpickler(source)
            sp = SerialPickler(pickled_out)

            paragraph: Paragraph
            # NOTE(review): total=18484 is hard-coded; presumably the number of
            # paragraphs in the expected input -- confirm.
            for paragraph in tqdm(su, total=18484, desc='Processing %s' % str(self.__class__.__name__)):
                paragraph_sequence = preprocess_paragraph_reanalyzed(paragraph)

                jf.write(serialize_sample_paragraph(paragraph_sequence))
                sp.add(paragraph_sequence)
开发者ID:kwrobel-nlp,项目名称:krnnt,代码行数:21,代码来源:new.py

示例4: _read

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _read(self, file_path):
        """Yield one instance per record of the jsonlines file at ``file_path``.

        Each record must provide the keys ``text``, ``section_name``,
        ``citing_paper_id`` and ``cited_paper_id``. When
        ``self._clean_citation`` is truthy, matches of
        ``regex_find_citation`` are removed from the text first. The
        ``intent`` passed to ``text_to_instance`` is always ``None``.

        Raises:
            KeyError: if a record lacks one of the required keys.
        """
        # The context manager closes the underlying file once the generator
        # is exhausted or closed, instead of leaving the reader open.
        with jsonlines.open(file_path) as reader:
            for obj in reader:
                citation_text = obj['text']

                if self._clean_citation:
                    citation_text = regex_find_citation.sub("", citation_text)

                citation_intent = None
                section_name = obj['section_name']
                citing_paper_id = obj['citing_paper_id']
                cited_paper_id = obj['cited_paper_id']

                yield self.text_to_instance(
                    citation_text=citation_text,
                    intent=citation_intent,
                    citing_paper_id=citing_paper_id,
                    cited_paper_id=cited_paper_id,
                    section_name=section_name
                )
开发者ID:allenai,项目名称:scicite,代码行数:21,代码来源:citation_data_reader_aclarc_aux.py

示例5: __init__

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def __init__(self, word_vocab, data_path, label_vocab, max_length, lower, min_length=2):
        """Store the configuration and load the records found at ``data_path``.

        Every record of the jsonlines file is passed to
        ``self._convert_obj``. Truthy results are collected in
        ``self._data``; falsy results and records that raise ``ValueError``
        or ``AttributeError`` are counted, and the count is printed once the
        whole file has been read.
        """
        self.word_vocab = word_vocab
        self.label_vocab = label_vocab
        self.lower = lower
        self._max_length = max_length
        self._min_length = min_length
        self._data = []

        skipped = 0
        with jsonlines.open(data_path, 'r') as reader:
            for record in reader:
                try:
                    instance = self._convert_obj(record)
                    usable = bool(instance)
                except (ValueError, AttributeError):
                    usable = False

                if usable:
                    self._data.append(instance)
                else:
                    skipped += 1
        print('Failed to parse {:d} instances'.format(skipped))
开发者ID:ExplorerFreda,项目名称:TreeEnc,代码行数:23,代码来源:data.py

示例6: _read

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def _read(self, file_path):
        """Yield one instance per record of the jsonlines file at ``file_path``.

        The ``text``, ``label`` and ``metadata`` entries of each record are
        forwarded to ``text_to_instance``; a missing entry is passed as
        ``None``.
        """
        with jsonlines.open(file_path) as reader:
            for record in reader:
                fields = {key: record.get(key) for key in ('text', 'label', 'metadata')}
                yield self.text_to_instance(**fields)
开发者ID:allenai,项目名称:scibert,代码行数:10,代码来源:classification_dataset_reader.py

示例7: convert

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def convert(inpath, outpath):
    """Convert the jsonlines file at ``inpath`` via ``_convert`` and write the result.

    All records are read into memory and handed to ``_convert``. Each item
    it produces is written to ``outpath`` followed by a newline. Missing
    parent directories of ``outpath`` are created.
    """
    with jsonlines.open(inpath) as reader:
        records = list(reader)

    conll_lines = _convert(records)

    pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)
    with open(outpath, 'w') as f_out:
        f_out.writelines(f'{line}\n' for line in conll_lines)
开发者ID:allenai,项目名称:scibert,代码行数:12,代码来源:sciie_to_conll2003.py

示例8: main

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def main(inpath, outpath, with_entity_markers):
    """
    Write one jsonlines record per relation found in the input file.

    Each record holds the sentence tokens joined by spaces as 'text', the
    relation type as 'label', and the sentence-relative entity offsets
    [e1_from, e1_to, e2_from, e2_to] as 'metadata'. The offsets are the ones
    computed before any marker is inserted.

    Args:
        inpath: input file from sciie
        outpath: output file with relations information
        with_entity_markers: True/False, if True, highlight entities in the string.
            Both the boolean True and the string 'True' enable the markers;
            every other value disables them.
    """
    # Evaluated once up front. A real boolean is accepted in addition to the
    # string 'True', which used to be the only value that enabled markers.
    add_markers = with_entity_markers is True or with_entity_markers == 'True'

    pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)
    with jsonlines.open(outpath, 'w') as fout, jsonlines.open(inpath) as fin:
        for entry in fin:
            # Relation offsets are shifted by the number of tokens in the
            # preceding sentences of the same entry.
            sent_start_index = 0
            for original_sent, rels in zip(entry['sentences'], entry['relations']):
                for rel in rels:
                    sent = list(original_sent)
                    e1_from, e1_to, e2_from, e2_to, rel_type = rel
                    e1_from -= sent_start_index
                    e1_to -= sent_start_index
                    e2_from -= sent_start_index
                    e2_to -= sent_start_index
                    if add_markers:
                        # Markers for the entity that ends later are inserted
                        # first, so the other entity's offsets stay valid.
                        if e2_to > e1_to:
                            sent.insert(e2_to + 1, '>>')
                            sent.insert(e2_from, '<<')
                            sent.insert(e1_to + 1, ']]')
                            sent.insert(e1_from, '[[')
                        else:
                            sent.insert(e1_to + 1, ']]')
                            sent.insert(e1_from, '[[')
                            sent.insert(e2_to + 1, '>>')
                            sent.insert(e2_from, '<<')

                    d = {'text': ' '.join(sent), 'label': rel_type, 'metadata': [e1_from, e1_to, e2_from, e2_to]}
                    fout.write(d)
                sent_start_index += len(original_sent)
开发者ID:allenai,项目名称:scibert,代码行数:38,代码来源:sciie_to_relations.py

示例9: download_drawing_dataset

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_drawing_dataset():
    """Download every drawing file that is not yet present locally.

    The expected categories are 'face', 't-shirt' and 'pants' plus the
    values of the mapping read from ``label_map_path``. For each category
    a ``<category>.bin`` file is looked up under
    ``download_path / 'drawing_dataset'``; missing ones are listed and
    handed to ``download_recurse``.

    An ``IOError`` raised anywhere in this process is reported with a
    printed message and not propagated.
    """
    try:
        path = download_path / 'drawing_dataset'
        with jsonlines.open(str(label_map_path), mode='r') as reader:
            category_mapping = reader.read()
        print('checking whether drawing files already exist...')
        # dict.values() is a view on Python 3 and cannot be added to a list
        # directly, so it is materialised first.
        drawing_categories = ['face', 't-shirt', 'pants'] + list(category_mapping.values())
        missing_files = [
            name for name in drawing_categories
            if not Path(path / Path(name).with_suffix('.bin')).exists()
        ]
        if missing_files:
            print('{} drawing files missing, downloading the following files: '.format(len(missing_files)))
            for f in missing_files:
                print(f)
            download_recurse(quickdraw_dataset_url, path, missing_files)
    except IOError:
        print('label_mapping.jsonl not found')
开发者ID:danmacnish,项目名称:cartoonify,代码行数:17,代码来源:download_assets.py

示例10: download_tensorflow_model

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_tensorflow_model():
    """Download and unpack the tensorflow model unless ``model_path`` exists.

    The archive ``tensorflow_model_name + '.tar.gz'`` is fetched from
    ``tensorflow_model_download_url`` into the current working directory.
    Every archive member whose base name contains
    'frozen_inference_graph.pb' is extracted below
    ``model_path.parents[1]``.
    """
    print('checking if tensorflow model exists...')
    if model_path.exists():
        return

    print('tensorflow model missing, downloading the following file: \n {}'.format(str(model_path)))
    filename = tensorflow_model_name + '.tar.gz'
    # urlretrieve replaces the deprecated URLopener class.
    urllib.request.urlretrieve(tensorflow_model_download_url + filename, filename)
    print('extracting model from tarfile...')
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(filename) as tar_file:
        for member in tar_file.getmembers():
            file_name = os.path.basename(member.name)
            if 'frozen_inference_graph.pb' in file_name:
                # NOTE(review): member paths come from the downloaded archive
                # and are extracted unchecked; consider an extraction filter
                # if the download source is not fully trusted.
                tar_file.extract(member, path=str(model_path.parents[1]))
开发者ID:danmacnish,项目名称:cartoonify,代码行数:15,代码来源:download_assets.py

示例11: download_recurse

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def download_recurse(self, url, path):
        """download all available files from url

        One category name is read per line of ``self._categories_filepath``.
        For each name, ``<url><name>.bin`` (spaces in the name encoded as
        ``%20``) is fetched through ``self.download`` into ``path``, and a
        confirmation line is printed.

        Args:
            url: base url; the file name is appended to it directly.
            path: destination directory, converted to a ``Path``.
        """
        path = Path(path)
        with open(str(self._categories_filepath)) as f:
            stripped = (line.strip() for line in f)
            # Blank lines would otherwise turn into a request for "<url>.bin".
            categories = [cat for cat in stripped if cat]
        for cat in categories:
            site = url + cat.replace(' ', '%20') + '.bin'
            fpath = self.download(site, cat + '.bin', path)
            print('downloaded: {} from {}'.format(fpath, site))
开发者ID:danmacnish,项目名称:cartoonify,代码行数:13,代码来源:drawingdataset.py

示例12: unpack_drawings

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def unpack_drawings(self, path):
        """Generate the drawings stored in the binary file at ``path``.

        ``self._unpack_drawing`` is called on the open file again and again;
        the generator finishes the first time a ``struct.error`` is raised.
        """
        with open(path, 'rb') as stream:
            try:
                while True:
                    yield self._unpack_drawing(stream)
            except struct.error:
                # No further drawing could be unpacked: stop generating.
                return
开发者ID:danmacnish,项目名称:cartoonify,代码行数:11,代码来源:drawingdataset.py

示例13: get

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def get(self):
        """Call ``self.load()`` and return the object pickled at ``self.output_path()``.

        The file is opened in a ``with`` block so the handle is closed as
        soon as unpickling has finished (or failed).
        """
        self.load()
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only safe as long as the file is produced by this project.
        with open(self.output_path(), 'rb') as f:
            return pickle.load(f)
开发者ID:kwrobel-nlp,项目名称:krnnt,代码行数:5,代码来源:new.py

示例14: learn

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def learn(self, path, stop=-1, start=0, ids=None):
        """Record the most frequent lemma for every (token, label) pair.

        Paragraphs are read from the file at ``path`` through a
        ``SerialUnpickler``. For each sample that has a 'lemma' feature the
        lemma is counted under its ('token', 'label') feature pair. The
        lemma with the highest count (ties broken by the greater lemma
        string) is then stored in ``self.lemmas`` under that pair.

        Args:
            path: file to read the paragraphs from.
            stop: forwarded to ``SerialUnpickler``.
            start: forwarded to ``SerialUnpickler``.
            ids: forwarded to ``SerialUnpickler``; ``None`` becomes ``[]``.
        """
        lemma_count = collections.defaultdict(lambda: collections.defaultdict(int))
        if ids is None:
            ids = []
        # The context manager closes the input once all paragraphs are read.
        with open(path, 'rb') as source:
            su = SerialUnpickler(source, stop=stop, start=start, ids=ids)
            for paragraph in su:
                for sentence, sentence_orig in paragraph:
                    for sample in sentence_orig:
                        # some samples don't have a lemma, because they are not on gold segmentation
                        if 'lemma' in sample.features:
                            lemma_count[(sample.features['token'], sample.features['label'])][sample.features['lemma']] += 1

        # Example of collected counts for ('Morawieckiego', 'subst:sg:gen:m1'):
        # {'morawieckiego': 7, 'Morawiecki': 7, 'Morawieckiego': 1}

        for k, v in lemma_count.items():
            # TODO: lemmatization depending on the word's position in the
            # sentence - when not the first word, then a capital letter as well
            # TODO: several lemmas may share the same count
            best = max(v.items(), key=lambda x: (x[1], x[0]))
            self.lemmas[k] = best[0]
开发者ID:kwrobel-nlp,项目名称:krnnt,代码行数:29,代码来源:new.py

示例15: save

# 需要导入模块: import jsonlines [as 别名]
# 或者: from jsonlines import open [as 别名]
def save(self, path):
        """Pickle ``self.lemmas`` into the file at ``path``.

        The file is opened in a ``with`` block so it is closed even when
        pickling raises.
        """
        with open(path, 'wb') as f:
            pickle.dump(self.lemmas, f)
开发者ID:kwrobel-nlp,项目名称:krnnt,代码行数:6,代码来源:new.py


注:本文中的jsonlines.open方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。