

Python tokenization.printable_text Method: Code Examples

This article collects typical usage examples of the Python method bert.tokenization.printable_text. If you have been wondering what tokenization.printable_text does, how to call it, or where to find working examples, the curated code samples below should help. You can also explore the usage examples of the other methods in bert.tokenization for further context.


The sections below present 15 code examples of tokenization.printable_text, ordered by popularity.
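
For orientation: printable_text normalizes a token to a native str so that it can be passed safely to print or tf.logging, whether the token arrives as text or as UTF-8 bytes. The sketch below paraphrases the Python 3 branch of the function in BERT's tokenization.py; treat it as an illustrative sketch rather than the verbatim source.

# Paraphrased sketch of bert.tokenization.printable_text (Python 3 branch only).
def printable_text(text):
    """Return `text` as a str suitable for print or tf.logging."""
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        # Decode byte tokens, ignoring malformed UTF-8 sequences.
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % type(text))

print(printable_text("token"))           # -> token
print(printable_text("token".encode()))  # -> token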

Example 1: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "document_id: %s" % (self.document_id)
        s += ", qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: %s ..." % (" ".join(self.doc_tokens[:20]))
        s += ", length of doc_tokens: %d" % (len(self.doc_tokens))
        if self.orig_answer_texts:
            s += ", orig_answer_texts: {}".format(self.orig_answer_texts)
        if self.start_positions and self.end_positions:
            s += ", start_positions: {}".format(self.start_positions)
            s += ", end_positions: {}".format(self.end_positions)
            s += ", token_answer: "
            for start, end in zip(self.start_positions, self.end_positions):
                s += "{}, ".format(" ".join(self.doc_tokens[start:(end+1)]))
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 19, Source: triviaqa_document_utils.py
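
To see what this __repr__ renders, here is a small hedged driver; the _Example class and its field values are hypothetical stand-ins invented for illustration, and only the __repr__ body above comes from the RE3QA source.

# Hypothetical stand-in carrying the fields that the __repr__ above reads.
class _Example:
    def __init__(self):
        self.document_id = "doc-0"
        self.qas_id = "q-0"
        self.question_text = "Who introduced BERT?"
        self.doc_tokens = ["BERT", "was", "introduced", "by", "Devlin", "et", "al", "."]
        self.orig_answer_texts = ["Devlin et al."]
        self.start_positions = [4]
        self.end_positions = [6]

_Example.__repr__ = __repr__  # attach the method defined above
print(repr(_Example()))       # document_id: doc-0, qas_id: q-0, question_text: ...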

Example 2: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_positions: %s" % (self.start_position)
    if self.end_position:
      s += ", end_positions: %s" % (self.end_position)
    return s 
Developer: thunlp, Project: XQA, Lines: 13, Source: run_bert_open_qa_train.py

Example 3: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    s += ", answer_text: %s" % (self.orig_answer_text)
    return s 
Developer: thunlp, Project: XQA, Lines: 10, Source: run_bert_open_qa_eval.py

Example 4: __str__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    s += "tokens: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.tokens]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "is_random_next: %s\n" % self.is_random_next
    s += "masked_lm_positions: %s\n" % (" ".join(
        [str(x) for x in self.masked_lm_positions]))
    s += "masked_lm_labels: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.masked_lm_labels]))
    s += "\n"
    return s 
Developer: blei-lab, Project: causal-text-embeddings, Lines: 14, Source: create_pretraining_data.py

Example 5: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 13, Source: squad_utils.py

Example 6: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", \nquestion: %s" % (" ".join(self.question_tokens))
        s += ", \npassage: %s" % (" ".join(self.passage_tokens))
        if self.numbers_in_passage:
            s += ", \nnumbers_in_passage: {}".format(self.numbers_in_passage)
        if self.number_indices:
            s += ", \nnumber_indices: {}".format(self.number_indices)
        if self.answer_type:
            s += ", \nanswer_type: {}".format(self.answer_type)
        if self.number_of_answer:
            s += ", \nnumber_of_answer: {}".format(self.number_of_answer)
        if self.passage_spans:
            s += ", \npassage_spans: {}".format(self.passage_spans)
        if self.question_spans:
            s += ", \nquestion_spans: {}".format(self.question_spans)
        if self.add_sub_expressions:
            s += ", \nadd_sub_expressions: {}".format(self.add_sub_expressions)
        if self.counts:
            s += ", \ncounts: {}".format(self.counts)
        if self.negations:
            s += ", \nnegations: {}".format(self.negations)
        if self.answer_annotations:
            s += ", \nanswer_annotations: {}".format(self.answer_annotations)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 28, Source: drop_utils.py

Example 7: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_position: %d" % (self.start_position)
    if self.end_position:
      s += ", end_position: %d" % (self.end_position)
    if self.is_impossible:
      s += ", is_impossible: %r" % (self.is_impossible)
    return s 
Developer: ZhangShiyue, Project: QGforQA, Lines: 15, Source: test_squad.py

Example 8: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.label_id is not None:
      s += ", membership label_id: %d" % (self.label_id)
    return s 
Developer: google-research, Project: language, Lines: 11, Source: run_squad_membership.py

Example 9: __str__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens[0]:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids[0]]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 10: __str__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 11: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        # s += "example_id: %s" % (tokenization.printable_text(self.example_id))
        s += ", sent_tokens: [%s]" % (" ".join(self.sent_tokens))
        if self.term_texts:
            s += ", term_texts: {}".format(self.term_texts)
        # if self.start_positions:
        #     s += ", start_positions: {}".format(self.start_positions)
        # if self.end_positions:
        #     s += ", end_positions: {}".format(self.end_positions)
        if self.polarities:
            s += ", polarities: {}".format(self.polarities)
        return s 
Developer: huminghao16, Project: SpanABSA, Lines: 15, Source: utils.py

Example 12: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        if self.is_impossible:
            s += ", is_impossible: %r" % (self.is_impossible)
        return s 
Developer: IBM, Project: MAX-Question-Answering, Lines: 15, Source: run_squad.py

Example 13: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += "doc_index: %d" % (self.doc_index)
        s += "para_index: %d" % (self.para_index)
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.answer_texts is not None:
            s += ", answer_texts: ".format(self.answer_texts)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_open_utils.py

Example 14: __repr__

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_document_utils.py

Example 15: convert_examples_to_features

# Required import: from bert import tokenization [as alias]
# Alternatively: from bert.tokenization import printable_text [as alias]
def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """
    Convert every InputExample into the token-level features the model consumes,
    returning the four inputs that BERT needs:
    input_ids: vocabulary ids of text_a (the text to classify), tokenized at the character level;
    input_mask: attention-mask flags for BERT, 1 for every real token;
    segment_ids: sentence markers, all 0 here because only text_a is present;
    label_ids: the id of the text label, not in one-hot form;
    """
    label_map = {}
    for (i, label) in enumerate(label_list):
        label_map[label] = i

    input_data = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)
        if ex_index % 10000 == 0:
            tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

        # Truncate to leave room for the [CLS] and [SEP] tokens.
        if len(tokens_a) > max_seq_length - 2:
            tokens_a = tokens_a[0:(max_seq_length - 2)]

        tokens = []
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        input_mask = [1] * len(input_ids)

        # Zero-pad input_ids, input_mask, and segment_ids up to max_seq_length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label_id = label_map[example.label]
        if ex_index < 3:
            tf.logging.info("*** Example ***")
            tf.logging.info("guid: %s" % (example.guid))
            tf.logging.info("tokens: %s" % " ".join([tokenization.printable_text(x) for x in tokens]))
            tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            tf.logging.info("label: %s (id = %d)" % (example.label, label_id))

        features = collections.OrderedDict()
        features["input_ids"] = input_ids
        features["input_mask"] = input_mask
        features["segment_ids"] = segment_ids
        features["label_ids"] =label_id
        input_data.append(features)

    return input_data 
Developer: cjymz886, Project: text_bert_cnn, Lines: 62, Source: loader.py
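
Below is a hedged usage sketch for convert_examples_to_features. The InputExample stand-in, the label list, and the vocab-file path are hypothetical; FullTokenizer is the real tokenizer class in bert.tokenization, and the function above also assumes tensorflow (tf), collections, and tokenization are imported at module level.

# Hypothetical driver for the loader above; point vocab.txt at a real BERT vocab file.
import collections
import tensorflow as tf
from bert import tokenization

class InputExample(object):
    def __init__(self, guid, text_a, label):
        self.guid = guid
        self.text_a = text_a
        self.label = label

tokenizer = tokenization.FullTokenizer(vocab_file="vocab.txt", do_lower_case=True)
examples = [InputExample(guid="train-0", text_a="a tiny classification demo", label="pos")]
features = convert_examples_to_features(examples, label_list=["neg", "pos"],
                                        max_seq_length=32, tokenizer=tokenizer)
print(features[0]["input_ids"][:8])  # padded vocabulary ids for the first example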


Note: The bert.tokenization.printable_text examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are governed by each project's license. Do not republish without permission.