

Python tokenization.printable_text method code examples

This article collects typical usages of the Python method bert.tokenization.printable_text. If you are wondering what tokenization.printable_text does, how exactly to call it, or want to see it used in context, the curated code examples below should help. You can also explore further usage examples from the bert.tokenization module.


The following sections present 15 code examples of tokenization.printable_text, sorted by popularity by default.
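For orientation before the examples: in the BERT reference implementation, printable_text returns its argument in a form that is safe to print or pass to tf.logging, returning str input unchanged and decoding bytes as UTF-8 under Python 3. A minimal sketch of typical usage (the sample strings are illustrative):

from bert import tokenization

# str input comes back unchanged; bytes are decoded as UTF-8.
print(tokenization.printable_text(u"question"))         # question
print(tokenization.printable_text(b"r\xc3\xa9ponse"))   # réponse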

Example 1: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "document_id: %s" % (self.document_id)
        s += ", qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: %s ..." % (" ".join(self.doc_tokens[:20]))
        s += ", length of doc_tokens: %d" % (len(self.doc_tokens))
        if self.orig_answer_texts:
            s += ", orig_answer_texts: {}".format(self.orig_answer_texts)
        if self.start_positions and self.end_positions:
            s += ", start_positions: {}".format(self.start_positions)
            s += ", end_positions: {}".format(self.end_positions)
            s += ", token_answer: "
            for start, end in zip(self.start_positions, self.end_positions):
                s += "{}, ".format(" ".join(self.doc_tokens[start:(end+1)]))
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 19, Source: triviaqa_document_utils.py

Example 2: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_positions: %s" % (self.start_position)
    if self.end_position:
      s += ", end_positions: %s" % (self.end_position)
    return s 
Developer: thunlp, Project: XQA, Lines: 13, Source: run_bert_open_qa_train.py

Example 3: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    s += ", answer_text: %s" % (self.orig_answer_text)
    return s 
Developer: thunlp, Project: XQA, Lines: 10, Source: run_bert_open_qa_eval.py

Example 4: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    s += "tokens: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.tokens]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "is_random_next: %s\n" % self.is_random_next
    s += "masked_lm_positions: %s\n" % (" ".join(
        [str(x) for x in self.masked_lm_positions]))
    s += "masked_lm_labels: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.masked_lm_labels]))
    s += "\n"
    return s 
Developer: blei-lab, Project: causal-text-embeddings, Lines: 14, Source: create_pretraining_data.py

Example 5: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 13, Source: squad_utils.py

Example 6: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", \nquestion: %s" % (" ".join(self.question_tokens))
        s += ", \npassage: %s" % (" ".join(self.passage_tokens))
        if self.numbers_in_passage:
            s += ", \nnumbers_in_passage: {}".format(self.numbers_in_passage)
        if self.number_indices:
            s += ", \nnumber_indices: {}".format(self.number_indices)
        if self.answer_type:
            s += ", \nanswer_type: {}".format(self.answer_type)
        if self.number_of_answer:
            s += ", \nnumber_of_answer: {}".format(self.number_of_answer)
        if self.passage_spans:
            s += ", \npassage_spans: {}".format(self.passage_spans)
        if self.question_spans:
            s += ", \nquestion_spans: {}".format(self.question_spans)
        if self.add_sub_expressions:
            s += ", \nadd_sub_expressions: {}".format(self.add_sub_expressions)
        if self.counts:
            s += ", \ncounts: {}".format(self.counts)
        if self.negations:
            s += ", \nnegations: {}".format(self.negations)
        if self.answer_annotations:
            s += ", \nanswer_annotations: {}".format(self.answer_annotations)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 28, Source: drop_utils.py

Example 7: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_position: %d" % (self.start_position)
    if self.end_position:
      s += ", end_position: %d" % (self.end_position)
    if self.is_impossible:
      s += ", is_impossible: %r" % (self.is_impossible)
    return s 
Developer: ZhangShiyue, Project: QGforQA, Lines: 15, Source: test_squad.py

Example 8: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.label_id:
      s += ", membership label_id: %d" % (self.label_id)
    return s 
Developer: google-research, Project: language, Lines: 11, Source: run_squad_membership.py

Example 9: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens[0]:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids[0]]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 10: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 11: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        # s += "example_id: %s" % (tokenization.printable_text(self.example_id))
        s += ", sent_tokens: [%s]" % (" ".join(self.sent_tokens))
        if self.term_texts:
            s += ", term_texts: {}".format(self.term_texts)
        # if self.start_positions:
        #     s += ", start_positions: {}".format(self.start_positions)
        # if self.end_positions:
        #     s += ", end_positions: {}".format(self.end_positions)
        if self.polarities:
            s += ", polarities: {}".format(self.polarities)
        return s 
Developer: huminghao16, Project: SpanABSA, Lines: 15, Source: utils.py

Example 12: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        if self.is_impossible:
            s += ", is_impossible: %r" % (self.is_impossible)
        return s 
Developer: IBM, Project: MAX-Question-Answering, Lines: 15, Source: run_squad.py

Example 13: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += "doc_index: %d" % (self.doc_index)
        s += "para_index: %d" % (self.para_index)
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.answer_texts is not None:
            s += ", answer_texts: ".format(self.answer_texts)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_open_utils.py

Example 14: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_document_utils.py

Example 15: convert_examples_to_features

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
import collections
import tensorflow as tf

def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """
    Convert every InputExample into the token-level form the model expects,
    producing the four inputs the BERT model needs:
    input_ids: vocabulary ids of text_a (the text to classify), tokenized per character;
    input_mask: BERT's attention-mask flags; 1 for real tokens, 0 for padding;
    segment_ids: sentence ids; only text_a is used in this setting, so all 0;
    label_ids: integer id of the example's label (not one-hot).
    """
    label_map = {}
    for (i, label) in enumerate(label_list):
        label_map[label] = i

    input_data = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)
        if ex_index % 10000 == 0:
            tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

        # Truncate to leave room for [CLS] and [SEP].
        if len(tokens_a) > max_seq_length - 2:
            tokens_a = tokens_a[0:(max_seq_length - 2)]

        tokens = []
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # 1 for every real token; the zero-padding below is masked out.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to max_seq_length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label_id = label_map[example.label]
        if ex_index < 3:
            tf.logging.info("*** Example ***")
            tf.logging.info("guid: %s" % (example.guid))
            tf.logging.info("tokens: %s" % " ".join([tokenization.printable_text(x) for x in tokens]))
            tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            tf.logging.info("label: %s (id = %d)" % (example.label, label_id))

        features = collections.OrderedDict()
        features["input_ids"] = input_ids
        features["input_mask"] = input_mask
        features["segment_ids"] = segment_ids
        features["label_ids"] =label_id
        input_data.append(features)

    return input_data 
Developer: cjymz886, Project: text_bert_cnn, Lines: 62, Source: loader.py
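A minimal usage sketch for Example 15, assuming a BERT vocabulary file on disk; the InputExample stand-in and the vocab.txt path are hypothetical, while FullTokenizer comes from the same bert.tokenization module:

import collections
from bert import tokenization

# Hypothetical minimal stand-in for the InputExample class assumed above.
InputExample = collections.namedtuple("InputExample", ["guid", "text_a", "label"])

# vocab.txt is a placeholder path to a BERT vocabulary file.
tokenizer = tokenization.FullTokenizer(vocab_file="vocab.txt", do_lower_case=True)

examples = [InputExample(guid="train-0", text_a="a sample sentence", label="pos")]
features = convert_examples_to_features(examples, ["neg", "pos"], 32, tokenizer)
print(features[0]["input_ids"])   # [CLS] ... [SEP] token ids followed by zero padding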


Note: The bert.tokenization.printable_text examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.