

Python tokenization.printable_text method code examples

This article collects typical usages of the Python method bert.tokenization.printable_text. If you are wondering what tokenization.printable_text does, how exactly to call it, or want to see it used in context, the curated code examples below should help. You can also explore further usage examples from the bert.tokenization module.


The following sections present 15 code examples of tokenization.printable_text, sorted by popularity by default.
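For orientation before the examples: in the BERT reference implementation, printable_text returns its argument in a form that is safe to print or pass to tf.logging, returning str input unchanged and decoding bytes as UTF-8 under Python 3. A minimal sketch of typical usage (the sample strings are illustrative):

from bert import tokenization

# str input comes back unchanged; bytes are decoded as UTF-8.
print(tokenization.printable_text(u"question"))         # question
print(tokenization.printable_text(b"r\xc3\xa9ponse"))   # réponse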

Example 1: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "document_id: %s" % (self.document_id)
        s += ", qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: %s ..." % (" ".join(self.doc_tokens[:20]))
        s += ", length of doc_tokens: %d" % (len(self.doc_tokens))
        if self.orig_answer_texts:
            s += ", orig_answer_texts: {}".format(self.orig_answer_texts)
        if self.start_positions and self.end_positions:
            s += ", start_positions: {}".format(self.start_positions)
            s += ", end_positions: {}".format(self.end_positions)
            s += ", token_answer: "
            for start, end in zip(self.start_positions, self.end_positions):
                s += "{}, ".format(" ".join(self.doc_tokens[start:(end+1)]))
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 19, Source: triviaqa_document_utils.py

Example 2: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_positions: %s" % (self.start_position)
    if self.end_position:
      s += ", end_positions: %s" % (self.end_position)
    return s 
Developer: thunlp, Project: XQA, Lines: 13, Source: run_bert_open_qa_train.py

Example 3: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    s += ", answer_text: %s" % (self.orig_answer_text)
    return s 
Developer: thunlp, Project: XQA, Lines: 10, Source: run_bert_open_qa_eval.py

Example 4: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    s += "tokens: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.tokens]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "is_random_next: %s\n" % self.is_random_next
    s += "masked_lm_positions: %s\n" % (" ".join(
        [str(x) for x in self.masked_lm_positions]))
    s += "masked_lm_labels: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.masked_lm_labels]))
    s += "\n"
    return s 
Developer: blei-lab, Project: causal-text-embeddings, Lines: 14, Source: create_pretraining_data.py

Example 5: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 13, Source: squad_utils.py

Example 6: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", \nquestion: %s" % (" ".join(self.question_tokens))
        s += ", \npassage: %s" % (" ".join(self.passage_tokens))
        if self.numbers_in_passage:
            s += ", \nnumbers_in_passage: {}".format(self.numbers_in_passage)
        if self.number_indices:
            s += ", \nnumber_indices: {}".format(self.number_indices)
        if self.answer_type:
            s += ", \nanswer_type: {}".format(self.answer_type)
        if self.number_of_answer:
            s += ", \nnumber_of_answer: {}".format(self.number_of_answer)
        if self.passage_spans:
            s += ", \npassage_spans: {}".format(self.passage_spans)
        if self.question_spans:
            s += ", \nquestion_spans: {}".format(self.question_spans)
        if self.add_sub_expressions:
            s += ", \nadd_sub_expressions: {}".format(self.add_sub_expressions)
        if self.counts:
            s += ", \ncounts: {}".format(self.counts)
        if self.negations:
            s += ", \nnegations: {}".format(self.negations)
        if self.answer_annotations:
            s += ", \nanswer_annotations: {}".format(self.answer_annotations)
        return s 
Developer: huminghao16, Project: MTMSN, Lines: 28, Source: drop_utils.py

Example 7: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
      s += ", start_position: %d" % (self.start_position)
    if self.end_position:
      s += ", end_position: %d" % (self.end_position)
    if self.is_impossible:
      s += ", is_impossible: %r" % (self.is_impossible)
    return s 
Developer: ZhangShiyue, Project: QGforQA, Lines: 15, Source: test_squad.py

Example 8: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.label_id:
      s += ", membership label_id: %d" % (self.label_id)
    return s 
Developer: google-research, Project: language, Lines: 11, Source: run_squad_membership.py

Example 9: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens[0]:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids[0]]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 10: __str__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens:
      s += "tokens: %s\n" % (" ".join(
          [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "\n"
    return s 
Developer: google-research, Project: language, Lines: 10, Source: preprocessing_utils.py

Example 11: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        # s += "example_id: %s" % (tokenization.printable_text(self.example_id))
        s += ", sent_tokens: [%s]" % (" ".join(self.sent_tokens))
        if self.term_texts:
            s += ", term_texts: {}".format(self.term_texts)
        # if self.start_positions:
        #     s += ", start_positions: {}".format(self.start_positions)
        # if self.end_positions:
        #     s += ", end_positions: {}".format(self.end_positions)
        if self.polarities:
            s += ", polarities: {}".format(self.polarities)
        return s 
Developer: huminghao16, Project: SpanABSA, Lines: 15, Source: utils.py

Example 12: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        if self.is_impossible:
            s += ", is_impossible: %r" % (self.is_impossible)
        return s 
Developer: IBM, Project: MAX-Question-Answering, Lines: 15, Source: run_squad.py

Example 13: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += "doc_index: %d" % (self.doc_index)
        s += "para_index: %d" % (self.para_index)
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.answer_texts is not None:
            s += ", answer_texts: ".format(self.answer_texts)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_open_utils.py

Example 14: __repr__

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
        return s 
Developer: huminghao16, Project: RE3QA, Lines: 12, Source: squad_document_utils.py

Example 15: convert_examples_to_features

# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
import collections
import tensorflow as tf

def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """
    Convert every InputExample into the token-level form the model expects,
    producing the four inputs the BERT model needs:
    input_ids: vocabulary ids of text_a (the text to classify), tokenized per character;
    input_mask: BERT's attention-mask flags; 1 for real tokens, 0 for padding;
    segment_ids: sentence ids; only text_a is used in this setting, so all 0;
    label_ids: integer id of the example's label (not one-hot).
    """
    label_map = {}
    for (i, label) in enumerate(label_list):
        label_map[label] = i

    input_data = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)
        if ex_index % 10000 == 0:
            tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

        # Truncate to leave room for [CLS] and [SEP].
        if len(tokens_a) > max_seq_length - 2:
            tokens_a = tokens_a[0:(max_seq_length - 2)]

        tokens = []
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # 1 for every real token; the zero-padding below is masked out.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to max_seq_length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label_id = label_map[example.label]
        if ex_index < 3:
            tf.logging.info("*** Example ***")
            tf.logging.info("guid: %s" % (example.guid))
            tf.logging.info("tokens: %s" % " ".join([tokenization.printable_text(x) for x in tokens]))
            tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            tf.logging.info("label: %s (id = %d)" % (example.label, label_id))

        features = collections.OrderedDict()
        features["input_ids"] = input_ids
        features["input_mask"] = input_mask
        features["segment_ids"] = segment_ids
        features["label_ids"] =label_id
        input_data.append(features)

    return input_data 
Developer: cjymz886, Project: text_bert_cnn, Lines: 62, Source: loader.py
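A minimal usage sketch for Example 15, assuming a BERT vocabulary file on disk; the InputExample stand-in and the vocab.txt path are hypothetical, while FullTokenizer comes from the same bert.tokenization module:

import collections
from bert import tokenization

# Hypothetical minimal stand-in for the InputExample class assumed above.
InputExample = collections.namedtuple("InputExample", ["guid", "text_a", "label"])

# vocab.txt is a placeholder path to a BERT vocabulary file.
tokenizer = tokenization.FullTokenizer(vocab_file="vocab.txt", do_lower_case=True)

examples = [InputExample(guid="train-0", text_a="a sample sentence", label="pos")]
features = convert_examples_to_features(examples, ["neg", "pos"], 32, tokenizer)
print(features[0]["input_ids"])   # [CLS] ... [SEP] token ids followed by zero padding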


Note: The bert.tokenization.printable_text examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.