This article collects typical usage examples of the Python method bert.tokenization.printable_text. If you have been wondering what tokenization.printable_text is for and how to use it in Python, the curated examples below may help. You can also explore further usage examples from bert.tokenization, the module this method belongs to.
The 15 code examples of tokenization.printable_text below are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
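Before the examples, it helps to know what the method does. In the upstream BERT repository, printable_text coerces a token that may be str or bytes into a plain str that is safe to print or pass to tf.logging. A minimal Python 3 sketch of that behavior (simplified; the original also handles Python 2 via six):

def printable_text(text):
    """Return `text` as a printable str (Python 3 sketch of BERT's helper)."""
    if isinstance(text, str):
        return text
    elif isinstance(text, bytes):
        # Tokens may arrive as UTF-8 bytes; decode and drop invalid sequences.
        return text.decode("utf-8", "ignore")
    else:
        raise ValueError("Unsupported string type: %s" % type(text))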
Example 1: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "document_id: %s" % (self.document_id)
    s += ", qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: %s ..." % (" ".join(self.doc_tokens[:20]))
    s += ", length of doc_tokens: %d" % (len(self.doc_tokens))
    if self.orig_answer_texts:
        s += ", orig_answer_texts: {}".format(self.orig_answer_texts)
    if self.start_positions and self.end_positions:
        s += ", start_positions: {}".format(self.start_positions)
        s += ", end_positions: {}".format(self.end_positions)
        s += ", token_answer: "
        for start, end in zip(self.start_positions, self.end_positions):
            s += "{}, ".format(" ".join(self.doc_tokens[start:(end + 1)]))
    return s
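The __repr__/__str__ examples here assume an example class whose constructor sets the attributes being printed. A hypothetical minimal container (the class name and constructor are assumptions, not from the source) that the method in Example 1 could be attached to:

class QAExample:
    """Hypothetical container with the attributes Example 1 prints."""
    def __init__(self, document_id, qas_id, question_text, doc_tokens,
                 orig_answer_texts=None, start_positions=None,
                 end_positions=None):
        self.document_id = document_id
        self.qas_id = qas_id
        self.question_text = question_text
        self.doc_tokens = doc_tokens
        self.orig_answer_texts = orig_answer_texts
        self.start_positions = start_positions
        self.end_positions = end_positions

    # ... paste the __repr__ from Example 1 here ...

example = QAExample("doc-1", "q-1", "what is printed?",
                    "tokens from the source document".split())
print(repr(example))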
Example 2: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
        s += ", start_positions: %s" % (self.start_position)
    if self.end_position:
        s += ", end_positions: %s" % (self.end_position)
    return s
Example 3: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "id: %s" % (self.qid)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    s += ", answer_text: %s" % (self.orig_answer_text)
    return s
Example 4: __str__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    s += "tokens: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.tokens]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "is_random_next: %s\n" % self.is_random_next
    s += "masked_lm_positions: %s\n" % (" ".join(
        [str(x) for x in self.masked_lm_positions]))
    s += "masked_lm_labels: %s\n" % (" ".join(
        [tokenization.printable_text(x) for x in self.masked_lm_labels]))
    s += "\n"
    return s
Example 5: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
        s += ", start_position: %d" % (self.start_position)
    if self.end_position:
        s += ", end_position: %d" % (self.end_position)
    return s
Example 6: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", \nquestion: %s" % (" ".join(self.question_tokens))
    s += ", \npassage: %s" % (" ".join(self.passage_tokens))
    if self.numbers_in_passage:
        s += ", \nnumbers_in_passage: {}".format(self.numbers_in_passage)
    if self.number_indices:
        s += ", \nnumber_indices: {}".format(self.number_indices)
    if self.answer_type:
        s += ", \nanswer_type: {}".format(self.answer_type)
    if self.number_of_answer:
        s += ", \nnumber_of_answer: {}".format(self.number_of_answer)
    if self.passage_spans:
        s += ", \npassage_spans: {}".format(self.passage_spans)
    if self.question_spans:
        s += ", \nquestion_spans: {}".format(self.question_spans)
    if self.add_sub_expressions:
        s += ", \nadd_sub_expressions: {}".format(self.add_sub_expressions)
    if self.counts:
        s += ", \ncounts: {}".format(self.counts)
    if self.negations:
        s += ", \nnegations: {}".format(self.negations)
    if self.answer_annotations:
        s += ", \nanswer_annotations: {}".format(self.answer_annotations)
    return s
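The long chain of optional fields in Example 6 can be written more compactly. A hedged alternative sketch (not from the source) that produces the same string by looping over (name, value) pairs:

def __repr__(self):
    parts = ["qas_id: %s" % tokenization.printable_text(self.qas_id),
             "question: %s" % " ".join(self.question_tokens),
             "passage: %s" % " ".join(self.passage_tokens)]
    optional = [("numbers_in_passage", self.numbers_in_passage),
                ("number_indices", self.number_indices),
                ("answer_type", self.answer_type),
                ("number_of_answer", self.number_of_answer),
                ("passage_spans", self.passage_spans),
                ("question_spans", self.question_spans),
                ("add_sub_expressions", self.add_sub_expressions),
                ("counts", self.counts),
                ("negations", self.negations),
                ("answer_annotations", self.answer_annotations)]
    # Append only the fields that are set, mirroring the if-chain above.
    for name, value in optional:
        if value:
            parts.append("%s: %s" % (name, value))
    return ", \n".join(parts)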
Example 7: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
        s += ", start_position: %d" % (self.start_position)
    if self.end_position:
        s += ", end_position: %d" % (self.end_position)
    if self.is_impossible:
        s += ", is_impossible: %r" % (self.is_impossible)
    return s
Example 8: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.label_id:
        s += ", membership label_id: %d" % (self.label_id)
    return s
Example 9: __str__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens[0]:
        s += "tokens: %s\n" % (" ".join(
            [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids[0]]))
    s += "\n"
    return s
Example 10: __str__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __str__(self):
    s = ""
    for sent in self.tokens:
        s += "tokens: %s\n" % (" ".join(
            [tokenization.printable_text(x) for x in sent]))
    s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids]))
    s += "\n"
    return s
Example 11: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    # s += "example_id: %s" % (tokenization.printable_text(self.example_id))
    s += "sent_tokens: [%s]" % (" ".join(self.sent_tokens))
    if self.term_texts:
        s += ", term_texts: {}".format(self.term_texts)
    # if self.start_positions:
    #     s += ", start_positions: {}".format(self.start_positions)
    # if self.end_positions:
    #     s += ", end_positions: {}".format(self.end_positions)
    if self.polarities:
        s += ", polarities: {}".format(self.polarities)
    return s
Example 12: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
    if self.start_position:
        s += ", start_position: %d" % (self.start_position)
    if self.end_position:
        s += ", end_position: %d" % (self.end_position)
    if self.is_impossible:
        s += ", is_impossible: %r" % (self.is_impossible)
    return s
Example 13: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", doc_index: %d" % (self.doc_index)
    s += ", para_index: %d" % (self.para_index)
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    if self.answer_texts is not None:
        s += ", answer_texts: {}".format(self.answer_texts)
    return s
Example 14: __repr__
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
def __repr__(self):
    s = ""
    s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
    s += ", question_text: %s" % (
        tokenization.printable_text(self.question_text))
    if self.start_position:
        s += ", start_position: %d" % (self.start_position)
    if self.end_position:
        s += ", end_position: %d" % (self.end_position)
    return s
Example 15: convert_examples_to_features
# Required import: from bert import tokenization [as alias]
# Or: from bert.tokenization import printable_text [as alias]
import collections

import tensorflow as tf

def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """
    Convert every InputExample into the token-level features the model
    consumes, producing the four inputs the BERT model needs:
      input_ids:   vocabulary token ids of text_a (the text to classify),
                   tokenized at character level;
      input_mask:  BERT's mask flags, 1 for real tokens (0 for padding);
      segment_ids: sentence markers; this scenario only has text_a, so all 0;
      label_ids:   the integer id of the text's label (not one-hot encoded).
    """
    label_map = {}
    for (i, label) in enumerate(label_list):
        label_map[label] = i
    input_data = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)
        if ex_index % 10000 == 0:
            tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
        # Truncate to leave room for [CLS] and [SEP].
        if len(tokens_a) > max_seq_length - 2:
            tokens_a = tokens_a[0:(max_seq_length - 2)]
        tokens = []
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)
        # Zero-pad up to the maximum sequence length.
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        label_id = label_map[example.label]
        if ex_index < 3:
            tf.logging.info("*** Example ***")
            tf.logging.info("guid: %s" % (example.guid))
            tf.logging.info("tokens: %s" % " ".join(
                [tokenization.printable_text(x) for x in tokens]))
            tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            tf.logging.info("label: %s (id = %d)" % (example.label, label_id))
        features = collections.OrderedDict()
        features["input_ids"] = input_ids
        features["input_mask"] = input_mask
        features["segment_ids"] = segment_ids
        features["label_ids"] = label_id
        input_data.append(features)
    return input_data
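A hedged usage sketch for Example 15, assuming a TF 1.x environment (where tf.logging exists) and BERT's FullTokenizer. The record class below is hypothetical; the function only reads its text_a, label, and guid fields, and the vocab path is a placeholder:

import tensorflow as tf
from bert import tokenization

class InputExample:
    """Hypothetical record; only these three fields are read."""
    def __init__(self, guid, text_a, label):
        self.guid = guid
        self.text_a = text_a
        self.label = label

tf.logging.set_verbosity(tf.logging.INFO)
tokenizer = tokenization.FullTokenizer(
    vocab_file="/path/to/vocab.txt",  # placeholder; point at a real BERT vocab
    do_lower_case=True)
examples = [InputExample("train-0", "this movie was great", "pos"),
            InputExample("train-1", "boring and too long", "neg")]
features = convert_examples_to_features(
    examples, label_list=["neg", "pos"], max_seq_length=32, tokenizer=tokenizer)
print(features[0]["input_ids"])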