本文整理匯總了Python中bert.tokenization.convert_to_unicode方法的典型用法代碼示例。如果您正苦於以下問題:Python tokenization.convert_to_unicode方法的具體用法?Python tokenization.convert_to_unicode怎麽用?Python tokenization.convert_to_unicode使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類bert.tokenization
的用法示例。
在下文中一共展示了tokenization.convert_to_unicode方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: read_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def read_examples(input_file):
    """Read a list of `InputExample`s from an input file.

    Each line becomes one example. A line of the form "A ||| B" is split
    into a sentence pair (text_a, text_b); any other line is a single
    sentence. Reading stops at end of file.
    """
    results = []
    next_id = 0
    with tf.gfile.GFile(input_file, "r") as reader:
        while True:
            raw = tokenization.convert_to_unicode(reader.readline())
            if not raw:
                break  # EOF: readline() returned an empty string
            stripped = raw.strip()
            match = re.match(r"^(.*) \|\|\| (.*)$", stripped)
            if match:
                first, second = match.group(1), match.group(2)
            else:
                first, second = stripped, None
            results.append(
                InputExample(unique_id=next_id, text_a=first, text_b=second))
            next_id += 1
    return results
示例2: read_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def read_examples(lst_strs):
    """Yield an `InputExample` for each non-empty string in `lst_strs`.

    Strings of the form "A ||| B" become sentence pairs; anything else
    becomes a single-sentence example. Empty strings are skipped and do
    not consume an id.
    """
    next_id = 0
    for raw in lst_strs:
        text = tokenization.convert_to_unicode(raw)
        if not text:
            continue
        text = text.strip()
        match = re.match(r"^(.*) \|\|\| (.*)$", text)
        if match:
            first, second = match.group(1), match.group(2)
        else:
            first, second = text, None
        yield InputExample(unique_id=next_id, text_a=first, text_b=second)
        next_id += 1
示例3: get_train_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def get_train_examples(self, data_dir):
    """See base class."""
    path = os.path.join(data_dir, "multinli",
                        "multinli.train.%s.tsv" % self.language)
    rows = self._read_tsv(path)
    # XNLI's MultiNLI training files use "contradictory" where the label
    # set expects "contradiction"; normalize it.
    contradictory = tokenization.convert_to_unicode("contradictory")
    examples = []
    for idx, row in enumerate(rows):
        if idx == 0:
            continue  # header row
        label = tokenization.convert_to_unicode(row[2])
        if label == contradictory:
            label = tokenization.convert_to_unicode("contradiction")
        examples.append(
            InputExample(
                guid="train-%d" % (idx),
                text_a=tokenization.convert_to_unicode(row[0]),
                text_b=tokenization.convert_to_unicode(row[1]),
                label=label))
    return examples
示例4: get_dev_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def get_dev_examples(self, data_dir):
    """See base class."""
    rows = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
    # The dev file mixes all XNLI languages; keep only ours.
    wanted = tokenization.convert_to_unicode(self.language)
    examples = []
    for idx, row in enumerate(rows):
        if idx == 0:
            continue  # header row
        if tokenization.convert_to_unicode(row[0]) != wanted:
            continue
        examples.append(
            InputExample(
                guid="dev-%d" % (idx),
                text_a=tokenization.convert_to_unicode(row[6]),
                text_b=tokenization.convert_to_unicode(row[7]),
                label=tokenization.convert_to_unicode(row[1])))
    return examples
示例5: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    is_test = set_type == "test"
    examples = []
    for idx, row in enumerate(lines):
        if idx == 0:
            continue  # header row
        # Test rows carry no gold label; a placeholder keeps the pipeline uniform.
        if is_test:
            label = "contradiction"
        else:
            label = tokenization.convert_to_unicode(row[-1])
        examples.append(
            InputExample(
                guid="%s-%s" % (set_type, tokenization.convert_to_unicode(row[0])),
                text_a=tokenization.convert_to_unicode(row[8]),
                text_b=tokenization.convert_to_unicode(row[9]),
                label=label))
    return examples
示例6: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    col = 0
    for idx, row in enumerate(lines):
        if idx == 0:
            # Header row: find which column is named "sentence".
            for pos, cell in enumerate(row):
                if cell.strip() == "sentence":
                    col = pos
            continue
        text = tokenization.convert_to_unicode(row[col])
        # Test rows have no gold label; use a fixed placeholder.
        if set_type == "test":
            label = "true"
        else:
            label = tokenization.convert_to_unicode(row[1])
        examples.append(
            InputExample(
                guid="%s-%s" % (set_type, idx),
                text_a=text,
                text_b=None,
                label=label))
    return examples
示例7: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    col = 0
    for idx, row in enumerate(lines):
        if idx == 0:
            # Header row: find which column is named "sentence".
            for pos, cell in enumerate(row):
                if cell.strip() == "sentence":
                    col = pos
            continue
        text = tokenization.convert_to_unicode(row[col])
        # Labels here are two-way soft probabilities; test rows get a
        # fixed placeholder distribution.
        if set_type == "test":
            label = [1.0, 0]
        else:
            label = [float(row[2]), float(row[3])]
        examples.append(
            InputExample(
                guid="%s-%s" % (set_type, idx),
                text_a=text,
                text_b=None,
                label=label))
    return examples
示例8: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for idx, row in enumerate(lines):
        if idx == 0:
            continue  # header row
        # Test rows carry no gold label; "0" is a placeholder.
        label = "0" if set_type == "test" else tokenization.convert_to_unicode(row[0])
        examples.append(
            InputExample(
                guid="%s-%s" % (set_type, idx),
                text_a=tokenization.convert_to_unicode(row[3]),
                text_b=tokenization.convert_to_unicode(row[4]),
                label=label))
    return examples
示例9: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    print("length of lines:", len(lines))
    for idx, row in enumerate(lines):
        if idx == 0:
            continue  # header row
        try:
            # Malformed rows (missing columns) are reported and skipped
            # rather than aborting the whole read.
            examples.append(
                InputExample(
                    guid="%s-%s" % (set_type, idx),
                    text_a=tokenization.convert_to_unicode(row[0]),
                    text_b=tokenization.convert_to_unicode(row[1]),
                    label=tokenization.convert_to_unicode(row[2])))
        except Exception:  # pylint: disable=broad-except
            print("###error.i:", idx, row)
    return examples
示例10: get_train_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def get_train_examples(self, data_dir):
    """See base class."""
    rows = self._read_tsv(
        os.path.join(data_dir, "multinli",
                     "multinli.train.%s.tsv" % self.language))
    examples = []
    for idx, row in enumerate(rows):
        if idx == 0:
            continue  # header row
        label = tokenization.convert_to_unicode(row[2])
        # Normalize the training file's "contradictory" spelling to the
        # canonical "contradiction" label.
        if label == tokenization.convert_to_unicode("contradictory"):
            label = tokenization.convert_to_unicode("contradiction")
        examples.append(
            InputExample(
                guid="train-%d" % (idx),
                text_a=tokenization.convert_to_unicode(row[0]),
                text_b=tokenization.convert_to_unicode(row[1]),
                label=label))
    return examples
示例11: get_dev_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def get_dev_examples(self, data_dir):
    """See base class."""
    rows = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
    examples = []
    for idx, row in enumerate(rows):
        if idx == 0:
            continue  # header row
        # The dev file contains every XNLI language; keep only ours.
        row_lang = tokenization.convert_to_unicode(row[0])
        if row_lang != tokenization.convert_to_unicode(self.language):
            continue
        examples.append(
            InputExample(
                guid="dev-%d" % (idx),
                text_a=tokenization.convert_to_unicode(row[6]),
                text_b=tokenization.convert_to_unicode(row[7]),
                label=tokenization.convert_to_unicode(row[1])))
    return examples
示例12: _create_examples
# 需要導入模塊: from bert import tokenization [as 別名]
# 或者: from bert.tokenization import convert_to_unicode [as 別名]
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    is_test = set_type == "test"
    examples = []
    for idx, row in enumerate(lines):
        if idx == 0:
            continue  # header row
        if is_test:
            # Test rows carry no gold label; "0" is a placeholder.
            label = "0"
        else:
            label = tokenization.convert_to_unicode(row[0])
        examples.append(
            InputExample(
                guid="%s-%s" % (set_type, idx),
                text_a=tokenization.convert_to_unicode(row[3]),
                text_b=tokenization.convert_to_unicode(row[4]),
                label=label))
    return examples