This page collects typical usage examples of Python's nltk.tokenize.treebank.TreebankWordDetokenizer. If you have been wondering what treebank.TreebankWordDetokenizer does, how to use it, or where to find example code, the curated snippets below should help. You can also read more about the containing module, nltk.tokenize.treebank.
Five code examples of treebank.TreebankWordDetokenizer are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
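Before the examples, here is a minimal round-trip sketch using only the public NLTK API (the sample sentence is invented for illustration):

from nltk.tokenize.treebank import TreebankWordTokenizer, TreebankWordDetokenizer

tokens = TreebankWordTokenizer().tokenize("Don't stop, it works!")
# Treebank tokenization splits contractions and punctuation:
# ['Do', "n't", 'stop', ',', 'it', 'works', '!']
text = TreebankWordDetokenizer().detokenize(tokens)
# detokenize() reattaches punctuation and contractions:
# "Don't stop, it works!"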
Example 1: __init__
# Required import: from nltk.tokenize import treebank [as alias]
# or: from nltk.tokenize.treebank import TreebankWordDetokenizer [as alias]
def __init__(self, *args, **kwargs):
    if 'tokenize' in kwargs:
        raise TypeError('``TreebankEncoder`` does not take keyword argument ``tokenize``.')

    if 'detokenize' in kwargs:
        raise TypeError('``TreebankEncoder`` does not take keyword argument ``detokenize``.')

    try:
        import nltk

        # Required for moses
        nltk.download('perluniprops')
        nltk.download('nonbreaking_prefixes')

        from nltk.tokenize.treebank import TreebankWordTokenizer
        from nltk.tokenize.treebank import TreebankWordDetokenizer
    except ImportError:
        print("Please install NLTK. See the docs at http://nltk.org for more information.")
        raise

    super().__init__(
        *args,
        tokenize=TreebankWordTokenizer().tokenize,
        detokenize=TreebankWordDetokenizer().detokenize,
        **kwargs)
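A hedged usage sketch for the encoder above, assuming it is PyTorch-NLP's TreebankEncoder, a StaticTokenizerEncoder subclass that builds its vocabulary from an iterable of sample strings (the import path and the sample sentences are assumptions, not part of the excerpt):

from torchnlp.encoders.text import TreebankEncoder  # assumed import path

# Build a vocabulary from sample text; tokenize/detokenize are wired to
# the Treebank tokenizer pair by the __init__ shown above.
encoder = TreebankEncoder(["This ain't funny.", "Don't?"])
tensor = encoder.encode("This ain't funny.")
print(encoder.decode(tensor))  # detokenized via TreebankWordDetokenizer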
Example 2: __init__
# Required import: from nltk.tokenize import treebank [as alias]
# or: from nltk.tokenize.treebank import TreebankWordDetokenizer [as alias]
def __init__(self, config, train_data_loader, eval_data_loader, vocab, is_train=True, model=None):
    self.config = config
    self.epoch_i = 0
    self.train_data_loader = train_data_loader
    self.eval_data_loader = eval_data_loader
    self.vocab = vocab
    self.is_train = is_train
    self.model = model
    self.detokenizer = Detok()

    if config.emotion or config.infersent or config.context_input_only:
        self.botmoji = Botmoji()
        self.botsent = Botsent(config.dataset_dir.joinpath('train'), version=1, explained_var=0.95)

    # Info for saving epoch metrics to a csv file
    if self.config.mode == 'train':
        self.pandas_path = os.path.join(config.save_path, "metrics.csv")
        self.outfile_dict = {k: getattr(config, k) for k in OUTPUT_FILE_PARAMS}
        self.df = pd.DataFrame()

    self.save_priming_sentences()
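The snippet above constructs Detok() rather than naming the class directly; in line with the "[as alias]" note in the import comments, the source module presumably aliases the detokenizer on import. A minimal sketch of the assumed alias:

from nltk.tokenize.treebank import TreebankWordDetokenizer as Detok  # assumed alias, not shown in the excerpt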
Example 3: get_detokenize
# Required import: from nltk.tokenize import treebank [as alias]
# or: from nltk.tokenize.treebank import TreebankWordDetokenizer [as alias]
def get_detokenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)
Example 4: get_dekenize
# Required import: from nltk.tokenize import treebank [as alias]
# or: from nltk.tokenize.treebank import TreebankWordDetokenizer [as alias]
def get_dekenize():
    return lambda x: TreebankWordDetokenizer().detokenize(x)
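Examples 3 and 4 are functionally identical (get_dekenize looks like a typo for get_detokenize; the name is kept as it appears in the source). A brief usage sketch of the returned callable, with an invented token list:

detok = get_detokenize()
print(detok(["hello", ",", "world", "!"]))  # -> "hello, world!"

Note that every call to the returned lambda constructs a fresh TreebankWordDetokenizer; hoisting a single instance out of the lambda would avoid the repeated construction.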
Example 5: detokenize
# Required import: from nltk.tokenize import treebank [as alias]
# or: from nltk.tokenize.treebank import TreebankWordDetokenizer [as alias]
def detokenize(line):
    """
    Detokenizes the processed CNN/DM dataset to recover the original dataset,
    e.g. converts "-LRB-" back to "(" and "-RRB-" back to ")".
    """
    line = line.strip().replace("``", '"').replace("''", '"').replace("`", "'")
    twd = TreebankWordDetokenizer()
    s_list = [
        twd.detokenize(x.strip().split(" "), convert_parentheses=True)
        for x in line.split("<S_SEP>")
    ]
    return " ".join(s_list)