本文整理汇总了Python中dictionary.Dictionary方法的典型用法代码示例。如果您正苦于以下问题:Python dictionary.Dictionary方法的具体用法?Python dictionary.Dictionary怎么用?Python dictionary.Dictionary使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 dictionary 的用法示例。
在下文中一共展示了dictionary.Dictionary方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_rel_separated_dictionary
# 需要导入模块: import dictionary [as 别名]
# 或者: from dictionary import Dictionary [as 别名]
def load_rel_separated_dictionary(filename):
    """Build two relation dictionaries from a file of dotted relation names.

    Each non-empty line is split on '.': everything before the last dot is
    added to ``rel1_dict`` and the final component to ``rel2_dict``.  Both
    dictionaries are initialised with UNK and PAD tokens.

    Args:
        filename: path to a text file with one relation name per line.

    Returns:
        Tuple ``(rel1_dict, rel2_dict)`` of ``Dictionary`` instances.
    """
    rel1_dict = Dictionary()
    rel1_dict.add_unk_token()
    rel1_dict.add_pad_token()
    rel2_dict = Dictionary()
    rel2_dict.add_unk_token()
    rel2_dict.add_pad_token()
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines.  (The original also had `if not line: break`
                # before stripping, but iterating a file never yields '' — that
                # check was dead code and has been removed.)
                continue
            parts = line.split('.')
            rel1_dict.add('.'.join(parts[:-1]))
            rel2_dict.add(parts[-1])
    return rel1_dict, rel2_dict
示例2: creat_word_rel_dict
# 需要导入模块: import dictionary [as 别名]
# 或者: from dictionary import Dictionary [as 别名]
def creat_word_rel_dict(r_file, *q_files):
    """Build a word ``Dictionary`` from question pickles and a relation pickle.

    Question tokens (whitespace-split) are added first, then the words of each
    relation name: the first 3 characters of a relation are dropped (presumably
    a fixed prefix — TODO confirm against the relation file format), and the
    remainder is split on '.' and '_' into individual words.

    Args:
        r_file: path to a pickle containing an iterable of relation strings.
        *q_files: paths to pickles, each an iterable of objects with a
            ``question`` string attribute.

    Returns:
        The populated ``Dictionary`` (with UNK, PAD and START tokens added).
    """
    word_dict = Dictionary()
    word_dict.add_unk_token()
    word_dict.add_pad_token()
    word_dict.add_start_token()
    for q_file in q_files:
        # Use a context manager so each pickle's file handle is closed
        # promptly; the original `pickle.load(open(...))` leaked handles.
        with open(q_file, 'rb') as fq:
            qa_data = pickle.load(fq)
        for data in qa_data:
            for token in data.question.split(' '):
                word_dict.add(token)
    print(len(word_dict))
    with open(r_file, 'rb') as fr:
        rels = pickle.load(fr)
    for rel in rels:
        rel_word = []
        for segment in rel[3:].split('.'):
            rel_word.extend(segment.split('_'))
        for word in rel_word:
            word_dict.add(word)
    print(len(word_dict))
    return word_dict
示例3: get_postag_data
# 需要导入模块: import dictionary [as 别名]
# 或者: from dictionary import Dictionary [as 别名]
def get_postag_data(config, train_path, dev_path, vocab_path=None, label_path=None):
    """Load and index POS-tagging train/dev data.

    Reads sentences, looks up pretrained word embeddings, builds (optionally
    pre-loaded and frozen) word and label dictionaries, and converts every
    sentence to (word-id sequence, label-id sequence) pairs.

    Args:
        config: object with ``use_se_marker`` and ``word_embedding`` attributes.
        train_path: path to the training sentences file.
        dev_path: path to the development sentences file.
        vocab_path: optional path to a fixed vocabulary (one word per line);
            when given, the word dictionary is frozen after loading it.
        label_path: optional path to a fixed label set (one label per line);
            when given, the label dictionary is frozen after loading it.

    Returns:
        Tuple ``(train_sents, dev_sents, word_dict, label_dict,
        [word_embedding], [word_embedding_shape])``.
    """
    use_se_marker = config.use_se_marker
    raw_train_sents = get_sentences(train_path, use_se_marker)
    raw_dev_sents = get_sentences(dev_path, use_se_marker)
    word_to_embeddings = get_pretrained_embeddings(WORD_EMBEDDINGS[config.word_embedding])

    # Prepare word dictionary.
    word_dict = Dictionary(unknown_token=UNKNOWN_TOKEN)
    if use_se_marker:
        word_dict.add_all([START_MARKER, END_MARKER])
    if vocab_path is not None:
        # `with` already closes the file; the original's explicit close()
        # inside the block was redundant and has been dropped.
        with open(vocab_path, 'r') as f_vocab:
            for line in f_vocab:
                word_dict.add(line.strip())
        word_dict.accept_new = False
        # Converted from a Python 2 print statement (a SyntaxError under
        # Python 3, and inconsistent with the print() calls below).
        print('Load {} words. Dictionary freezed.'.format(word_dict.size()))

    # Prepare label dictionary.
    label_dict = Dictionary()
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False
        print('Load {} labels. Dictionary freezed.'.format(label_dict.size()))

    train_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                    string_sequence_to_ids(sent[1], label_dict)) for sent in raw_train_sents]
    dev_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                  string_sequence_to_ids(sent[1], label_dict)) for sent in raw_dev_sents]

    print("Extracted {} words and {} tags".format(word_dict.size(), label_dict.size()))
    print("Max training sentence length: {}".format(max([len(s[0]) for s in train_sents])))
    print("Max development sentence length: {}".format(max([len(s[0]) for s in dev_sents])))

    word_embedding = [word_to_embeddings[w] for w in word_dict.idx2str]
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    return (train_sents, dev_sents, word_dict, label_dict, [word_embedding], [word_embedding_shape])
示例4: load_word_dictionary
# 需要导入模块: import dictionary [as 别名]
# 或者: from dictionary import Dictionary [as 别名]
def load_word_dictionary(filename, word_dict=None):
    """Load one word per line from *filename* into a ``Dictionary``.

    Args:
        filename: path to a text file with one word per line.
        word_dict: an existing ``Dictionary`` to extend; when ``None`` a fresh
            one is created with UNK and PAD tokens (assumed: the token setup
            applies only to a newly created dictionary — TODO confirm, the
            flattened original's indentation is ambiguous here).

    Returns:
        The populated ``Dictionary``.
    """
    if word_dict is None:
        word_dict = Dictionary()
        word_dict.add_unk_token()
        word_dict.add_pad_token()
    with open(filename) as f:
        for line in f:
            word = line.strip()
            if not word:
                # Skip blank lines; the original's `if not line: break` before
                # stripping was dead code (file iteration never yields '').
                continue
            word_dict.add(word)
    return word_dict