This article collects typical usage examples of the spacy.util.minibatch method in Python: what util.minibatch does, how to call it, and what working code looks like. For broader context, see the spacy.util module the method belongs to.

Two code examples of util.minibatch are shown below.
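For orientation, here is a minimal sketch of what minibatch does, assuming spaCy v2.x (the API these examples target): it consumes an iterable of items and yields lists of items, and size may be a fixed int or an iterator such as compounding, which yields a geometrically growing value.

from spacy.util import minibatch, compounding

items = list(range(20))
# compounding(4.0, 8.0, 1.5) yields 4.0, 6.0, 8.0, 8.0, ... (each value is the
# previous one times 1.5, capped at 8.0); minibatch truncates each to an int.
for batch in minibatch(items, size=compounding(4.0, 8.0, 1.5)):
    print(len(batch))
# prints 4, 6, 8, 2 (the final batch holds whatever items remain)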
Example 1: _train_model
# Required module: from spacy import util [as alias]
# or: from spacy.util import minibatch [as alias]
import random

from spacy.util import minibatch, compounding


def _train_model(nlp, train_data, n_iter=5):
    optimizer = nlp.begin_training()
    # Batch sizes grow from 4 to 32, compounding by a factor of 1.001 per batch.
    batch_sizes = compounding(4.0, 32.0, 1.001)
    for _ in range(n_iter):
        losses = {}
        random.shuffle(train_data)
        batches = minibatch(train_data, size=batch_sizes)
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
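For context, a hedged usage sketch of _train_model follows; the blank English model, NER component, label, and single training pair are illustrative assumptions, but the (text, annotations) tuple format is the standard spaCy v2.x training convention.

import spacy

# Hypothetical setup: a blank model with one pipe and one made-up example
# in spaCy v2's (text, {"entities": [(start, end, label)]}) format.
nlp = spacy.blank("en")
ner = nlp.create_pipe("ner")
ner.add_label("ORG")
nlp.add_pipe(ner)

train_data = [
    ("Uber blew through $1 million a week", {"entities": [(0, 4, "ORG")]}),
]
_train_model(nlp, train_data, n_iter=5)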
Example 2: pipeline
# Required module: from spacy import util [as alias]
# or: from spacy.util import minibatch [as alias]
import os
import random
from os import path
from pathlib import Path

import spacy
from spacy.util import minibatch, compounding

# get_args, n_iter, TAG_MAP and spacy_data_reader are project-level names
# defined elsewhere in the original repository.


def pipeline():
    """Create a new model, set up the pipeline and train the tagger. In order to
    train the tagger with a custom tag map, we're creating a new Language
    instance with a custom vocab.
    """
    args = get_args()
    print(args)
    curr_dir = path.dirname(path.abspath(__file__))
    lang = args.language
    print(lang)
    output_dir = path.join(curr_dir, "outputs")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # model_path is computed but not used further in this example
    model_path = "%s/spacymodels/%s/%s.model" % (curr_dir, args.language, args.tag_type)
    data_path = "%s/data/train/%s/train.%s.conll" % (curr_dir, args.language, args.encoding)
    TRAIN_DATA = spacy_data_reader.spacy_load_data(data_path)

    nlp = spacy.blank(lang)
    # add the tagger to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    tagger = nlp.create_pipe('tagger')
    # Add the tags. This needs to be done before you start training.
    for tag, values in TAG_MAP.items():
        tagger.add_label(tag, values)
    nlp.add_pipe(tagger)

    optimizer = nlp.begin_training()
    for i in range(n_iter):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4., 32., 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, losses=losses)
        print('Losses', losses)

    # test the trained model
    test_text = "నా నా కధ అందరి అందరి ఆడపిల్లల కధే ."
    doc = nlp(test_text)
    print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc = nlp2(test_text)
        print('Tags', [(t.text, t.tag_, t.pos_) for t in doc])
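The TAG_MAP consumed by tagger.add_label above is not shown in the source; for reference only, a hypothetical tag map in the shape spaCy v2's tagger-training examples use, mapping each fine-grained tag to a dict of attributes such as the coarse part of speech:

# Hypothetical tag map (spaCy v2 convention): fine-grained tag -> attributes.
TAG_MAP = {
    "N": {"pos": "NOUN"},
    "V": {"pos": "VERB"},
    "J": {"pos": "ADJ"},
}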