This article collects typical usage examples of the spacy.load function in Python. If you have been wondering what exactly spacy.load does, how to call it, or what it looks like in real code, the curated samples below should help.
15 code examples of the load function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
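Before diving into the curated examples, here is a minimal sketch of the basic call, assuming the en_core_web_sm package has already been installed with `python -m spacy download en_core_web_sm`:

import spacy

# load an installed pipeline package and run it over a text
nlp = spacy.load('en_core_web_sm')
doc = nlp('Apple is looking at buying a U.K. startup.')
for token in doc:
    print(token.text, token.pos_, token.dep_)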
Example 1: get_spacy_model
def get_spacy_model(spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool) -> SpacyModelType:
    """
    In order to avoid loading spacy models a whole bunch of times, we'll save references to them,
    keyed by the options we used to create the spacy model, so any particular configuration only
    gets loaded once.
    """
    options = (spacy_model_name, pos_tags, parse, ner)
    if options not in LOADED_SPACY_MODELS:
        disable = ['vectors', 'textcat']
        if not pos_tags:
            disable.append('tagger')
        if not parse:
            disable.append('parser')
        if not ner:
            disable.append('ner')
        try:
            spacy_model = spacy.load(spacy_model_name, disable=disable)
        except OSError:
            logger.warning(f"Spacy model '{spacy_model_name}' not found. Downloading and installing.")
            spacy_download(spacy_model_name)
            spacy_model = spacy.load(spacy_model_name, disable=disable)
        LOADED_SPACY_MODELS[options] = spacy_model
    return LOADED_SPACY_MODELS[options]
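A hypothetical call pattern for this helper, assuming LOADED_SPACY_MODELS is a module-level dict and SpacyModelType aliases spacy.language.Language (both defined elsewhere in the source project):

# the second call is a cache hit and returns the same object
nlp_a = get_spacy_model('en_core_web_sm', pos_tags=True, parse=False, ner=False)
nlp_b = get_spacy_model('en_core_web_sm', pos_tags=True, parse=False, ner=False)
assert nlp_a is nlp_b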
Example 2: main
def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
    """Set up the pipeline and entity recognizer, and train the new entity."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
    # Add entity recognizer to model if it's not in the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner)
    # otherwise, get it, so we can add labels to it
    else:
        ner = nlp.get_pipe('ner')
    ner.add_label(LABEL)  # add new entity label to entity recognizer
    if model is None:
        optimizer = nlp.begin_training()
    else:
        # Note that 'begin_training' initializes the models, so it'll zero out
        # existing entity types.
        optimizer = nlp.entity.create_optimizer()
    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
                           losses=losses)
            print(losses)
    # test the trained model
    test_text = 'Do you like horses?'
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)
    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.meta['name'] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)
        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc2 = nlp2(test_text)
        for ent in doc2.ents:
            print(ent.label_, ent.text)
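TRAIN_DATA and LABEL are defined elsewhere in that script; in spaCy v2's training examples they take roughly this shape (illustrative values):

LABEL = 'ANIMAL'
TRAIN_DATA = [
    ('Horses are too tall and they pretend to care about your feelings',
     {'entities': [(0, 6, 'ANIMAL')]}),
    ('horses pretend to care about your feelings',
     {'entities': [(0, 6, 'ANIMAL')]}),
]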
Example 3: get_nlp
def get_nlp(lang="en"):
    """Load spaCy model for a given language, determined by the `models` dict or by MODEL_ENV_VAR"""
    instance = nlp.get(lang)
    if instance is None:
        import spacy
        model = models.get(lang)
        if not model:
            models[lang] = model = os.environ.get("_".join((MODEL_ENV_VAR, lang.upper()))) or \
                os.environ.get(MODEL_ENV_VAR) or DEFAULT_MODEL.get(lang, "xx")
        started = time.time()
        with external_write_mode():
            print("Loading spaCy model '%s'... " % model, end="", flush=True)
            try:
                nlp[lang] = instance = spacy.load(model)
            except OSError:
                spacy.cli.download(model)
                try:
                    nlp[lang] = instance = spacy.load(model)
                except OSError as e:
                    raise OSError("Failed to get spaCy model. Download it manually using "
                                  "`python -m spacy download %s`." % model) from e
            tokenizer[lang] = instance.tokenizer
            instance.tokenizer = lambda words: spacy.tokens.Doc(instance.vocab, words=words)
            print("Done (%.3fs)." % (time.time() - started))
    return instance
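Because the tokenizer is swapped for a lambda that builds a Doc directly from a word list, the returned pipeline expects pre-tokenized input rather than a raw string; a usage sketch under that assumption:

nlp_en = get_nlp('en')
doc = nlp_en(['This', 'is', 'already', 'tokenized'])  # pass words, not a string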
Example 4: main
def main(model=None, output_dir=None, n_iter=15):
    """Load the model, set up the pipeline and train the parser."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")
    # add the parser to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "parser" not in nlp.pipe_names:
        parser = nlp.create_pipe("parser")
        nlp.add_pipe(parser, first=True)
    # otherwise, get it, so we can add labels to it
    else:
        parser = nlp.get_pipe("parser")
    # add labels to the parser
    for _, annotations in TRAIN_DATA:
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
            print("Losses", losses)
    # test the trained model
    test_text = "I like securities."
    doc = nlp(test_text)
    print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)
        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc = nlp2(test_text)
        print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
Example 5: __init__
def __init__(self, model='en', disable=None):
    if disable is None:
        disable = []
    try:
        self._parser = spacy.load(model, disable=disable)
    except OSError:
        url = 'https://spacy.io/models'
        if license_prompt('Spacy {} model'.format(model), url) is False:
            sys.exit(0)
        spacy_download(model)
        self._parser = spacy.load(model, disable=disable)
Example 6: get_nlp
def get_nlp():
    if nlp.instance is None:
        import spacy
        model_name = os.environ.get("SPACY_MODEL", "en")
        nlp.instance = spacy.load(model_name)
        if nlp.instance.tagger is None:  # Model not really loaded
            spacy.cli.download(model_name)
            nlp.instance = spacy.load(model_name)
            assert nlp.instance.tagger, "Failed to get spaCy model. " \
                "Download it manually using `python -m spacy download %s`." % model_name
        nlp.tokenizer = nlp.instance.tokenizer
        nlp.instance.tokenizer = nlp.tokenizer.tokens_from_list
    return nlp.instance
Example 7: get_ents
def get_ents():
    data = flask.request.args.get('fragment')
    is_custom = flask.request.args.get('custom')
    if is_custom is not None:
        nlp = spacy.load(Path('./gina_haspel'))
    else:
        nlp = spacy.load('en')
    doc = nlp(data)
    print(doc)
    tuples = [(str(x), x.label_) for x in doc.ents]
    return flask.jsonify(dict(tuples))
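A hypothetical wiring for this handler (the app object and route are assumptions, not part of the original); note that loading a model on every request is slow, so a real service would load it once at startup:

app = flask.Flask(__name__)
app.add_url_rule('/ents', 'get_ents', get_ents)
# e.g. GET /ents?fragment=Apple%20hired%20Tim%20Cook -> {"Apple": "ORG", "Tim Cook": "PERSON"}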
Example 8: _spacy_en
def _spacy_en():
    # legacy download flow: early spaCy releases fetched the 'en_default' model via sputnik
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        if e.message == "Model not installed. Please run 'python -m spacy.en.download' to install latest compatible model.":
            print("Need to download Spacy data. Starting download now")
            sputnik.install('spacy', spacy.about.__version__,
                            'en_default', data_path=data_path)
            spacyen = spacy.load('en_default', via=data_path)
        else:
            raise
    while True:
        yield spacyen
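Since the generator yields None before loading anything, callers prime it once and then pull the loaded model on the next call:

gen = _spacy_en()
next(gen)        # prime: runs up to the initial `yield None`
nlp = next(gen)  # triggers the load (and the download, if needed)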
Example 9: main
def main(model=None, output_dir=None, n_iter=15):
    """Load the model, set up the pipeline and train the parser."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")
    # We'll use the built-in dependency parser class, but we want to create a
    # fresh instance – just in case.
    if "parser" in nlp.pipe_names:
        nlp.remove_pipe("parser")
    parser = nlp.create_pipe("parser")
    nlp.add_pipe(parser, first=True)
    for text, annotations in TRAIN_DATA:
        for dep in annotations.get("deps", []):
            parser.add_label(dep)
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
            print("Losses", losses)
    # test the trained model
    test_model(nlp)
    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)
        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        test_model(nlp2)
Example 10: test_not_lemmatize_base_forms
def test_not_lemmatize_base_forms():
    nlp = spacy.load('en', parser=False)
    doc = nlp(u"Don't feed the dog")
    feed = doc[2]
    feed.tag_ = u'VB'
    assert feed.text == u'feed'
    assert feed.lemma_ == u'feed'
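The parser=False keyword is spaCy v1 syntax for skipping a pipeline component; under spaCy v2 the equivalent, assuming an installed 'en' model, is the disable argument:

nlp = spacy.load('en', disable=['parser'])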
Example 11: __init__
def __init__(self, lang='en'):
    try:
        import spacy
    except ImportError:
        raise Exception("spacy not installed. Use `pip install spacy`.")
    super(SpaCy, self).__init__(name="spaCy")
    self.model = spacy.load(lang)  # honor the lang argument instead of hard-coding 'en'
Example 12: main
def main(model_dir=None):
    nlp = spacy.load('en', parser=False, entity=False, add_vectors=False)
    # v1.1.2 onwards
    if nlp.tagger is None:
        print('---- WARNING ----')
        print('Data directory not found')
        print('please run: `python -m spacy.en.download --force all` for better performance')
        print('Using feature templates for tagging')
        print('-----------------')
        nlp.tagger = Tagger(nlp.vocab, features=Tagger.feature_templates)
    train_data = [
        (
            'Who is Shaka Khan?',
            [(len('Who is '), len('Who is Shaka Khan'), 'PERSON')]
        ),
        (
            'I like London and Berlin.',
            [(len('I like '), len('I like London'), 'LOC'),
             (len('I like London and '), len('I like London and Berlin'), 'LOC')]
        )
    ]
    ner = train_ner(nlp, train_data, ['PERSON', 'LOC'])
    doc = nlp.make_doc('Who is Shaka Khan?')
    nlp.tagger(doc)
    ner(doc)
    for word in doc:
        print(word.text, word.orth, word.lower, word.tag_, word.ent_type_, word.ent_iob)
    if model_dir is not None:
        save_model(ner, model_dir)
Example 13: train
def train(train_loc, dev_loc, shape, settings):
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
    print("Loading spaCy")
    nlp = spacy.load("en_vectors_web_lg")
    assert nlp.path is not None
    print("Processing texts...")
    train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
    dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print(settings)
    model.fit(
        train_X,
        train_labels,
        validation_data=(dev_X, dev_labels),
        epochs=settings["nr_epoch"],
        batch_size=settings["batch_size"],
    )
    if not (nlp.path / "similarity").exists():
        (nlp.path / "similarity").mkdir()
    print("Saving to", nlp.path / "similarity")
    weights = model.get_weights()
    # remove the embedding matrix. We can reconstruct it.
    del weights[1]
    with (nlp.path / "similarity" / "model").open("wb") as file_:
        pickle.dump(weights, file_)
    with (nlp.path / "similarity" / "config.json").open("w") as file_:
        file_.write(model.to_json())
Example 14: tokenizeText
def tokenizeText(sample, parser=spacy.load('en')):
    # get the tokens using spaCy
    tokens = parser(sample)
    # lemmatize
    lemmas = []
    for tok in tokens:
        lemmas.append(tok.lemma_.lower().strip()
                      if tok.lemma_ != "-PRON-" else tok.lower_)
    tokens = lemmas
    # stoplist the tokens
    tokens = [tok for tok in tokens if tok not in STOPLIST]
    # stoplist symbols
    tokens = [tok for tok in tokens if tok not in SYMBOLS]
    # remove large strings of whitespace
    while "" in tokens:
        tokens.remove("")
    while " " in tokens:
        tokens.remove(" ")
    while "\n" in tokens:
        tokens.remove("\n")
    while "\n\n" in tokens:
        tokens.remove("\n\n")
    return tokens
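Note that the parser=spacy.load('en') default is evaluated once, when the function is defined, so repeated calls reuse the same model. STOPLIST and SYMBOLS are module-level collections defined elsewhere; a usage sketch with placeholder values:

STOPLIST = {'the', 'a', 'an', 'and', 'be'}  # placeholder stop-word set
SYMBOLS = set('!"#$%&*+,-./:;<=>?@')        # placeholder symbol set
print(tokenizeText('The dogs are barking loudly.'))
# e.g. ['dog', 'bark', 'loudly'] after lemmatizing and stoplisting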
Example 15: __init__
def __init__(self, vdict_path, adict_path,
             batchsize=128, max_length=15, n_ans_vocabulary=1000, mode='train',
             data_shape=(2048)):  # note: (2048) is just the int 2048; a 1-tuple would be (2048,)
    self.batchsize = batchsize
    self.d_vocabulary = None
    self.batch_index = None
    self.batch_len = None
    self.rev_adict = None
    self.max_length = max_length
    self.n_ans_vocabulary = n_ans_vocabulary
    self.mode = mode
    self.data_shape = data_shape
    assert self.mode == 'test'
    # load vocabulary
    with open(vdict_path, 'r') as f:
        vdict = json.load(f)
    with open(adict_path, 'r') as f:
        adict = json.load(f)
    self.n_vocabulary, self.vdict = len(vdict), vdict
    self.n_ans_vocabulary, self.adict = len(adict), adict
    self.nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
    self.glove_dict = {}  # word -> glove vector
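With the GloVe vectors attached, glove_dict can presumably be filled lazily from the loaded model; an illustrative method sketch for the same class (the method name is hypothetical):

def get_glove_vector(self, word):
    # cache the GloVe vector for a word on first access
    if word not in self.glove_dict:
        self.glove_dict[word] = self.nlp(u'%s' % word)[0].vector
    return self.glove_dict[word]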