当前位置: 首页>>代码示例>>Python>>正文


Python spacy.load函数代码示例

本文整理汇总了Python中spacy.load函数的典型用法代码示例。如果您正苦于以下问题:Python load函数的具体用法?Python load怎么用?Python load使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了load函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_spacy_model

def get_spacy_model(spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool) -> SpacyModelType:
    """
    Return a spaCy pipeline for the requested configuration, loading it at most once.

    Loaded pipelines are memoised in ``LOADED_SPACY_MODELS`` under the tuple
    ``(spacy_model_name, pos_tags, parse, ner)``.  The names ``'vectors'`` and
    ``'textcat'`` are always passed in ``disable``; ``'tagger'``, ``'parser'`` and
    ``'ner'`` are added whenever the matching flag is false.  If ``spacy.load``
    raises ``OSError`` the model is fetched with ``spacy_download`` and loaded again.
    """
    cache_key = (spacy_model_name, pos_tags, parse, ner)
    if cache_key in LOADED_SPACY_MODELS:
        return LOADED_SPACY_MODELS[cache_key]

    # (flag, component name) pairs: a component is disabled when its flag is off.
    optional_components = ((pos_tags, 'tagger'), (parse, 'parser'), (ner, 'ner'))
    disabled = ['vectors', 'textcat']
    disabled.extend(name for wanted, name in optional_components if not wanted)

    try:
        loaded_model = spacy.load(spacy_model_name, disable=disabled)
    except OSError:
        logger.warning(f"Spacy models '{spacy_model_name}' not found.  Downloading and installing.")
        spacy_download(spacy_model_name)
        loaded_model = spacy.load(spacy_model_name, disable=disabled)

    LOADED_SPACY_MODELS[cache_key] = loaded_model
    return loaded_model
开发者ID:pyknife,项目名称:allennlp,代码行数:25,代码来源:util.py

示例2: main

def main(model=None, new_model_name='animal', output_dir=None, n_iter=20):
    """Set up the pipeline and entity recognizer, and train the new entity.

    Args:
        model: Name or path handed to ``spacy.load``.  When ``None`` a blank
            ``'en'`` pipeline is created instead.
        new_model_name: Value written to ``nlp.meta['name']`` before saving.
        output_dir: Directory the trained pipeline is saved to.  Nothing is
            saved (or reloaded) when ``None``.  Missing parent directories
            are created.
        n_iter: Number of passes over ``TRAIN_DATA``.
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
    # Add entity recognizer to model if it's not in the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner)
    # otherwise, get it, so we can add labels to it
    else:
        ner = nlp.get_pipe('ner')

    ner.add_label(LABEL)   # add new entity label to entity recognizer
    if model is None:
        optimizer = nlp.begin_training()
    else:
        # Note that 'begin_training' initializes the models, so it'll zero out
        # existing entity types.
        optimizer = nlp.entity.create_optimizer()

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        for itn in range(n_iter):
            # NOTE: shuffles the module-level TRAIN_DATA list in place.
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update([text], [annotations], sgd=optimizer, drop=0.35,
                           losses=losses)
            print(losses)

    # test the trained model
    test_text = 'Do you like horses?'
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # Create intermediate directories as well: a nested path must not
            # make the save fail after all the training work is already done.
            output_dir.mkdir(parents=True)
        nlp.meta['name'] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc2 = nlp2(test_text)
        for ent in doc2.ents:
            print(ent.label_, ent.text)
开发者ID:limin42,项目名称:spaCy,代码行数:60,代码来源:train_new_entity_type.py

示例3: get_nlp

def get_nlp(lang="en"):
    """Return the spaCy pipeline cached for `lang`, loading it on first use.

    The model name is taken from the `models` dict; when that has no entry it
    falls back, in order, to the environment variable ``<MODEL_ENV_VAR>_<LANG>``,
    the environment variable ``MODEL_ENV_VAR``, and ``DEFAULT_MODEL`` (default
    ``"xx"``), and the choice is stored back into `models`.  If loading raises
    ``OSError`` the model is downloaded and loading is retried once.  The
    pipeline's original tokenizer is kept in ``tokenizer[lang]`` and replaced
    by a callable that builds a ``Doc`` from a list of words.
    """
    instance = nlp.get(lang)
    if instance is not None:
        return instance

    import spacy

    model = models.get(lang)
    if not model:
        # Language-specific variable wins over the generic one, then the default.
        model = os.environ.get("_".join((MODEL_ENV_VAR, lang.upper())))
        if not model:
            model = os.environ.get(MODEL_ENV_VAR)
        if not model:
            model = DEFAULT_MODEL.get(lang, "xx")
        models[lang] = model

    started = time.time()
    with external_write_mode():
        print("Loading spaCy model '%s'... " % model, end="", flush=True)
        try:
            instance = spacy.load(model)
        except OSError:
            spacy.cli.download(model)
            try:
                instance = spacy.load(model)
            except OSError as e:
                raise OSError("Failed to get spaCy model. Download it manually using "
                              "`python -m spacy download %s`." % model) from e
        nlp[lang] = instance

        # Keep the real tokenizer reachable before swapping in the word-list one.
        tokenizer[lang] = instance.tokenizer

        def doc_from_words(words):
            return spacy.tokens.Doc(instance.vocab, words=words)

        instance.tokenizer = doc_from_words
        print("Done (%.3fs)." % (time.time() - started))
    return instance
开发者ID:danielhers,项目名称:ucca,代码行数:25,代码来源:textutil.py

示例4: main

def main(model=None, output_dir=None, n_iter=15):
    """Load the model, set up the pipeline and train the parser.

    Args:
        model: Name or path handed to ``spacy.load``.  When ``None`` a blank
            ``"en"`` pipeline is created instead.
        output_dir: Directory the trained pipeline is saved to.  Nothing is
            saved (or reloaded) when ``None``.  Missing parent directories
            are created.
        n_iter: Number of passes over ``TRAIN_DATA``.
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # add the parser to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "parser" not in nlp.pipe_names:
        parser = nlp.create_pipe("parser")
        nlp.add_pipe(parser, first=True)
    # otherwise, get it, so we can add labels to it
    else:
        parser = nlp.get_pipe("parser")

    # add labels to the parser
    for _, annotations in TRAIN_DATA:
        for dep in annotations.get("deps", []):
            parser.add_label(dep)

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            # NOTE: shuffles the module-level TRAIN_DATA list in place.
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
            print("Losses", losses)

    # test the trained model
    test_text = "I like securities."
    doc = nlp(test_text)
    print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # Create intermediate directories as well: a nested path must not
            # make the save fail after all the training work is already done.
            output_dir.mkdir(parents=True)
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        doc = nlp2(test_text)
        print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
开发者ID:spacy-io,项目名称:spaCy,代码行数:55,代码来源:train_parser.py

示例5: __init__

 def __init__(self, model='en', disable=None):
     """Load the spaCy pipeline `model` into ``self._parser``.

     `disable` is forwarded to ``spacy.load``; ``None`` is treated as an empty
     list.  When loading raises ``OSError`` the user is shown a license prompt
     for the model: a ``False`` answer exits the process with status 0,
     otherwise the model is downloaded and loaded again.
     """
     disabled = [] if disable is None else disable
     try:
         loaded = spacy.load(model, disable=disabled)
     except OSError:
         models_url = 'https://spacy.io/models'
         accepted = license_prompt('Spacy {} model'.format(model), models_url)
         if accepted is False:
             sys.exit(0)
         spacy_download(model)
         loaded = spacy.load(model, disable=disabled)
     self._parser = loaded
开发者ID:cdj0311,项目名称:nlp-architect,代码行数:11,代码来源:text.py

示例6: get_nlp

def get_nlp():
    """Return the shared spaCy pipeline stored on ``nlp.instance``, creating it on first call.

    The model name comes from the ``SPACY_MODEL`` environment variable
    (default ``"en"``).  If the loaded pipeline has no tagger, the model is
    downloaded and loaded again.  The original tokenizer is kept on
    ``nlp.tokenizer`` and the pipeline's tokenizer is replaced by that
    tokenizer's ``tokens_from_list`` method.
    """
    if nlp.instance is not None:
        return nlp.instance

    import spacy

    model_name = os.environ.get("SPACY_MODEL", "en")
    nlp.instance = spacy.load(model_name)
    if nlp.instance.tagger is None:  # Model not really loaded
        spacy.cli.download(model_name)
        nlp.instance = spacy.load(model_name)
        failure_message = "Failed to get spaCy model. " \
                          "Download it manually using `python -m spacy download %s`." % model_name
        assert nlp.instance.tagger, failure_message

    # Remember the real tokenizer, then make the pipeline accept token lists.
    original_tokenizer = nlp.instance.tokenizer
    nlp.tokenizer = original_tokenizer
    nlp.instance.tokenizer = nlp.tokenizer.tokens_from_list
    return nlp.instance
开发者ID:aiedward,项目名称:nn4nlp-code,代码行数:13,代码来源:textutil.py

示例7: get_ents

def get_ents():
    """Return the named entities found in the ``fragment`` query parameter as JSON.

    Query parameters:
        fragment: Text to analyse.  The request is rejected with HTTP 400
            when the parameter is missing.
        custom: When present (any value), the pipeline stored at
            ``./gina_haspel`` is used instead of the ``'en'`` model.

    Returns:
        A JSON object mapping each entity's text to its label.  The pairs are
        collapsed into a dict, so an entity text that occurs several times
        appears once, carrying the label of its last occurrence.
    """
    data = flask.request.args.get('fragment')
    if data is None:
        # Without this guard the pipeline below would be invoked with None
        # instead of text; answer with a client error before loading a model.
        flask.abort(400, description="Missing required query parameter 'fragment'.")

    is_custom = flask.request.args.get('custom')
    # NOTE(review): the model is loaded from disk on every request.
    if is_custom is not None:
        nlp = spacy.load(Path('./gina_haspel'))
    else:
        nlp = spacy.load('en')
    doc = nlp(data)
    print(doc)
    tuples = [(str(x), x.label_)
              for x
              in doc.ents]
    return  flask.jsonify(dict(tuples))
开发者ID:kognate,项目名称:presentations,代码行数:13,代码来源:app.py

示例8: _spacy_en

def _spacy_en():
    """Generator that loads the ``en_default`` spaCy model once and then yields it forever.

    The first value yielded is ``None``; the model is only loaded when the
    generator is advanced past that point.  If loading raises the
    "Model not installed" ``RuntimeError``, the model data is installed via
    ``sputnik`` and loading is retried; any other ``RuntimeError`` is re-raised.
    """
    yield None
    try:
        spacyen = spacy.load('en_default', via=data_path)
    except RuntimeError as e:
        # ``e.message`` only exists on Python 2 exceptions; ``str(e)`` gives the
        # same text there and also works on Python 3.
        if str(e) == "Model not installed. Please run 'python -m spacy.en.download' to install latest compatible model.":
            print("Need to download Spacy data. Starting download now")
            sputnik.install('spacy', spacy.about.__version__,
                            'en_default', data_path=data_path)
            spacyen = spacy.load('en_default', via=data_path)
        else:
            raise
    while True:
        yield spacyen
开发者ID:TheGadflyProject,项目名称:TheGadflyProject,代码行数:14,代码来源:spacy_singleton.py

示例9: main

def main(model=None, output_dir=None, n_iter=15):
    """Load the model, set up the pipeline and train the parser.

    Args:
        model: Name or path handed to ``spacy.load``.  When ``None`` a blank
            ``"en"`` pipeline is created instead.
        output_dir: Directory the trained pipeline is saved to.  Nothing is
            saved (or reloaded) when ``None``.  Missing parent directories
            are created.
        n_iter: Number of passes over ``TRAIN_DATA``.
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # We'll use the built-in dependency parser class, but we want to create a
    # fresh instance – just in case.
    if "parser" in nlp.pipe_names:
        nlp.remove_pipe("parser")
    parser = nlp.create_pipe("parser")
    nlp.add_pipe(parser, first=True)

    # register every dependency label that occurs in the training data
    for _, annotations in TRAIN_DATA:
        for dep in annotations.get("deps", []):
            parser.add_label(dep)

    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"]
    with nlp.disable_pipes(*other_pipes):  # only train parser
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            # NOTE: shuffles the module-level TRAIN_DATA list in place.
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, losses=losses)
            print("Losses", losses)

    # test the trained model
    test_model(nlp)

    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # Create intermediate directories as well: a nested path must not
            # make the save fail after all the training work is already done.
            output_dir.mkdir(parents=True)
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        test_model(nlp2)
开发者ID:spacy-io,项目名称:spaCy,代码行数:48,代码来源:train_intent_parser.py

示例10: test_not_lemmatize_base_forms

def test_not_lemmatize_base_forms():
    """Check that the token 'feed', once tagged ``VB``, keeps 'feed' as its lemma."""
    pipeline = spacy.load('en', parser=False)
    parsed = pipeline(u"Don't feed the dog")
    verb = parsed[2]
    # Force the base-form verb tag before inspecting the lemma.
    verb.tag_ = u'VB'
    assert verb.text == u'feed'
    assert verb.lemma_ == u'feed'
开发者ID:adamhadani,项目名称:spaCy,代码行数:7,代码来源:test_issue595.py

示例11: __init__

 def __init__(self,lang='en'):
     """Initialise the parser wrapper and load the spaCy model named by `lang`.

     Args:
         lang: Model name passed to ``spacy.load`` (default ``'en'``).

     Raises:
         Exception: if the ``spacy`` package cannot be imported.
     """
     try:
         import spacy
     except ImportError:
         # Only a failed import means "not installed"; any other error raised
         # while importing spacy propagates unchanged instead of being masked.
         raise Exception("spacy not installed. Use `pip install spacy`.")
     super(SpaCy, self).__init__(name="spaCy")
     # Honour the requested language instead of always loading 'en'.
     self.model = spacy.load(lang)
开发者ID:dossanbekzhan,项目名称:snorkel,代码行数:7,代码来源:spacy.py

示例12: main

def main(model_dir=None):
    """Train a small NER model on two example sentences and print its predictions.

    The ``'en'`` pipeline is loaded without parser, entity recognizer or
    vectors.  When it has no tagger, a warning is printed and a tagger built
    from ``Tagger.feature_templates`` is attached.  After training, the first
    example sentence is tagged and run through the new recognizer, and one
    line per token is printed.  The recognizer is saved with ``save_model``
    when `model_dir` is not ``None``.
    """
    def entity_span(text, entity, label):
        # (start, end, label) character offsets of `entity` inside `text`.
        start = text.index(entity)
        return (start, start + len(entity), label)

    nlp = spacy.load('en', parser=False, entity=False, add_vectors=False)

    # v1.1.2 onwards
    if nlp.tagger is None:
        warning_lines = (
            '---- WARNING ----',
            'Data directory not found',
            'please run: `python -m spacy.en.download --force all` for better performance',
            'Using feature templates for tagging',
            '-----------------',
        )
        for line in warning_lines:
            print(line)
        nlp.tagger = Tagger(nlp.vocab, features=Tagger.feature_templates)

    question = 'Who is Shaka Khan?'
    statement = 'I like London and Berlin.'
    train_data = [
        (question, [entity_span(question, 'Shaka Khan', 'PERSON')]),
        (statement, [entity_span(statement, 'London', 'LOC'),
                     entity_span(statement, 'Berlin', 'LOC')]),
    ]
    ner = train_ner(nlp, train_data, ['PERSON', 'LOC'])

    doc = nlp.make_doc('Who is Shaka Khan?')
    nlp.tagger(doc)
    ner(doc)
    for token in doc:
        print(token.text, token.orth, token.lower, token.tag_, token.ent_type_, token.ent_iob)

    if model_dir is not None:
        save_model(ner, model_dir)
开发者ID:geovedi,项目名称:spaCy,代码行数:33,代码来源:train_ner.py

示例13: train

def train(train_loc, dev_loc, shape, settings):
    """Train the similarity network on SNLI data and store it beside the spaCy model.

    Training and dev examples are read with ``read_snli``, converted with
    ``create_dataset`` using the ``en_vectors_web_lg`` pipeline, and fitted
    with the network returned by ``build_model``.  ``settings["nr_epoch"]`` and
    ``settings["batch_size"]`` control the fit.  The weights (without the entry
    at index 1, the embedding matrix) are pickled to ``<nlp.path>/similarity/model``
    and the network description is written to ``<nlp.path>/similarity/config.json``.
    """
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load("en_vectors_web_lg")
    assert nlp.path is not None
    print("Processing texts...")
    max_length = shape[0]
    train_X = create_dataset(nlp, train_texts1, train_texts2, 100, max_length)
    dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, max_length)

    print("Compiling network")
    embeddings = get_embeddings(nlp.vocab)
    model = build_model(embeddings, shape, settings)

    print(settings)
    model.fit(
        train_X,
        train_labels,
        validation_data=(dev_X, dev_labels),
        epochs=settings["nr_epoch"],
        batch_size=settings["batch_size"],
    )

    similarity_dir = nlp.path / "similarity"
    if not similarity_dir.exists():
        similarity_dir.mkdir()
    print("Saving to", similarity_dir)
    weights = model.get_weights()
    # remove the embedding matrix.  We can reconstruct it.
    del weights[1]
    with (similarity_dir / "model").open("wb") as weights_file:
        pickle.dump(weights, weights_file)
    with (similarity_dir / "config.json").open("w") as config_file:
        config_file.write(model.to_json())
开发者ID:spacy-io,项目名称:spaCy,代码行数:32,代码来源:__main__.py

示例14: tokenizeText

def tokenizeText(sample,parser=spacy.load('en')):
    """Return the cleaned, lemmatised tokens of `sample`.

    `sample` is run through `parser`; each token is replaced by its lowercased,
    stripped lemma, except tokens whose lemma is ``"-PRON-"``, which keep their
    lowercase surface form.  Entries found in ``STOPLIST`` or ``SYMBOLS`` and
    the whitespace-only entries ``""``, ``" "``, ``"\\n"`` and ``"\\n\\n"`` are
    dropped.
    """
    # get the tokens using spaCy
    doc = parser(sample)

    # lemmatize
    lemmas = [
        tok.lower_ if tok.lemma_ == "-PRON-" else tok.lemma_.lower().strip()
        for tok in doc
    ]

    # stoplist the tokens, then the symbols
    kept = [lemma for lemma in lemmas if lemma not in STOPLIST]
    kept = [lemma for lemma in kept if lemma not in SYMBOLS]

    # remove large strings of whitespace
    whitespace_entries = ("", " ", "\n", "\n\n")
    return [lemma for lemma in kept if lemma not in whitespace_entries]
开发者ID:carltoews,项目名称:app_demo,代码行数:29,代码来源:poeml_utility.py

示例15: __init__

    def __init__(self, vdict_path, adict_path, \
        batchsize=128, max_length=15, n_ans_vocabulary=1000, mode='train', data_shape=(2048)):
        """Store the configuration, load both vocabularies and the spaCy pipeline.

        `vdict_path` and `adict_path` are JSON files; their contents become
        ``self.vdict`` and ``self.adict`` and their lengths become
        ``self.n_vocabulary`` and ``self.n_ans_vocabulary`` (the latter
        overwrites the `n_ans_vocabulary` argument).  The ``assert`` accepts
        only ``mode == 'test'``, so the default ``'train'`` is rejected.
        """
        # Values taken directly from the arguments.
        self.batchsize = batchsize
        self.max_length = max_length
        self.n_ans_vocabulary = n_ans_vocabulary
        self.mode = mode
        self.data_shape = data_shape

        # Attributes that start out unset.
        self.d_vocabulary = None
        self.batch_index = None
        self.batch_len = None
        self.rev_adict = None

        assert self.mode == 'test'

        def read_json(path):
            with open(path,'r') as handle:
                return json.load(handle)

        # load vocabulary
        question_vocab = read_json(vdict_path)
        answer_vocab = read_json(adict_path)
        self.n_vocabulary = len(question_vocab)
        self.vdict = question_vocab
        self.n_ans_vocabulary = len(answer_vocab)
        self.adict = answer_vocab

        self.nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
        self.glove_dict = {} # word -> glove vector
开发者ID:ronghanghu,项目名称:vqa-mcb,代码行数:25,代码来源:vqa_data_provider_layer.py


注:本文中的spacy.load函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。