當前位置: 首頁>>代碼示例>>Python>>正文


Python spacy.cli方法代碼示例

本文整理匯總了Python中spacy.cli方法的典型用法代碼示例。如果您正苦於以下問題:Python spacy.cli方法的具體用法?Python spacy.cli怎麼用?Python spacy.cli使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在spacy的用法示例。


在下文中一共展示了spacy.cli方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: load_lang_model

# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import cli [as 別名]
def load_lang_model(lang: str, disable: List[str]):
    """Return a spaCy pipeline for ``lang``, installing the model if it is missing.

    Names containing 'coref' are tried as-is first; if that load fails for any
    reason, the lookup is retried with the part of the name before the first
    underscore. Every other name is loaded directly, and an OSError raised by
    spacy.load triggers a download followed by a second load attempt.

    Arguments:
        lang {str} -- name of the spaCy model to load
        disable {List[str]} -- pipeline components to skip, e.g.
            ['parser', 'ner', 'textcat'] when only the tokenizer is needed

    Returns:
        whatever spacy.load returns for the resolved model name
    """
    if 'coref' in lang:
        try:
            model = spacy.load(lang, disable=disable)
        except Exception:
            # Any failure falls back to the base name (text before the first '_').
            base_name = lang.split('_')[0]
            model = SpacyAnnotator.load_lang_model(base_name, disable=disable)
        return model

    try:
        model = spacy.load(lang, disable=disable)
    except OSError:
        logger.warning(f"Spacy models '{lang}' not found.  Downloading and installing.")
        spacy_download(lang)
        # NOTE(mattg): workaround suggested by Ines for spacy 2.1.0, which dropped
        # the linking step that spacy 2.0 performed. importlib cannot see packages
        # installed during the current python session, so `spacy_download` alone
        # is not enough here; the model is linked by hand before loading. This can
        # probably go away once spacy handles the case itself.
        # See https://github.com/explosion/spaCy/issues/3435.
        from spacy.cli import link
        from spacy.util import get_package_path
        link(lang, lang, model_path=get_package_path(lang))
        model = spacy.load(lang, disable=disable)
    return model
開發者ID:uwdata,項目名稱:errudite,代碼行數:34,代碼來源:spacy_annotator.py

示例2: load_nlp_pipeline

# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import cli [as 別名]
def load_nlp_pipeline(language='xx'):
    """Return the spaCy pipeline registered for ``language``, loading it lazily.

    The pipeline is looked up in the module-level ``nlp_pipelines`` cache; a
    ``None`` entry means it has not been loaded yet. On first use the model is
    loaded with the parser, tagger and NER components disabled, and it is
    downloaded first if ``spacy.load`` raises ``OSError``.

    :param language: key into ``language_module_registry`` (default ``'xx'``)
    :return: the cached pipeline object for ``language``
    :raises ValueError: if ``language`` is not in ``language_module_registry``
    :raises SystemExit: with exit code -1 if spaCy cannot be imported
    """
    if language not in language_module_registry:
        message = (
            'Language {} is not supported. '
            'Supported languages are: {}'.format(
                language,
                list(language_module_registry.keys())
            )
        )
        logger.error(message)
        # Carry the explanation on the exception too, not only in the log.
        raise ValueError(message)

    spacy_module_name = language_module_registry[language]

    # The cache is only mutated, never rebound, so no `global` statement is needed.
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                ' spacy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        disabled_components = ('parser', 'tagger', 'ner')
        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=list(disabled_components)
            )
        except OSError:
            # The placeholder was previously never filled in, so the log
            # showed a literal '{}' instead of the model name.
            logger.info(
                ' spaCy {} model is missing, downloading it '
                '(this will only happen once)'.format(spacy_module_name)
            )
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=list(disabled_components)
            )

    return nlp_pipelines[language]
開發者ID:uber,項目名稱:ludwig,代碼行數:44,代碼來源:nlp_utils.py

示例3: _process_embeddings_for_spacy

# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import cli [as 別名]
def _process_embeddings_for_spacy(tmp_file_path: str, meta_info: dict,
                                  cache_dir: str = DEFAULT_CACHE_DIR,
                                  clean_up_raw_data: bool = True,
                                  verbose: bool = False):
    """
    To use pretrained embeddings with spaCy the embeddings need to be stored in
    a specific format. This function converts embeddings saved in the binary
    word2vec format to a spaCy model with the init_model() function from
    spaCy. The generated files will be saved in the cache_dir under a
    folder called <pretrained_embedding>.spacy

    The conversion goes through an intermediate text-format ``.vec`` file
    written to the current working directory; it is removed again when the
    function finishes, whether or not the conversion succeeded.

    More information on converting pretrained word embeddings to spaCy models here:
    https://spacy.io/usage/vectors-similarity#custom

    :param str tmp_file_path: the file name of the embedding binary file
    :param dict meta_info: embedding metadata; its ``'name'`` entry is used as
        the base name of the ``.bin``, ``.vec`` and ``.spacy`` artifacts
    :param str cache_dir: the directory for storing cached data
    :param bool clean_up_raw_data: forwarded to
        ``_process_downloaded_embeddings`` when the ``.bin`` file is missing
    :param bool verbose: forwarded to ``_process_downloaded_embeddings``
    """
    from pathlib import Path
    from spacy.cli import init_model

    embeddings = meta_info['name']

    bin_file_path = os.path.join(cache_dir, embeddings + ".bin")

    if not os.path.isfile(bin_file_path):
        # Preprocess to transform to word2vec .bin format
        _process_downloaded_embeddings(tmp_file_path, meta_info, cache_dir,
                                       clean_up_raw_data, verbose)

    vec_file = embeddings + ".vec"

    word_vecs = KeyedVectors.load_word2vec_format(bin_file_path, binary=True,
                                                  encoding='utf8')
    assert_wv_dimensions(word_vecs, embeddings)

    try:
        word_vecs.save_word2vec_format(vec_file, binary=False)

        spacy_dir = os.path.join(cache_dir, embeddings + '.spacy')
        os.makedirs(spacy_dir, exist_ok=True)

        # init_model is handed an absolute Path; relative cache dirs are
        # resolved against the current working directory.
        if os.path.isabs(spacy_dir):
            full_spacy_dir = Path(spacy_dir)
        else:
            full_spacy_dir = Path(os.path.join(os.getcwd(), spacy_dir))

        init_model('da', full_spacy_dir, vectors_loc=vec_file)
    finally:
        # Clean up the vec file even when the conversion fails part-way; the
        # existence check keeps a failed save from masking the original error.
        if os.path.isfile(vec_file):
            os.remove(vec_file)
開發者ID:alexandrainst,項目名稱:danlp,代碼行數:50,代碼來源:embeddings.py


注:本文中的spacy.cli方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。