This article collects and summarizes typical usage examples of the spacy.cli module in Python. If you are wondering how spacy.cli is used in practice, what a concrete call looks like, or simply want examples of spacy.cli, the curated code samples below may help. You can also explore further usage examples of the parent package, spacy.
Three code examples of spacy.cli are shown below, sorted by popularity by default.
Example 1: load_lang_model
# Required import: import spacy [as alias]
# Or: from spacy import cli [as alias]
import logging
from typing import List

import spacy
from spacy.cli import download as spacy_download

logger = logging.getLogger(__name__)


def load_lang_model(lang: str, disable: List[str]):
    """Load a spaCy language model, downloading and installing it first if it
    is available but not yet installed.

    Arguments:
        lang {str} -- language / model name
        disable {List[str]} -- If only using the tokenizer, can disable ['parser', 'ner', 'textcat']

    Returns:
        Language -- the loaded spaCy pipeline
    """
    if 'coref' in lang:
        try:
            return spacy.load(lang, disable=disable)
        except Exception:
            # Fall back to the base language model (e.g. 'en' for 'en_coref_md').
            return SpacyAnnotator.load_lang_model(lang.split('_')[0], disable=disable)
    try:
        return spacy.load(lang, disable=disable)
    except OSError:
        logger.warning(f"spaCy model '{lang}' not found. Downloading and installing.")
        spacy_download(lang)
        # NOTE(mattg): The following four lines are a workaround suggested by Ines for spacy
        # 2.1.0, which removed the linking that was done in spacy 2.0. importlib doesn't find
        # packages that were installed in the same python session, so the way `spacy_download`
        # works in 2.1.0 is broken for this use case. These four lines can probably be removed
        # at some point in the future, once spacy has figured out a better way to handle this.
        # See https://github.com/explosion/spaCy/issues/3435.
        from spacy.cli import link
        from spacy.util import get_package_path
        package_path = get_package_path(lang)
        link(lang, lang, model_path=package_path)
        return spacy.load(lang, disable=disable)
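A minimal usage sketch (not part of the original example; the model name and the disabled components below are assumptions chosen for illustration):

# Hypothetical usage: load (or download on first use) the small English model
# with everything but the tokenizer disabled.
nlp = load_lang_model('en_core_web_sm', disable=['parser', 'ner', 'textcat'])
doc = nlp('The model is downloaded automatically if it is missing.')
print([token.text for token in doc])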
Example 2: load_nlp_pipeline
# Required import: import spacy [as alias]
# Or: from spacy import cli [as alias]
import logging
import sys

logger = logging.getLogger(__name__)

# language_module_registry maps language codes to spaCy model names, and
# nlp_pipelines caches loaded pipelines per language; both are module-level
# dicts defined elsewhere in the original module.


def load_nlp_pipeline(language='xx'):
    if language not in language_module_registry:
        logger.error(
            'Language {} is not supported. '
            'Supported languages are: {}'.format(
                language,
                language_module_registry.keys()
            ))
        raise ValueError
    else:
        spacy_module_name = language_module_registry[language]
    global nlp_pipelines
    if nlp_pipelines[language] is None:
        logger.info('Loading NLP pipeline')
        try:
            import spacy
        except ImportError:
            logger.error(
                ' spacy is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)
        try:
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )
        except OSError:
            logger.info(
                ' spaCy {} model is missing, downloading it '
                '(this will only happen once)'.format(language)
            )
            from spacy.cli import download
            download(spacy_module_name)
            nlp_pipelines[language] = spacy.load(
                spacy_module_name,
                disable=['parser', 'tagger', 'ner']
            )
    return nlp_pipelines[language]
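A minimal usage sketch, assuming 'en' is one of the keys in language_module_registry (this call is illustrative and not part of the original example):

# Hypothetical usage: the first call downloads and caches the pipeline,
# later calls reuse the cached instance.
nlp = load_nlp_pipeline('en')
print([token.text for token in nlp('The pipeline is loaded only once.')])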
Example 3: _process_embeddings_for_spacy
# Required import: import spacy [as alias]
# Or: from spacy import cli [as alias]
import os

from gensim.models import KeyedVectors

# DEFAULT_CACHE_DIR, assert_wv_dimensions and _process_downloaded_embeddings
# are helpers defined elsewhere in the original module.


def _process_embeddings_for_spacy(tmp_file_path: str, meta_info: dict,
                                  cache_dir: str = DEFAULT_CACHE_DIR,
                                  clean_up_raw_data: bool = True,
                                  verbose: bool = False):
    """
    To use pretrained embeddings with spaCy, the embeddings need to be stored in
    a specific format. This function converts embeddings saved in the binary
    word2vec format to a spaCy model with the init_model() function from
    spaCy. The generated files are saved in cache_dir under a folder called
    <pretrained_embedding>.spacy

    More information on converting pretrained word embeddings to spaCy models here:
    https://spacy.io/usage/vectors-similarity#custom

    :param str tmp_file_path: the file name of the embedding binary file
    :param dict meta_info: metadata for the embeddings (must contain the key 'name')
    :param str cache_dir: the directory for storing cached data
    :param bool clean_up_raw_data: whether to remove the raw data after preprocessing
    :param bool verbose: print verbose output
    """
    from pathlib import Path
    from spacy.cli import init_model

    embeddings = meta_info['name']
    bin_file_path = os.path.join(cache_dir, embeddings + ".bin")

    # Preprocess to transform to word2vec .bin format if it is not cached yet.
    if not os.path.isfile(bin_file_path):
        _process_downloaded_embeddings(tmp_file_path, meta_info, cache_dir,
                                       clean_up_raw_data, verbose)

    # Convert the binary vectors to the plain-text .vec format expected by init_model.
    vec_file = embeddings + ".vec"
    word_vecs = KeyedVectors.load_word2vec_format(bin_file_path, binary=True,
                                                  encoding='utf8')
    assert_wv_dimensions(word_vecs, embeddings)
    word_vecs.save_word2vec_format(vec_file, binary=False)

    # Build the spaCy model directory from the vectors.
    spacy_dir = os.path.join(cache_dir, embeddings + '.spacy')
    os.makedirs(spacy_dir, exist_ok=True)
    if os.path.isabs(spacy_dir):
        full_spacy_dir = Path(spacy_dir)
    else:
        full_spacy_dir = Path(os.path.join(os.getcwd(), spacy_dir))
    init_model('da', full_spacy_dir, vectors_loc=vec_file)

    os.remove(vec_file)  # Clean up the .vec file
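Once the conversion has finished, the directory written by init_model() can be loaded like any other spaCy model. A minimal sketch (not from the original example), assuming an embedding name of 'my_embeddings'; in practice use the value of meta_info['name'] from the call above:

# Hypothetical follow-up: load the converted vectors as a spaCy model and
# compare two Danish tokens. The embedding name below is an assumption.
import os
import spacy

spacy_dir = os.path.join(DEFAULT_CACHE_DIR, 'my_embeddings' + '.spacy')
nlp = spacy.load(spacy_dir)
doc = nlp('hund kat')
print(doc[0].similarity(doc[1]))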