本文整理汇总了Python中nltk.tokenize.moses.MosesTokenizer方法的典型用法代码示例。如果您正苦于以下问题:Python moses.MosesTokenizer方法的具体用法?Python moses.MosesTokenizer怎么用?Python moses.MosesTokenizer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tokenize.moses
的用法示例。
在下文中一共展示了moses.MosesTokenizer方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from nltk.tokenize import moses [as 别名]
# 或者: from nltk.tokenize.moses import MosesTokenizer [as 别名]
def __init__(self):
try:
from nltk.tokenize.moses import MosesTokenizer
except Exception as ex:
import nltk
nltk.download('perluniprops')
nltk.download('nonbreaking_prefixes')
self.tokenizer = MosesTokenizer()
示例2: get_tokenizer
# 需要导入模块: from nltk.tokenize import moses [as 别名]
# 或者: from nltk.tokenize.moses import MosesTokenizer [as 别名]
def get_tokenizer(tokenizer, decap=False):
if callable(tokenizer):
return tokenizer
if tokenizer == "spacy":
try:
import spacy
spacy_en = spacy.load('en')
return lambda s: [tok.text for tok in spacy_en.tokenizer(s)]
except ImportError:
print("Please install SpaCy and the SpaCy English tokenizer. "
"See the docs at https://spacy.io for more information.")
raise
except AttributeError:
print("Please install SpaCy and the SpaCy English tokenizer. "
"See the docs at https://spacy.io for more information.")
raise
elif tokenizer == "moses":
try:
from nltk.tokenize.moses import MosesTokenizer
moses_tokenizer = MosesTokenizer()
return moses_tokenizer.tokenize
except ImportError:
print("Please install NLTK. "
"See the docs at http://nltk.org for more information.")
raise
except LookupError:
print("Please install the necessary NLTK corpora. "
"See the docs at http://nltk.org for more information.")
raise
elif tokenizer == 'revtok':
try:
import revtok
return revtok.tokenize
except ImportError:
print("Please install revtok.")
raise
elif tokenizer == 'subword':
try:
import revtok
return revtok.tokenize
except ImportError:
print("Please install revtok.")
raise
raise ValueError("Requested tokenizer {}, valid choices are a "
"callable that takes a single string as input, "
"\"revtok\" for the revtok reversible tokenizer, "
"\"subword\" for the revtok caps-aware tokenizer, "
"\"spacy\" for the SpaCy English tokenizer, or "
"\"moses\" for the NLTK port of the Moses tokenization "
"script.".format(tokenizer))
示例3: get_tokenizer
# 需要导入模块: from nltk.tokenize import moses [as 别名]
# 或者: from nltk.tokenize.moses import MosesTokenizer [as 别名]
def get_tokenizer(tokenizer):
if callable(tokenizer):
return tokenizer
if tokenizer == "spacy":
try:
import spacy
spacy_en = spacy.load('en')
return lambda s: [tok.text for tok in spacy_en.tokenizer(s)]
except ImportError:
print("Please install SpaCy and the SpaCy English tokenizer. "
"See the docs at https://spacy.io for more information.")
raise
except AttributeError:
print("Please install SpaCy and the SpaCy English tokenizer. "
"See the docs at https://spacy.io for more information.")
raise
elif tokenizer == "moses":
try:
from nltk.tokenize.moses import MosesTokenizer
moses_tokenizer = MosesTokenizer()
return moses_tokenizer.tokenize
except ImportError:
print("Please install NLTK. "
"See the docs at http://nltk.org for more information.")
raise
except LookupError:
print("Please install the necessary NLTK corpora. "
"See the docs at http://nltk.org for more information.")
raise
elif tokenizer == 'revtok':
try:
import revtok
return revtok.tokenize
except ImportError:
print("Please install revtok.")
raise
elif tokenizer == 'subword':
try:
import revtok
return lambda x: revtok.tokenize(x, decap=True)
except ImportError:
print("Please install revtok.")
raise
raise ValueError("Requested tokenizer {}, valid choices are a "
"callable that takes a single string as input, "
"\"revtok\" for the revtok reversible tokenizer, "
"\"subword\" for the revtok caps-aware tokenizer, "
"\"spacy\" for the SpaCy English tokenizer, or "
"\"moses\" for the NLTK port of the Moses tokenization "
"script.".format(tokenizer))