This article collects typical usage examples of the java function from Python's nltk.internals module. If you are wondering what the java function does, how to call it, or what real code that uses it looks like, the curated examples below may help.
The 15 code examples shown below are drawn from open-source projects and are ordered, by default, by popularity.
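Before the examples, a brief orientation: nltk.internals.java runs a Java command as a subprocess, prepending the configured java binary, any global JVM options, and the -cp classpath to the given command. With blocking=True (the default) it waits for the JVM to exit and returns an (stdout, stderr) pair of byte strings; with blocking=False it returns the subprocess.Popen object. A minimal sketch, where MyMainClass and the jar path are hypothetical placeholders:

import subprocess
from nltk.internals import config_java, java

# Locate the java binary; an options string such as '-Xmx512m' would set JVM flags globally.
config_java()

# Blocking call: waits for the JVM to exit and returns the captured bytes.
stdout, stderr = java(
    ['MyMainClass', '--some-flag'],  # hypothetical main class and flag
    classpath='/path/to/my.jar',     # hypothetical jar
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
print(stdout.decode('utf-8'))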
Example 1: train
def train(model_filename, featuresets, quiet=True):
    # Make sure we can find java & weka.
    config_weka()
    # Build an ARFF formatter.
    formatter = ARFF_Formatter.from_train(featuresets)
    temp_dir = tempfile.mkdtemp()
    try:
        # Write the training data file.
        train_filename = os.path.join(temp_dir, 'train.arff')
        formatter.write(train_filename, featuresets)
        # Train the weka model.
        cmd = ['weka.classifiers.bayes.NaiveBayes',
               '-d', model_filename, '-t', train_filename]
        if quiet: stdout = subprocess.PIPE
        else: stdout = None
        java(cmd, classpath=_weka_classpath, stdout=stdout)
        # Return the new classifier.
        return WekaClassifier(formatter, model_filename)
    finally:
        for f in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, f))
        os.rmdir(temp_dir)
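A hypothetical call to the trainer above. The featuresets follow the usual nltk.classify convention of (feature_dict, label) pairs, and 'model.weka' is an arbitrary output path:

featuresets = [
    ({'length': 5, 'last_letter': 'a'}, 'female'),  # toy feature dicts
    ({'length': 4, 'last_letter': 'k'}, 'male'),
]
classifier = train('model.weka', featuresets, quiet=True)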
Example 2: train
def train(cls, model_filename, featuresets,
          classifier='naivebayes', options=[], quiet=True):
    # Make sure we can find java & weka.
    config_weka()
    # Build an ARFF formatter.
    formatter = ARFF_Formatter.from_train(featuresets)
    temp_dir = tempfile.mkdtemp()
    try:
        # Write the training data file.
        train_filename = os.path.join(temp_dir, 'train.arff')
        formatter.write(train_filename, featuresets)
        if classifier in cls._CLASSIFIER_CLASS:
            javaclass = cls._CLASSIFIER_CLASS[classifier]
        elif classifier in cls._CLASSIFIER_CLASS.values():
            javaclass = classifier
        else:
            raise ValueError('Unknown classifier %s' % classifier)
        # Train the weka model.
        cmd = [javaclass, '-d', model_filename, '-t', train_filename]
        cmd += list(options)
        if quiet: stdout = subprocess.PIPE
        else: stdout = None
        java(cmd, classpath=_weka_classpath, stdout=stdout)
        # Return the new classifier.
        return WekaClassifier(formatter, model_filename)
    finally:
        for f in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, f))
        os.rmdir(temp_dir)
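The classifier lookup above assumes a class-level _CLASSIFIER_CLASS mapping from short names to Weka class names, roughly like the one in NLTK's nltk.classify.weka module:

_CLASSIFIER_CLASS = {
    'naivebayes': 'weka.classifiers.bayes.NaiveBayes',
    'C4.5': 'weka.classifiers.trees.J48',
    'log_regression': 'weka.classifiers.functions.Logistic',
    'svm': 'weka.classifiers.functions.SMO',
    'kstar': 'weka.classifiers.lazy.KStar',
    'ripper': 'weka.classifiers.rules.JRip',
}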
Example 3: batch_tag
def batch_tag(self, sentences):
    encoding = self._encoding
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=False)
    # Create a temporary input file
    _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
    if encoding:
        self._cmd.extend(['-encoding', encoding])
    # Write the actual sentences to the temporary input file
    _input_fh = os.fdopen(_input_fh, 'w')
    _input = '\n'.join((' '.join(x) for x in sentences))
    if isinstance(_input, compat.text_type) and encoding:
        _input = _input.encode(encoding)
    _input_fh.write(_input)
    _input_fh.close()
    # Run the tagger and get the output
    stanpos_output, _stderr = java(self._cmd, classpath=self._stanford_jar,
                                   stdout=PIPE, stderr=PIPE)
    if encoding:
        stanpos_output = stanpos_output.decode(encoding)
    # Delete the temporary file
    os.unlink(self._input_file_path)
    # Return java configurations to their default values
    config_java(options=default_options, verbose=False)
    return self.parse_output(stanpos_output)
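batch_tag expects pre-tokenized input: one list of token strings per sentence, which it joins with spaces and newlines before handing the temporary file to the Stanford tagger. A hypothetical call (the tagger construction and model paths vary by installation):

sentences = [['What', 'is', 'the', 'airspeed', 'of', 'an', 'unladen', 'swallow', '?'],
             ['I', 'do', "n't", 'know', '.']]
tagged_sentences = tagger.batch_tag(sentences)  # tagger is a configured Stanford tagger instance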
Example 4: _batch_classify
def _batch_classify(self, featuresets, options):
    # Make sure we can find java & weka.
    config_weka()
    temp_dir = tempfile.mkdtemp()
    try:
        # Write the test data file.
        test_filename = os.path.join(temp_dir, 'test.arff')
        self._formatter.write(test_filename, featuresets)
        # Call weka to classify the data.
        cmd = ['weka.classifiers.bayes.NaiveBayes',
               '-l', self._model, '-T', test_filename] + options
        (stdout, stderr) = java(cmd, classpath=_weka_classpath,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # Check if something went wrong:
        if stderr and not stdout:
            if 'Illegal options: -distribution' in stderr:
                raise ValueError('The installed version of weka does '
                                 'not support probability distribution '
                                 'output.')
            else:
                raise ValueError('Weka failed to generate output:\n%s'
                                 % stderr)
        # Parse weka's output.
        return self.parse_weka_output(stdout.split('\n'))
    finally:
        for f in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, f))
        os.rmdir(temp_dir)
Example 5: _execute
def _execute(self, cmd, input_, verbose=False):
    encoding = self._encoding
    cmd.extend(['-charset', encoding])
    _options_cmd = self._options_cmd
    if _options_cmd:
        cmd.extend(['-options', self._options_cmd])
    default_options = ' '.join(_java_options)
    # Configure java.
    config_java(options=self.java_options, verbose=verbose)
    # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
        # Write the actual sentences to the temporary input file
        if isinstance(input_, text_type) and encoding:
            input_ = input_.encode(encoding)
        input_file.write(input_)
        input_file.flush()
        cmd.append(input_file.name)
        # Run the tagger and get the output.
        stdout, stderr = java(cmd, classpath=self._stanford_jar,
                              stdout=PIPE, stderr=PIPE)
        stdout = stdout.decode(encoding)
    os.unlink(input_file.name)
    # Return java configurations to their default values.
    config_java(options=default_options, verbose=False)
    return stdout
Example 6: tag
def tag(self, text, options=['-mx2g']):
    command = ['edu.stanford.nlp.tagger.maxent.MaxentTagger']
    command.extend(['-model', self._model])
    command.extend(['-outputFormat', 'xml'])
    command.extend(['-outputFormatOptions', 'lemmatize'])
    command.extend(options)
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as text_file:
        text_file.write(text.encode('utf-8'))
        text_file.flush()
        command.extend(['-textFile', text_file.name])
        stderr = subprocess.DEVNULL if not self._verbose else None
        stdout, _ = java(command, classpath=self._libs,
                         stderr=stderr, stdout=subprocess.PIPE)
    output = stdout.decode('utf-8')
    tagged = []
    for line in output.splitlines():
        match = self._xml_regex.fullmatch(line)
        if match:
            tagged.append((match.group(3), match.group(2), match.group(1)))
    return tagged
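A hypothetical call to the wrapper above. Each returned tuple is assembled from the capture groups of the instance's _xml_regex, which this excerpt does not show, so the exact order of the fields depends on that regex:

triples = wrapper.tag('The dogs were barking loudly.')  # wrapper is a configured instance
for a, b, c in triples:
    print(a, b, c)  # e.g. token, POS tag, and lemma, per the regex group order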
Example 7: _batch_classify
def _batch_classify(self, featuresets, options):
    # Make sure we can find java & weka.
    config_weka()
    temp_dir = tempfile.mkdtemp()
    try:
        # Write the test data file.
        test_filename = os.path.join(temp_dir, "test.arff")
        self._formatter.write(test_filename, featuresets)
        # Call weka to classify the data.
        cmd = ["weka.classifiers.bayes.NaiveBayes", "-l", self._model, "-T", test_filename] + options
        (stdout, stderr) = java(cmd, classpath=_weka_classpath, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Check if something went wrong:
        if stderr and not stdout:
            if "Illegal options: -distribution" in stderr:
                raise ValueError(
                    "The installed version of weka does "
                    "not support probability distribution "
                    "output."
                )
            else:
                raise ValueError("Weka failed to generate output:\n%s" % stderr)
        # Parse weka's output.  (Assumes `from sys import stdin` at module level.)
        return self.parse_weka_output(stdout.decode(stdin.encoding).split("\n"))
    finally:
        for f in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, f))
        os.rmdir(temp_dir)
Example 8: _execute
def _execute(self, cmd, input_, verbose=False):
    encoding = self._encoding
    cmd.extend(['-encoding', encoding])
    if self.corenlp_options:
        cmd.append(self.corenlp_options)
    default_options = ' '.join(_java_options)
    # Configure java.
    config_java(options=self.java_options, verbose=verbose)
    # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
        # Write the actual sentences to the temporary input file
        if isinstance(input_, text_type) and encoding:
            input_ = input_.encode(encoding)
        input_file.write(input_)
        input_file.flush()
        # Run the tagger and get the output.
        if self._USE_STDIN:
            input_file.seek(0)
            stdout, stderr = java(
                cmd,
                classpath=self._classpath,
                stdin=input_file,
                stdout=PIPE,
                stderr=PIPE,
            )
        else:
            cmd.append(input_file.name)
            stdout, stderr = java(
                cmd, classpath=self._classpath, stdout=PIPE, stderr=PIPE
            )
        # Strip stray non-breaking-space byte sequences before decoding.
        stdout = stdout.replace(b'\xc2\xa0', b' ')
        stdout = stdout.replace(b'\x00\xa0', b' ')
        stdout = stdout.decode(encoding)
    os.unlink(input_file.name)
    # Return java configurations to their default values.
    config_java(options=default_options, verbose=False)
    return stdout
Example 9: detokenize
def detokenize(self, text, options=['-mx2g']):
    command = ['edu.stanford.nlp.process.PTBTokenizer', '-untok']
    command.extend(options)
    stderr = subprocess.DEVNULL if not self._verbose else None
    jproc = java(command, classpath=self._libs, blocking=False,
                 stderr=stderr, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    stdout, _ = jproc.communicate(text.encode('utf-8'))
    output = stdout.decode('utf-8')
    return output
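Because blocking=False returns the Popen object, the method above can stream its input through stdin via communicate() instead of writing a temporary file. A hypothetical call, assuming wrapper is a configured instance:

text = 'This is a test .'        # tokenized text, tokens separated by spaces
print(wrapper.detokenize(text))  # expected to print something like: This is a test.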
Example 10: call_mxpost
def call_mxpost(classpath=None, stdin=None, stdout=None, stderr=None,
                blocking=False):
    if not classpath:
        config_mxpost()
    if not classpath:
        classpath = _mxpost_classpath
    elif 'mxpost.jar' not in classpath:
        # Note: ':' is the POSIX classpath separator; os.path.pathsep would be portable.
        classpath += ':%s' % _mxpost_classpath
    cmd = ['tagger.TestTagger', '%s/%s' % (_mxpost_home, 'wsj-02-21.mxpost')]
    return java(cmd, classpath, stdin, stdout, stderr, blocking)
Example 11: _execute
def _execute(self, cmd, verbose=False):
    encoding = self._encoding
    # cmd.extend(['-inputEncoding', encoding])
    _options_cmd = self._options_cmd
    if _options_cmd:
        cmd.extend(['-options', self._options_cmd])
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=verbose)  # Configure java.
    stdout, _stderr = java(cmd, classpath=self._stanford_jar,
                           stdout=PIPE, stderr=PIPE)
    stdout = stdout.decode(encoding)
    config_java(options=default_options, verbose=verbose)  # Return java configurations to their default values.
    return stdout
Example 12: call_mallet
def call_mallet(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
                blocking=True):
    """
    Call `nltk.internals.java` with the given command, and with the classpath
    modified to include both ``nltk.jar`` and all the ``.jar`` files defined by
    Mallet.

    See `nltk.internals.java` for parameter and return value descriptions.
    """
    if _mallet_classpath is None:
        config_mallet()
    # Set up the classpath
    if classpath is None:
        classpath = _mallet_classpath
    else:
        classpath += os.path.pathsep + _mallet_classpath
    # Delegate to java()
    return java(cmd, classpath, stdin, stdout, stderr, blocking)
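A hypothetical invocation of call_mallet, running one of Mallet's command-line entry points (the class name and arguments are illustrative):

import subprocess

stdout, stderr = call_mallet(
    ['cc.mallet.classify.tui.Text2Vectors', '--help'],  # illustrative Mallet entry point
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)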
Example 13: _classify_using_weka
def _classify_using_weka(self, test_comments, feature_extractor):
    test_set = nltk.classify.util.apply_features(feature_extractor.extract, test_comments)
    temp_dir = tempfile.mkdtemp()
    self.test_filename = os.path.join(temp_dir, 'test.arff')
    logger.info('Writing Test WEKA File: ' + self.test_filename)
    self._write_ARFF_file(self.test_filename, test_set)
    cmd = [self.javaclass, '-t', self.train_filename, '-T', self.test_filename] + ['-p', '0']
    logger.info('Executing WEKA: ' + str(cmd))
    config_java(options='-Xmx2000M')
    (stdout, stderr) = java(cmd, classpath=weka_classpath,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    return self.parse_weka_output(stdout.split('\n'))
Example 14: _batch_classify
def _batch_classify(self, featuresets, options):
    # Make sure we can find java & weka.
    config_weka()
    temp_dir = tempfile.mkdtemp()
    try:
        # Write the test data file.
        test_filename = os.path.join(temp_dir, 'test.arff')
        self._formatter.write(test_filename, featuresets)
        # Call weka to classify the data.
        cmd = ['weka.classifiers.bayes.NaiveBayes',
               '-l', self._model, '-T', test_filename] + options
        (stdout, stderr) = java(cmd, classpath=_weka_classpath,
                                stdout=subprocess.PIPE)
        # Parse weka's output.
        return self.parse_weka_output(stdout.split('\n'))
    finally:
        for f in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, f))
        os.rmdir(temp_dir)
Example 15: start
def start(self):
    import requests

    cmd = ['edu.stanford.nlp.pipeline.StanfordCoreNLPServer']
    if self.corenlp_options:
        cmd.extend(self.corenlp_options)
    # Configure java.
    default_options = ' '.join(_java_options)
    config_java(options=self.java_options, verbose=self.verbose)
    try:
        # TODO: it's probably a bad idea to pipe stdout, as it will
        # accumulate when lots of text is being parsed.
        self.popen = java(
            cmd,
            classpath=self._classpath,
            blocking=False,
            stdout='pipe',
            stderr='pipe',
        )
    finally:
        # Return java configurations to their default values.
        config_java(options=default_options, verbose=self.verbose)
    # Check that the server is still running.
    returncode = self.popen.poll()
    if returncode is not None:
        _, stderrdata = self.popen.communicate()
        raise CoreNLPServerError(
            returncode,
            'Could not start the server. '
            'The error was: {}'.format(stderrdata.decode('ascii'))
        )
    for i in range(30):
        try:
            response = requests.get(requests.compat.urljoin(self.url, 'live'))
        except requests.exceptions.ConnectionError:
            time.sleep(1)
        else:
            if response.ok:
                break
    else:
        raise CoreNLPServerError('Could not connect to the server.')
    for i in range(60):
        try:
            response = requests.get(requests.compat.urljoin(self.url, 'ready'))
        except requests.exceptions.ConnectionError:
            time.sleep(1)
        else:
            if response.ok:
                break
    else:
        raise CoreNLPServerError('The server is not ready.')
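Once start() returns, the server answers standard CoreNLP HTTP requests. A minimal sketch of talking to it directly with requests (the annotators and properties are illustrative; NLTK's own CoreNLPParser would normally be used instead):

import requests

server = CoreNLPServer()  # jar locations are found via CLASSPATH or constructor arguments
server.start()
try:
    response = requests.post(
        server.url,
        params={'properties': '{"annotators": "tokenize,ssplit,pos", "outputFormat": "json"}'},
        data='The quick brown fox jumps.'.encode('utf-8'),
    )
    print(response.json())
finally:
    server.stop()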