This page collects typical usage examples of Python's nltk.parse.stanford.StanfordParser. If you have been wondering what stanford.StanfordParser is for, how it is used, or where to find working examples, the curated code samples below may help. You can also explore the module it lives in, nltk.parse.stanford, for further usage examples.
Below are 7 code examples of stanford.StanfordParser, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
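Before the examples, here is a minimal setup sketch. It assumes a local download of the Stanford Parser; the jar paths are placeholders you would adapt. Recent NLTK releases deprecate nltk.parse.stanford.StanfordParser in favour of the CoreNLP-based parsers, but the constructor arguments and the raw_parse call below match what the examples on this page use.

from nltk.parse.stanford import StanfordParser

# Placeholder paths to a local Stanford Parser download.
parser = StanfordParser(
    path_to_jar='/path/to/stanford-parser.jar',
    path_to_models_jar='/path/to/stanford-parser-models.jar',
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

# raw_parse returns an iterator over nltk.Tree objects (best parse first).
tree = next(parser.raw_parse('The quick brown fox jumps over the lazy dog.'))
tree.pretty_print()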
Example 1: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
    # The Stanford jars are located through the CLASSPATH environment variable.
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed")
    if parse_type == 'parse':
        # Pick the PCFG model matching the corpus language.
        if lang == 'english':
            self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % self.jar_path)
        elif lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % self.jar_path)
        # self.cleaned_path = path.join(datasets_path, "processed.parse")
    elif parse_type == 'props':  # TODO
        if lang == 'english':
            self.props_parser = ClausIE.get_instance()
        elif lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % self.jar_path)
Example 2: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self):
    """
    Initialize the SVO Methods
    """
    self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
    self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
    self.adjective_types = ["JJ", "JJR", "JJS"]
    self.pred_verb_phrase_siblings = None
    # With no arguments, the parser finds its jars via the CLASSPATH /
    # STANFORD_PARSER / STANFORD_MODELS environment variables.
    self.parser = stanford.StanfordParser()
    self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
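For context, a rough sketch of how an initializer like this might be driven; the class name SVO and the sample text are assumptions for illustration, not part of the original code:

svo = SVO()
text = "John threw the ball. Mary caught it."
for sentence in svo.sent_detector.tokenize(text):
    tree = next(svo.parser.raw_parse(sentence))
    # Keep the tokens whose POS tags fall in the noun/verb lists defined above.
    nouns = [w for w, tag in tree.pos() if tag in svo.noun_types]
    verbs = [w for w, tag in tree.pos() if tag in svo.verb_types]
    print(sentence, nouns, verbs)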
Example 3: main
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def main(argv):
    debug = False
    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError as e:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ["-h", "--help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "--debug"]:
            debug = True
    parser = stanford.StanfordParser()
    line = input("Enter line: ")
    while line != 'stop':
        # raw_parse yields the best parse(s); take the first tree.
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print(sent)  # print parse tree
        if sent[0].label() == "SBARQ":
            print(answer(sent))
        else:
            try:
                describe(sent)
            except ValueError as e:
                print("Error describing sentence. " + str(e))
        if debug:
            print(smap)  # print semantic map
        line = input("Enter line: ")
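In the loop above, sent[0].label() reads the label of the top-level clause: raw_parse returns a tree rooted at ROOT, and in the Penn Treebank bracketing SBARQ marks a direct wh-question. A small illustrative sketch of inspecting such a tree (the sentence is made up):

tree = next(parser.raw_parse("What is the capital of France?"))
print(tree.label())      # typically ROOT
print(tree[0].label())   # e.g. SBARQ for a wh-question
for np in tree.subtrees(lambda t: t.label() == "NP"):
    print(" ".join(np.leaves()))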
Example 4: clear_data
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def clear_data(self):
    # model_path points at a local, unpacked copy of the English PCFG model.
    self.parser = stanford.StanfordParser(model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.first_NP = ''
    self.first_VP = ''
    self.parse_tree = None
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
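The first_NP and first_VP fields are presumably filled from the parse tree elsewhere in this class. Purely as an illustration (not the original class's code), subtrees of a given label can be located with nltk.Tree like this:

# Illustrative only: find the first NP and VP subtrees of a parse.
tree = next(self.parser.raw_parse("The cat sat on the mat."))
nps = list(tree.subtrees(lambda t: t.label() == 'NP'))
vps = list(tree.subtrees(lambda t: t.label() == 'VP'))
self.first_NP = " ".join(nps[0].leaves()) if nps else ''
self.first_VP = " ".join(vps[0].leaves()) if vps else ''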
Example 5: setup_extractor
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def setup_extractor(self):
    self.splitter = PunktSentenceSplitter(self.language)
    self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                 path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                 java_options=' -mx2G -Djava.ext.dirs=dev/')
    # Build the inverse token -> lemma map.
    self.token_to_lemma = {}
    for lemma, tokens in self.lemma_to_token.items():
        for t in tokens:
            self.token_to_lemma[t] = lemma
    self.all_verbs = set(self.token_to_lemma.keys())
Example 6: main
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence
    """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    # Union of all verb tokens loaded from the JSON file, minus auxiliaries.
    all_verbs = set().union(*map(set, json.load(verbs).values()))
    all_verbs.discard('be')
    all_verbs.discard('have')
    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)
    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.items():
            counter[k] += v
        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)
    counter = OrderedDict(sorted(counter.items(), key=lambda item: item[0]))
    for k, v in counter.items():
        print(k, v)
    json.dump(counter, outfile, indent=2)
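The worker functions and the parallel/load_corpus helpers come from the surrounding project and are not shown here. Purely as a simplified stand-in (it matches surface tokens rather than lemmas, and is not the project's actual implementation), a per-document worker could look like this:

import nltk
from collections import defaultdict

def count_lus_per_sentence(text, known_verbs):
    """Simplified stand-in: group sentences by how many known verb tokens they contain."""
    counts = defaultdict(int)
    for sentence in nltk.sent_tokenize(text):
        tokens = [t.lower() for t in nltk.word_tokenize(sentence)]
        counts[sum(1 for t in tokens if t in known_verbs)] += 1
    return counts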
Example 7: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self):
    self.parser = StanfordParser()
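With no arguments, the constructor locates the parser and model jars through environment variables (or CLASSPATH), so a bare StanfordParser() only works if those are set beforehand; the paths below are placeholders:

import os
from nltk.parse.stanford import StanfordParser

os.environ['STANFORD_PARSER'] = '/path/to/stanford-parser.jar'         # placeholder
os.environ['STANFORD_MODELS'] = '/path/to/stanford-parser-models.jar'  # placeholder

parser = StanfordParser()  # defaults to the English PCFG model inside the models jar
tree = next(parser.raw_parse("Time flies like an arrow."))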