本文整理汇总了Python中lexicon.Lexicon类的典型用法代码示例。如果您正苦于以下问题：Python Lexicon类的具体用法？Python Lexicon怎么用？Python Lexicon使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Lexicon类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: dir_only_shows_real_keys
def dir_only_shows_real_keys(self):
    """dir() only shows real keys-as-attrs, not aliases"""
    lex = Lexicon({'key1': 'val1', 'key2': 'val2'})
    lex.alias('myalias', 'key1')
    listing = dir(lex)
    # Both real keys must be listed ...
    for real_key in ('key1', 'key2'):
        assert real_key in listing
    # ... but the alias pointing at one of them must not be.
    assert 'myalias' not in listing
示例2: getLexicons
def getLexicons(self, l_type='Senti'):
    """
    Populate ``self.lexicons`` from a freshly built ``Lexicon``.

    ``l_type`` picks the loader invoked on that object: ``'Senti'`` calls
    ``getLexiconsFromSentiment()`` and ``'PN'`` calls ``getLexiconsFromPN()``.
    For any other value no loader is called and ``self.lexicons`` receives
    the ``lexicons`` attribute of the untouched ``Lexicon``.
    """
    source = Lexicon()
    wants_sentiment = l_type == 'Senti'
    if wants_sentiment:
        source.getLexiconsFromSentiment()
    else:
        if l_type == 'PN':
            source.getLexiconsFromPN()
    self.lexicons = source.lexicons
示例3: Collection
class Collection(object):
    """
    A registry of tasks stored by name in a ``Lexicon``.

    ``self.tasks`` maps task names (and aliases) to task objects and
    ``self.default`` holds the name of the default task, or ``None`` when no
    default has been chosen.
    """

    def __init__(self):
        self.tasks = Lexicon()
        self.default = None

    def add_task(self, name, task, aliases=(), default=False):
        """
        Adds callable object ``task`` to this collection under name ``name``.

        If ``aliases`` is given, will be used to set up additional aliases for
        this task.

        ``default`` may be set to ``True`` to set the task as this collection's
        default invocation.

        Raises ``ValueError`` if ``default`` is requested while another task
        is already the default. In that case the collection is left
        unmodified: neither the task nor its aliases are registered.
        """
        # Validate before mutating anything so that a rejected call does not
        # leave a half-registered task behind.
        if default and self.default:
            msg = "'%s' cannot be the default because '%s' already is!"
            raise ValueError(msg % (name, self.default))
        self.tasks[name] = task
        for alias in aliases:
            self.tasks.alias(alias, to=name)
        if default:
            self.default = name

    def __getitem__(self, name=None):
        """
        Returns task named ``name``. Honors aliases.

        If this collection has a default task, it is returned when ``name`` is
        empty or ``None``. If empty input is given and no task has been
        selected as the default, ValueError will be raised.
        """
        if not name:
            if self.default:
                return self[self.default]
            raise ValueError("This collection has no default task.")
        return self.tasks[name]

    def to_contexts(self):
        """
        Returns all contained tasks and subtasks as a list of parser contexts.

        One ``Context`` is built per task; every entry of the task's
        ``argspec`` becomes an argument on that context.
        """
        result = []
        # ``items()`` works on Python 2 and 3 alike, unlike ``iteritems()``.
        # NOTE(review): assumes ``task.argspec`` is dict-like and offers
        # ``items()`` as well -- confirm against the task implementation.
        for task_name, task in self.tasks.items():
            context = Context(name=task_name, aliases=task.aliases)
            argspec = task.argspec
            # Distinct loop names so the argument name no longer shadows the
            # task name of the enclosing loop.
            for arg_name, arg_default in argspec.items():
                # Handle arg options
                opts = {}
                if arg_default is not None:
                    opts['kind'] = type(arg_default)
                # Handle aliases (auto shortflags, etc)
                names = [arg_name]
                names.extend(argspec.aliases_of(arg_name))
                # Create/add the argument
                context.add_arg(names=names, **opts)
            result.append(context)
        return result
示例4: InvertedIndex
class InvertedIndex():
    """
    Inverted index mapping lexicon ids to posting lists.

    Construction immediately reads the documents supplied by ``DocReader``
    and indexes them (see ``build_index``).
    """

    def __init__(self):
        self.invertedindex = {}  # lexicon id -> PostingList
        self.lexicon = Lexicon()
        self.tokenizer = Tokenizer()
        self.doc_reader = DocReader()
        self.build_index()

    def build_index(self):
        """Read all documents from the doc reader and index each of them."""
        cache = self.doc_reader.get_cache()
        docs = self.doc_reader.read_docs(cache)
        # Single-argument, parenthesized print behaves identically on
        # Python 2 and Python 3.
        print("\nINVERTEDINDEX : Indexing %d documents..\n" % len(docs))
        # The position of a document in ``docs`` doubles as its document id.
        for d in range(len(docs)):
            print("Indexing document '%s'" % (settings.PATH_DOCS + str(d)))
            self.add_document(docs[d], d)
        print("Indexed total %d unique terms" % self.lexicon.size())

    def get_postinglist(self, lex_id):
        """Return the posting list stored for ``lex_id`` (KeyError if absent)."""
        return self.invertedindex[lex_id]

    def add_document(self, doc, document_id):
        """
        Tokenize ``doc`` and record every token under ``document_id``.

        A token whose value is not yet in the lexicon gets a new lexicon id
        and a new posting list. For a known value, a new posting is appended
        when the last posting belongs to a different document; otherwise the
        token position is appended to that last posting.
        """
        # TODO: possibly too slow; consider removing stop words and reducing
        # the number of tokens.
        tokens = self.tokenizer.tokenize(doc)
        for t in tokens:
            value = t.get_value()
            lex_id = self.lexicon.lookup(value)
            if lex_id == settings.INVALID:
                lex_id = self.lexicon.add_value(value)
                pl = PostingList()
                pl.append_posting(Posting(document_id, t.get_position()))
                self.invertedindex[lex_id] = pl
            else:
                pl = self.get_postinglist(lex_id)
                last = pl.get_last_posting()
                if last.get_document_id() != document_id:
                    pl.append_posting(Posting(document_id, t.get_position()))
                else:
                    last.append_position(t.get_position())

    def size(self):
        """Return the number of lexicon ids that have a posting list."""
        return len(self.invertedindex)

    def debugprint(self):
        """Print every vocabulary entry followed by its posting list info."""
        voc = self.lexicon.get_vocabulary()
        for v in voc:
            lid = self.lexicon.lookup(v)
            pl = self.get_postinglist(lid)
            print("[%s]" % v)
            pl.info()
def create_lexicon():
    """
    Build a ``Lexicon`` from the word file at ``ENGLISH_WORDS``.

    Every line of the file is handed to ``Lexicon.add_word`` exactly as read.
    Progress banners are printed to standard output. Returns the populated
    lexicon.
    """
    lexicon = Lexicon()
    # The context manager closes the file even if add_word raises; the
    # original left the handle open.
    with open(ENGLISH_WORDS, 'r') as words_file:
        print('=== CREATING LEXICON ===')
        print('=== READING WORDS FILE ===')
        for word in words_file:
            # NOTE(review): ``word`` still carries its line terminator;
            # presumably add_word normalizes it -- confirm.
            lexicon.add_word(word)
    print('=== LEXICON CREATED ===')
    return lexicon
示例6: get_ccg_lexicon
def get_ccg_lexicon():
    """
    Load ``lexicon.txt`` from ``DB_DIR`` into a new ``Lexicon``.

    Each stripped line must split into exactly two parts on the separator
    ``' :- NP : '`` (otherwise unpacking raises ``ValueError``). In the
    second part every ``':'`` is replaced by ``':_'`` before the pair is
    collected. All pairs are passed to ``add_entries`` in one call.
    """
    ccg_lexicon = Lexicon()
    lexicon_path = os.path.join(DB_DIR, 'lexicon.txt')
    with open(lexicon_path) as handle:
        split_lines = (raw.strip().split(' :- NP : ') for raw in handle)
        entries = [
            (left, right.replace(':', ':_'))
            for left, right in split_lines
        ]
    ccg_lexicon.add_entries(entries)
    return ccg_lexicon
示例7: make_lexicon
def make_lexicon(self):
    """
    Return a ``Lexicon`` holding two hand-built entries, "the" and "dog".

    Each entry is a single ``Lexical`` whose feature variables are bound via
    ``set_variable`` before the word is added.
    """
    lexicon = Lexicon()

    # Article "the": agreement may be 3s or 3p.
    article = Lexical("ART", {"ROOT": "?the", "AGR": "?v"})
    for variable, values in (("?the", ["the"]), ("?v", ["3s", "3p"])):
        article.set_variable(variable, values)
    lexicon.add_word("the", [article])

    # Noun "dog": agreement is 3s only.
    noun = Lexical("N", {"ROOT": "?dog1", "AGR": "?3s"})
    for variable, values in (("?3s", ["3s"]), ("?dog1", ["DOG1"])):
        noun.set_variable(variable, values)
    lexicon.add_word("dog", [noun])

    return lexicon
示例8: __init__
def __init__(self, code="zxx"):
    """
    Create a lect object.

    A I{lect} is a language variety; it can be either a spoken or a written
    form, and a colloquial, mediatic or standard form, and so on.
    It wraps serialization and high-level features.

    It contains three independent internal members:
        - L{lexicon<lexicon>}
        - L{grammar<grammar>}
        - L{inflections<inflection>}

    @type code: str
    @param code:
        A language code according to the U{ISO<http://www.iso.org>} standard.
        For the language codes, refer to the 639-3 specifications.
        A country/variety code and a representation system might be added:
        C{eng-US}, C{esp:ERG}, C{por-BR:IPA}
    """
    # Identification of the lect.
    self.code = code
    self.name = u""
    self.english_name = ""

    # The three independent members; only the grammar receives the code.
    self.grammar = Grammar(code)
    self.lexicon = Lexicon()
    self.inflections = Inflections()

    # Private bookkeeping, empty until filled elsewhere.
    self.__p_o_s = ()
    self.__lemma_categories = {}
    self.__categories = {}

    self.properties = {
        "separator": " ",
        "capitalization": "3",  # Lexical and Initials
    }
示例9: read_Tischendorf_WH_Matthew_compare_them
def read_Tischendorf_WH_Matthew_compare_them():
    """
    Run the Tischendorf / WH comparison and write the resulting files.

    Steps, in order: load the accented Tischendorf reader and the WH reader,
    let the WH reader compare itself against Tischendorf (using a fresh
    ``Lexicon`` and the manual analyses file), apply mappings on the
    Tischendorf reader, write its books, then write two lexicon files:
    ``lexicon_nonunique.txt`` from the WH reader's lexicon and
    ``tischlexicon_nonunique.txt`` from a lexicon produced by the
    Tischendorf reader.

    Returns the Tischendorf reader.
    """
    comparison_lexicon = Lexicon()
    tischrd = read_AccentedTischendorf_MT()
    manual = ManualAnalyses("./manual_analyses.txt")
    whrd = read_WH_MT()
    # Disabled alternatives: read_WH_writeMQL() as the WH source, and
    # pre-seeding the lexicon from read_Stephanus() / read_Byzantine().

    whrd.compareTischendorf(tischrd, comparison_lexicon, manual)
    tischrd.applyMappings()
    tischrd.writeBooks_MORPH_style(tisch_out_basedir, "TSP", kind.kBETA)

    # The lexicon that gets written is the one held by the WH reader.
    wh_lexicon = whrd.lexicon
    wh_lexicon.writeLexicon("lexicon_nonunique.txt", False)

    tisch_lexicon = tischrd.produceLexicon(Lexicon())
    tisch_lexicon.writeLexicon("tischlexicon_nonunique.txt", False)
    return tischrd
示例10: __init__
def __init__(self, name=None, aliases=(), args=()):
    """
    Build a ``Context`` called ``name`` that also answers to ``aliases``.

    ``name`` is optional and should be a string when supplied. It tells
    Context objects apart and lets a Parser decide which chunk of its input
    belongs to which Context.

    ``aliases`` is optional as well and should be an iterable of strings;
    parsing honors them when looking for this context in the input.

    ``args`` may hold one or more arguments; each one is passed to
    ``self.add_arg``, which saves calling it by hand after construction.
    """
    self.name, self.aliases = name, aliases
    # Separate, initially empty registries for arguments and flags.
    self.args = Lexicon()
    self.flags = Lexicon()
    for argument in args:
        self.add_arg(argument)
示例11: __init__
def __init__(self, cfg):
    """
    Set the object up from the configuration ``cfg``.

    Reads the ``deps`` and ``machine`` sections of ``cfg``, makes sure the
    directory of the binary output file exists, builds the dependency
    processor and lemmatizer, reads the dependency map and loads the lexicon
    from the configured binary file.
    """
    self.cfg = cfg
    self.lang = cfg.get("deps", "lang")

    # Output location; its directory is ensured up front.
    self.out_fn = cfg.get("machine", "definitions_binary_out")
    out_dir = os.path.dirname(self.out_fn)
    ensure_dir(out_dir)

    self.dependency_processor = DependencyProcessor(cfg)
    dep_map_path = cfg.get("deps", "dep_map")
    self.read_dep_map(dep_map_path)

    self.undefined = set()
    self.lemmatizer = Lemmatizer(cfg)

    self.lexicon_fn = cfg.get("machine", "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.word2lemma = {}
示例12: Collection
class Collection(object):
    """
    A registry of tasks stored by name in a ``Lexicon``.

    ``self.tasks`` maps task names (and aliases) to task objects and
    ``self.default`` holds the name of the default task, or ``None`` when no
    default has been chosen.
    """

    def __init__(self):
        self.tasks = Lexicon()
        self.default = None

    def add_task(self, name, task, aliases=(), default=False):
        """
        Adds callable object ``task`` to this collection under name ``name``.

        If ``aliases`` is given, will be used to set up additional aliases for
        this task.

        ``default`` may be set to ``True`` to set the task as this collection's
        default invocation.

        Raises ``ValueError`` if ``default`` is requested while another task
        is already the default. In that case the collection is left
        unmodified: neither the task nor its aliases are registered.
        """
        # Validate before mutating anything so that a rejected call does not
        # leave a half-registered task behind.
        if default and self.default:
            msg = "'%s' cannot be the default because '%s' already is!"
            raise ValueError(msg % (name, self.default))
        self.tasks[name] = task
        for alias in aliases:
            self.tasks.alias(alias, to=name)
        if default:
            self.default = name

    def get(self, name=None):
        """
        Returns task named ``name``. Honors aliases.

        If this collection has a default task, it is returned when ``name`` is
        empty or ``None``. If empty input is given and no task has been
        selected as the default, ValueError will be raised.
        """
        if not name:
            if self.default:
                return self.get(self.default)
            raise ValueError("This collection has no default task.")
        return self.tasks[name]
示例13: Parser
class Parser(object):
    """
    Holds a set of named contexts and parses argv-style token lists with
    them.
    """

    def __init__(self, contexts=(), initial=None):
        """
        Register every context in ``contexts`` under its name and aliases.

        ``initial`` is stored as-is and later handed to the parse machine.

        Raises ``ValueError`` when a context has no name, or when a name or
        alias is already present in this parser.
        """
        self.initial = initial
        self.contexts = Lexicon()
        exists = "A context named/aliased %r is already in this parser!"
        for context in contexts:
            debug("Adding %s" % context)
            name = context.name
            if not name:
                raise ValueError("Non-initial contexts must have names.")
            if name in self.contexts:
                raise ValueError(exists % name)
            self.contexts[name] = context
            # Every alias must be unused as well before it is attached.
            for alias in context.aliases:
                if alias in self.contexts:
                    raise ValueError(exists % alias)
                self.contexts.alias(alias, to=name)

    def parse_argv(self, argv):
        """
        Parse an argv-style token list ``argv``.

        Returns the ``result`` of a ``ParseMachine`` that was fed every token
        in order: a list of ``Context`` objects in the order they were found
        in ``argv``, holding ``Argument`` objects whose values reflect the
        flags given.

        Assumes any program name has already been stripped out. Good::

            Parser(...).parse_argv(['--core-opt', 'task', '--task-opt'])

        Bad::

            Parser(...).parse_argv(['invoke', '--core-opt', ...])
        """
        machine = ParseMachine(initial=self.initial, contexts=self.contexts)
        for token in argv:
            machine.handle(token)
        machine.finish()
        return machine.result
示例14: __init__
def __init__(self, contexts=(), initial=None):
    """
    Store ``initial`` and register each of ``contexts`` by name and alias.

    Raises ``ValueError`` for a context without a name and for any name or
    alias that is already registered in this parser.
    """
    self.initial = initial
    self.contexts = Lexicon()
    duplicate = "A context named/aliased %r is already in this parser!"
    for ctx in contexts:
        debug("Adding %s" % ctx)
        ctx_name = ctx.name
        if not ctx_name:
            raise ValueError("Non-initial contexts must have names.")
        if ctx_name in self.contexts:
            raise ValueError(duplicate % ctx_name)
        self.contexts[ctx_name] = ctx
        # Aliases share the namespace of the names, so check each one.
        for alias in ctx.aliases:
            if alias in self.contexts:
                raise ValueError(duplicate % alias)
            self.contexts.alias(alias, to=ctx_name)
示例15: __init__
def __init__(self, contexts=(), initial=None, ignore_unknown=False):
    """
    Store ``initial`` and ``ignore_unknown`` and register each of
    ``contexts`` under its name and aliases.

    Raises ``ValueError`` for a context without a name and for any name or
    alias that is already registered in this parser.
    """
    self.initial = initial
    self.contexts = Lexicon()
    self.ignore_unknown = ignore_unknown
    duplicate = "A context named/aliased {0!r} is already in this parser!"
    for ctx in contexts:
        debug("Adding {0}".format(ctx))
        ctx_name = ctx.name
        if not ctx_name:
            raise ValueError("Non-initial contexts must have names.")
        if ctx_name in self.contexts:
            raise ValueError(duplicate.format(ctx_name))
        self.contexts[ctx_name] = ctx
        # Aliases share the namespace of the names, so check each one.
        for alias in ctx.aliases:
            if alias in self.contexts:
                raise ValueError(duplicate.format(alias))
            self.contexts.alias(alias, to=ctx_name)