本文整理汇总了Python中pymorphy.get_morph函数的典型用法代码示例。如果您正苦于以下问题:Python get_morph函数的具体用法?Python get_morph怎么用?Python get_morph使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_morph函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: formalize
def formalize(filename, morph_dict=DICTS_DIR, stop_dict=stop_dict_path):
    """Build a relative-frequency dictionary for the words of *filename*.

    Each token is decoded as UTF-8, stripped of surrounding punctuation,
    normalized with pymorphy and lower-cased; stop words are skipped.

    Returns a dict mapping normalized word -> count / total_words_seen.
    Raises IOError if the file cannot be opened (original behaviour).
    """
    morph = get_morph(morph_dict)
    stop_words = get_stop_words()
    freqs = {}   # renamed from `dict` -- don't shadow the builtin
    words = 0.0
    # `with` guarantees the file is closed (the original leaked the handle);
    # IOError still propagates to the caller as before.
    with open(filename) as f:
        for line in f:
            for word in line.split():
                word = word.decode("utf-8", 'ignore')
                word = word.strip(u'[,.:;\"\')$«»(?<>!-_—//=]\n\t')
                word = word.replace('.', '_')
                word = morph.normalize(word.upper())
                # normalize() returns a set of candidate normal forms on
                # success; anything else means the word is unknown -- skip.
                if isinstance(word, set):
                    word = word.pop()
                else:
                    continue
                word = word.lower()
                words += 1
                if word in stop_words or not word:
                    continue
                if word not in freqs:
                    freqs[word] = 1.0
                else:
                    freqs[word] += 1.0
    # Guard against an empty/unknown-only file: the original divided by zero.
    if words:
        for key in freqs:
            freqs[key] /= words
    return freqs
示例2: post
def post(self, request):
    """Decline a Russian full name into the dative case.

    Reads last_name / first_name / patronymic from self.DATA, guesses the
    gender from the morphological info of the first name, and returns the
    inflected parts together with the current user.
    """
    morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
    last_name = self.DATA['last_name']
    first_name = self.DATA['first_name']
    patronymic = self.DATA['patronymic']
    # To decline the last name we must first determine the gender: it is
    # the first grammeme of the first name's grammatical info.
    try:
        sex = morph.get_graminfo(first_name.upper())[0]['info'].split(',', 1)[0]
    except IndexError:
        # get_graminfo() returned [] -- unknown first name.
        # print 'get_graminfo failed on ', first_name
        # Fall back to feminine ('жр').
        sex = u'жр'
    # Last name: lastnames_ru applies surname-specific declension rules.
    last_name_inflated = firstcaps(lastnames_ru.inflect(morph,
        last_name.upper(), sex + u',дт'))
    # First name: plain dative case.
    first_name_inflated = firstcaps(morph.inflect_ru(first_name.upper(), u'дт'))
    # Patronymic: dative case, gender-specific.
    patronymic_inflated = firstcaps(morph.inflect_ru(patronymic.upper(), sex + u',дт'))
    return {
        'last_name': last_name_inflated,
        'first_name': first_name_inflated,
        'patronymic': patronymic_inflated,
        'user': self.get_user(),
    }
示例3: __init__
def __init__(self, corpus=None):
    """Initialize n-gram count tables and the morphology module.

    If *corpus* is given, immediately train on it.
    """
    # defaultdict(int) is the idiomatic zero-default counter
    # (equivalent to lambda: 0, but picklable and slightly faster).
    self.unigramCounts = collections.defaultdict(int)
    self.bigramCounts = collections.defaultdict(int)
    self.V = 0  # vocabulary size, filled during training
    self.morph = get_morph(DICTS_DIR)
    if corpus:
        self.train(corpus)
示例4: __init__
def __init__(self, page, working_list, mutex, dbname = 'crawler_db'):
    """Crawler worker thread bound to one *page*.

    Initializes the Db_manager base with *dbname* and the Thread base,
    then registers *page* in the shared *working_list* under *mutex*.
    """
    Db_manager.__init__(self, dbname)
    threading.Thread.__init__(self)
    self.working_list = working_list   # shared across worker threads
    self.page = page
    self.mutex = mutex                 # guards access to working_list
    self.morph = get_morph('dicts')    # pymorphy dictionaries from ./dicts (relative path)
    with self.mutex:
        working_list.append(page)
示例5: download_morph
def download_morph():
    """Download (once) and load the pymorphy Russian dictionaries.

    The sqlite-json dictionary archive is fetched with wget and unpacked
    into ./morph_dicts under the current directory; subsequent calls reuse
    the unpacked copy.  Returns the loaded morph object.

    Raises subprocess.CalledProcessError if wget or unzip fails.
    """
    path_to_dictionary = os.path.realpath(os.path.curdir)
    morph_path = join(path_to_dictionary, 'morph_dicts')
    if not os.path.exists(morph_path):
        # check_call raises on a non-zero exit code instead of silently
        # continuing with a missing or partial dictionary download.
        subprocess.check_call(['wget', 'https://bitbucket.org/kmike/pymorphy/downloads/ru.sqlite-json.zip'])
        subprocess.check_call(['unzip', 'ru.sqlite-json.zip', '-d', 'morph_dicts'])
    morph = get_morph(morph_path)
    return morph
示例6: get
def get(self, request, *args, **kwargs):
    """User search view: Sphinx full-text search with pymorphy
    normalization of each word of the query string.

    Returns JSON-serialized users (username, first_name, last_name),
    or a JSON error object when the form is invalid.
    """
    params = request.GET
    COUNT_ELEMENTS = 5
    errors = []
    # NOTE(review): limit/offset are fixed constants; no paging parameters
    # are read from the request.
    limit = COUNT_ELEMENTS
    offset = 0
    form = forms.SearchForm(params)
    if form.is_valid():
        #pointsreq = MainModels.Person.objects;
        name = form.cleaned_data.get("s")
        users_list = []
        # NOTE(review): hard-coded absolute dictionary path -- should come
        # from settings.
        morph = get_morph('/home/tenoclock/yasenput/dicts')
        if name:
            #pointsreq = MainModels.Person.search.query(params.get("s"))
            #search = SphinxSearch()
            search = SphinxQuerySet(index="auth_user")
            name_morph = morph.normalize(name.upper())
            # NOTE(review): debug dump left in production code -- writes the
            # normalized query to file1.txt on every request.
            file1 = open('file1.txt','w')
            file1.write(str(list(name_morph)))
            file1.close()
            phrase_list = name.split(' ')
            for phrase in phrase_list:
                if phrase != '':
                    # Re-normalize each individual word of the query.
                    name_morph = morph.normalize(phrase.upper())
                    for name_m in name_morph:
                        search_query = search.query(name_m)
                        for splited_item in search_query:
                            # NOTE(review): the same Person is fetched twice
                            # per hit; fetch once into a local variable.
                            if not MainModels.Person.objects.get(id = splited_item['id']) in users_list:
                                users_list.append(MainModels.Person.objects.get(id = splited_item['id']))
        content = form.cleaned_data.get("content")
        if content == 'new':
            # NOTE(review): `pointsreq` is only assigned in the commented-out
            # lines above -- this branch and the next raise NameError when
            # reached; confirm intended data source.
            pointsreq = pointsreq.order_by('-id')
        elif content == "popular":
            pointsreq = pointsreq.annotate(usfiliwers=Count('followers__id')).order_by('-usfiliwers', '-id')
        else:
            pointsreq = users_list
        points = users_list[offset:limit]
        YpJson = YpSerialiser()
        return HttpResponse(YpJson.serialize(points, fields=("username", "first_name", "last_name")),
            mimetype="application/json")
    else:
        e = form.errors
        for er in e:
            errors.append(er +':'+e[er][0])
        return JsonHTTPResponse({"status": 0, "txt": ", ".join(errors)});
示例7: __init__
def __init__(self):
    """Set up Russian number-to-words conversion with pymorphy inflection.

    The dictionary location is read from the `lms`/`dicts` entry of the
    normalization config file; missing dictionaries abort the program.
    """
    super(NumWordRU,self).__init__()
    # Morphology module is required for inflecting numerals.
    from pymorphy import get_morph
    import ConfigParser
    import os
    cfg = ConfigParser.RawConfigParser()
    cfg.read('/home/soshial/text-normalization/normalization.cfg')
    dicts_folder = cfg.get('lms','dicts')
    if not os.path.exists(dicts_folder):
        quit('Please put existing dictionaries into "'+dicts_folder+'" folder!')
    self.morph = get_morph(dicts_folder)
    self.inflection_case = u"им"  # todo add gender for the ending of numeral ('жр')
示例8: main
def main():
(options, args) = parser.parse_args()
if not options.word or not options.dict:
print 'inflect -h for help.'
return
morph = get_morph(options.dict)
word = options.word.decode(chardet.detect(options.word)['encoding']).upper()
word = unicode(word)
a = morph.inflect_ru(word, u'пр', u'С')
print a.encode('utf8')
示例9: misc_utilites
def misc_utilites():
    """Expose template helpers: Russian pluralization and a login check."""
    morpher = get_morph('static/res/pymorphy/')

    def pluralize(number, word):
        # Inflect *word* to agree with *number*, returning it lower-cased.
        return morpher.pluralize_inflected_ru(word.upper(), number).lower()

    def is_logged_in():
        # Logged in iff the session holds an integer under 'logged'.
        try:
            int(session['logged'])
        except (ValueError, KeyError):
            return False
        return True

    return {'pluralize': pluralize, 'is_logged_in': is_logged_in}
示例10: handle
def handle(self, *args, **options):
    """Management command: normalize the names of all listeners whose
    first_name is empty, reporting progress via a console gauge."""
    morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
    self.dialog = Dialog()
    listeners = Listener.objects.filter(first_name__exact=u'')
    total = listeners.count()
    self.dialog.gauge_start()
    for index, listener in enumerate(listeners):
        listener.normalize_name(morph)
        text = u'Склонение: %s %s %s' % (listener.last_name, listener.first_name, listener.patronymic)
        percent = int(float(index)/total*100)
        self.dialog.gauge_update(percent,
            text=text.encode('utf-8'),
            update_text=True)
    self.dialog.gauge_stop()
示例11: initializeResources
def initializeResources(self):
    """Pre-initialization"""
    # Placeholder attributes; real timer/layout objects are assigned later.
    self.animationTimer = ()
    self.progressTimer = ()
    self.grid_layout =()
    """Initialize Options"""
    self.options = Options()
    """Initialize Statistics"""
    self.stats = Stats()
    """Config Here"""
    self.initializeComposition()
    self.initializeComponents()
    self.setMenus()
    self.trayIcon.show()
    #self.startTrayLoading()
    """"Initialize Dictionaries (will take a some time!)"""
    # Time the dictionary/database loading; the duration is stored in
    # self.loadingTime at the end for diagnostics.
    time_start = datetime.now()
    self.dict = EdictParser()
    self.dict.loadDict()
    # English pymorphy dictionaries -- presumably PATH_TO_RES + DICT_EN
    # points at an unpacked dictionary folder; confirm against resources.
    self.morphy = get_morph(PATH_TO_RES + DICT_EN)
    self.trayIcon.showMessage('Loading...', 'Initializing dictionaries', QSystemTrayIcon.MessageIcon.Information, 20000 ) #TODO: change into loading dialog... or not
    """Initializing srs system"""
    self.trayIcon.showMessage('Loading...', 'Initializing databases', QSystemTrayIcon.MessageIcon.Information, 20000 )
    self.srs = srsScheduler()
    self.srs.initializeAll()
    self.srs.initializeCurrentSession(self.options.getSessionSize())
    """Global hotkeys hook"""
    #TODO: add multiple hotkeys and fix stop()
    #self.hooker = GlobalHotkeyManager(toggleQDictFlag, 'Q')
    # self.hooker = GlobalHotkeyManager(toggleWidgetFlag(self.qdict), 'Q')
    # self.hooker.setDaemon(True) #temporarily, should work using stop()
    # self.hooker.start()
    time_end = datetime.now()
    self.loadingTime = time_end - time_start
示例12: create_triads
def create_triads(path_item, path_rel, path_attr):
    """Build [item, relation, attribute] triads from three input files.

    All three files are cp1251-encoded, one entry per line; attribute
    lines carry a part-of-speech tag (N / V / Adj / Is) after the word.
    Attributes are paired with relations by part of speech.  The triads
    are printed and returned as a list.
    """
    # NOTE(review): hard-coded Windows path to the pymorphy dictionaries.
    dicts = "c:\\Python27\\Lib\\site-packages\\pymorphy\\ru.sqlite-json\\"
    morph = get_morph(dicts)  # NOTE(review): loaded but never used below -- confirm
    # read items
    with open(path_item) as f:
        items = f.readlines()
    # read relations
    with open(path_rel) as f:
        relations = f.readlines()
    # read attributes
    with open(path_attr) as f:
        attributes = f.readlines()
    # split attributes according to different parts of speech
    # NOTE(review): plain substring tests -- a line may land in several
    # buckets if its tag (or word) contains another tag's letters.
    attrsN, attrsV, attrsAdj, attrsIs = [[],[],[],[]]
    for at in attributes:
        if 'N' in at: attrsN.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'V' in at: attrsV.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Adj' in at: attrsAdj.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Is' in at: attrsIs.append(re.split(',', at)[0].decode('cp1251').lower())
    # assemble triads
    triads = []
    for it in items:
        it = it.replace('\n', '').decode('cp1251')
        for rel in relations:
            rel = rel.replace('\n', '').decode('cp1251')
            if rel == u'может':
                for attr in attrsV: triads.append([it, rel, attr])
            if rel == u'имеет':
                for attr in attrsN: triads.append([it, rel, attr])
            if rel == u'является':
                for attr in attrsIs: triads.append([it, rel, attr])
            if u'как' in rel:
                # NOTE(review): empty string where the other branches use
                # `rel` -- looks like a bug; confirm whether [it, rel, attr]
                # was intended.
                for attr in attrsAdj: triads.append([it, '', attr])
    # test
    for triad in triads:
        print triad[0] + ', ' + triad[1] + ', ' + triad[2]
    return triads
示例13: get_words
def get_words(file_name, index):
    """Extract normalized adjectives, nouns and verbs from a UTF-8 file.

    Updates *index* (normal form -> occurrence count) in place and returns
    the list of normal forms in order of appearance.  Progress is printed
    at every 5% of the token stream.
    """
    # NOTE(review): empty dictionary path -- relies on get_morph('')
    # resolving to some default location; confirm.
    morph = get_morph('')
    print "Getting words from " + file_name + "..."
    words = []
    # Word pattern allowing internal hyphens and apostrophes.
    pattern = re.compile("(([\w]+[-'])*[\w']+'?)", re.U)
    # try:
    f = open(file_name, 'r')
    file_text = f.read()
    f.close()
    file_text = unicode(file_text, 'utf8').upper()
    # Isolate double dashes so they split off as separate tokens.
    file_text = file_text.replace('--', ' -- ')
    tokens = file_text.split()
    previous_percentage = -1
    for idx, token in enumerate(tokens):
        m = pattern.match(token)
        if m:
            word = m.group()
            info = morph.get_graminfo(word)
            # NOTE(review): requires at least two analyses, so words with
            # exactly one (unambiguous) analysis are skipped -- confirm
            # whether `< 1` was intended.
            if len(info) < 2:
                continue
            # Keep only adjectives (П), nouns (С) and verbs (Г).
            if not info[0]['class'] in [u"П", u"С", u"Г"]:
                continue
            norm = info[0]['norm']
            words.append(norm)
            if norm in index:
                index[norm] += 1
            else:
                index[norm] = 1
            # Integer percentage (Python 2 floor division of ints).
            percentage = 100 * idx / len(tokens)
            if percentage != previous_percentage and percentage % 5 == 0:
                print "Getting words: " + str(percentage) + "% done"
                previous_percentage = percentage
    # except:
    #     print "error occured"
    return words
示例14: get_morph
# coding=utf-8
"""Build a small gender-inflection dictionary for adjective opinion words.

Reads the first word of each line of adjective_opinion_words.txt, inflects
it into masculine, feminine and neuter forms with pymorphy, and writes
"word masc fem neut" lines to the file `pyDict`.
"""
from pymorphy import get_morph
morph = get_morph('/home/ilya/github/ru.sqlite-json')  # dict path

array = []
# `with` closes the input file even if a line is malformed.
with open("adjective_opinion_words.txt", "r") as ins:
    for line in ins:
        # BUGFIX: str.index() raises ValueError when ' ' is absent and can
        # never return -1; str.find() is what the original -1 check assumed.
        ind = line.find(' ')
        if ind != -1:
            line = line[0:ind]
        array.append(line)

# Renamed from `file` (shadowed the builtin) and wrapped in `with`
# (the original never closed the output file).
with open("pyDict", "w") as out:
    for word in array:
        word = word.decode("utf-8").upper()
        info1 = morph.inflect_ru(unicode(word), u'мр')  # masculine
        info2 = morph.inflect_ru(unicode(word), u'жр')  # feminine
        info3 = morph.inflect_ru(unicode(word), u'ср')  # neuter
        res = word.lower().encode("utf-8")+" "+info1.lower().encode("utf-8")+" "+info2.lower().encode("utf-8")+" "+info3.lower().encode("utf-8")
        # print res
        out.write(res+"\n")
示例15: get_morph
(u'SOCIAL', u'ACTIVITY'),
(u'CURRENT', u'FRESHMAN'),
(u'CURRENT', u'SOPHOMORE'),
(u'FOUR-YEAR', u'UNIVERSITY'),
(u'ACADEMIC', u'RECORD'),
(u'DEMONSTRATE', u'PASSION'),
(u'HIGH', u'STUDENT'),
(u'POTENTIAL', u'STUDENT'),
(u'EXCITING', u'PROGRAM'),
(u'FAST-PACED', u'PROGRAM'),
(u'INTERACTIVE', u'COURCE'),
(u'FORMER', u'CAMPER'),
(u'MANY', u'INFORMATION')
]
morph = get_morph("../dicts/en")  # English morphology used by the grammar matcher


def get_grammars_precision_and_recall(grammar, dir_path):
    """Compute (precision, recall) of *grammar* over all files under *dir_path*.

    Every sub-match produced by data_gathering_iterator counts as retrieved;
    sub-matches present in normilized_right_result_list count as relevant.
    Precision = relevant/retrieved, recall = relevant/len(gold list).

    NOTE(review): raises ZeroDivisionError when nothing is retrieved.
    """
    retrieved = 0.0
    relevant = 0.0
    for root, dirs, files in os.walk(dir_path):
        for file_name in files:
            path = os.path.join(root, file_name)
            for result in data_gathering_iterator(path, morph, grammar):
                for subresult in result:
                    if subresult in normilized_right_result_list:
                        relevant += 1.0
                    retrieved += 1.0
    return relevant / retrieved, relevant / len(normilized_right_result_list)