本文整理汇总了 Python 中 nltk.corpus.util.LazyCorpusLoader.xml 方法的典型用法代码示例。如果您正苦于以下问题：Python LazyCorpusLoader.xml 方法的具体用法？Python LazyCorpusLoader.xml 怎么用？Python LazyCorpusLoader.xml 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nltk.corpus.util.LazyCorpusLoader 的用法示例。
在下文中一共展示了 LazyCorpusLoader.xml 方法的 3 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: Driver
# 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]
# 或者: from nltk.corpus.util.LazyCorpusLoader import xml [as 别名]
#connection = pyodbc.connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=E:\\farsnettest.mdb")
#cursor = connection.cursor()
#
#cursor.execute("select * from synsets")
##for item in cursor:
# print item
# Lazily open the corpus stored under ``farsnet\synset``: every file whose name
# does not start with a dot and ends in ``.xml``, read with XMLCorpusReader.
# The backslash is written as ``\\`` so the literal keeps exactly the same
# value without relying on the invalid ``\s`` escape sequence.
# NOTE(review): NLTK corpus names are normally written with '/'; confirm that
# this name also resolves on non-Windows hosts.
corp = LazyCorpusLoader('farsnet\\synset', XMLCorpusReader, r'(?!\.).*\.xml')

# Counts the items whose second-to-last child element has children of its own.
mapExistsCont = 0
# Counts every item visited.
itemnum = 0

# Iterating the element returned by corp.xml() yields its direct children.
for item in corp.xml():
    itemnum += 1

    # Materialise the child elements once. ``list(element)`` replaces
    # Element.getchildren(), which was removed in Python 3.9, and avoids
    # rebuilding the same list for every field access.
    fields = list(item)

    # Fields are addressed purely by position.
    # NOTE(review): the meaning of each position is inferred from the variable
    # names only -- verify against the corpus schema.
    synsetID = fields[0].text
    pos = fields[2].text
    example = fields[4].text
    gloss = fields[5].text
    i = fields[7].text
    wordnetID = fields[-2]

    # Report the text of the first nested element, when there is one.
    mappings = list(wordnetID)
    if mappings:
        mapExistsCont += 1
        print(mappings[0].text)
示例2: LazyCorpusLoader
# 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]
# 或者: from nltk.corpus.util.LazyCorpusLoader import xml [as 别名]
# Plain-text corpora under ``bamana/``; each one is split into words with
# ``orthographic_word`` and decoded as UTF-8.
test = LazyCorpusLoader(
    'bamana/test', PlaintextCorpusReader, r'source.txt',
    word_tokenizer=orthographic_word, encoding='utf-8')
wordlist = LazyCorpusLoader(
    'bamana/wordlist', PlaintextCorpusReader, r'bailleul.clean.wordlist',
    word_tokenizer=orthographic_word, encoding='utf-8')
properlist = LazyCorpusLoader(
    'bamana/propernames', PlaintextCorpusReader, r'.*\.clean\.wordlist',
    word_tokenizer=orthographic_word, encoding='utf-8')

# Toolbox-format corpora. The file pattern is a raw string so that ``\.`` is
# passed to the regex engine verbatim instead of relying on an invalid string
# escape sequence (the resulting pattern text is unchanged).
propernames = LazyCorpusLoader(
    'bamana/propernames', ToolboxCorpusReader, r'.*\.txt', encoding='utf-8')
bailleul = LazyCorpusLoader(
    'bamana/bailleul', ToolboxCorpusReader, r'bailleul.txt', encoding='utf-8')

# The lexicon starts as the XML tree of ``bailleul.txt``; every ``record``
# element found in the proper-name files is then appended to its root.
lexicon = ElementTree(bailleul.xml('bailleul.txt'))
lexicon_root = lexicon.getroot()  # loop-invariant, so fetched once

for fileid in propernames.fileids():
    for record in ElementTree(propernames.xml(fileid)).findall('record'):
        # Add a ``ge`` child carrying the same text as the record's ``lx``
        # child. NOTE(review): presumably gloss / lexeme markers -- confirm.
        ge = Element('ge')
        ge.text = record.find('lx').text
        record.append(ge)

        # Add a ``ps`` child with the fixed value 'n.prop'.
        ps = Element('ps')
        ps.text = 'n.prop'
        record.append(ps)

        lexicon_root.append(record)

# Both dictionaries start out empty.
wl = {}
wl_detone = {}
def normalize_bailleul(word):
示例3: Driver
# 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]
# 或者: from nltk.corpus.util.LazyCorpusLoader import xml [as 别名]
# Open the Access database file E:\farsnettest.mdb through the ODBC driver and
# obtain a cursor used for both the SELECT and the INSERT statements below.
connection = pyodbc.connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=E:\\farsnettest.mdb")
c = connection.cursor()

# Lazily open the ``hamshahricorpus`` corpus: every file whose name does not
# start with a dot and ends in ``.xml``, read with XMLCorpusReader.
corpus = LazyCorpusLoader('hamshahricorpus', XMLCorpusReader, r'(?!\.).*\.xml')

word = u'شیر'  # token searched for in every sentence
targ = 0       # number of sentences containing ``word`` as a token

# Dump the current content of the ``shir`` table.
c.execute("select * from shir")
for row in c:
    print(row)

for fileid in corpus.fileids():
    # Iterating the root element yields its direct children (one per ``doc``).
    for doc in corpus.xml(fileid):
        # ``list(element)`` replaces Element.getchildren(), which was removed
        # in Python 3.9; the children are fetched once per document.
        doc_fields = list(doc)

        # Fields are addressed purely by position.
        # NOTE(review): position 3 / 5 presumably hold category and body text
        # -- verify against the corpus schema.
        cat = doc_fields[3].text
        text = doc_fields[5].text

        newtext = correctPersianString(text)
        allwords = text.split()
        sents = newtext.split('.')

        for sent in sents:
            if word in sent.split():
                targ += 1
                # NOTE(review): nesting of this check was reconstructed from a
                # source whose indentation was lost; confirm it belongs under
                # the word-match test (where it is always true).
                if sent:
                    # NOTE(review): the statement inserts fixed placeholder
                    # literals rather than ``targ`` / ``sent`` -- confirm that
                    # this is intended.
                    c.execute("insert into shir(number, example) values ('pyodbc', 'awesome library')")