This article collects typical usage examples of the re.L attribute in Python. If you are unsure what re.L does, how to use it, or what it looks like in practice, the curated examples below may help. You can also explore further usage examples of the re module, in which this attribute is defined.
The following 15 code examples of re.L are shown, sorted by popularity by default.
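Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of what re.L, an alias for re.LOCALE, actually does: it makes \w, \W, \b and \B follow the current locale instead of the default matching rules. Note that since Python 3.6 the flag is only accepted for bytes patterns; compiling a str pattern with re.L raises ValueError, so several of the str-pattern examples below only run as written on Python 2 or on older Python 3 releases.

import re

# bytes pattern: re.L is accepted, and \w follows the current locale
word = re.compile(rb"\w+", re.L)
print(word.findall(b"hello, world 123"))   # [b'hello', b'world', b'123']

# str pattern: rejected on Python 3.6 and later
try:
    re.compile(r"\w+", re.L)
except ValueError as exc:
    print(exc)                             # "cannot use LOCALE flag with a str pattern"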
Example 1: pynlp_build_key_word
# Required module: import re
# Or: from re import L
import re
import pynlpir  # NLPIR/ICTCLAS Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def pynlp_build_key_word(filename):
    d = {}
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            s = line
            p = re.compile(r'https?://.+$')  # regex for extracting URLs
            result = p.findall(line)         # find all URLs
            if len(result):
                for i in result:
                    s = s.replace(i, '')     # delete them one by one
            temp = pynlpir.segment(s, pos_tagging=False)  # word segmentation
            for i in temp:
                if '@' in i:
                    temp.remove(i)           # drop user names from the tokens
                # Note: re.L with a str pattern raises ValueError on Python 3.6+.
                p = re.compile(r'\w', re.L)
                result = p.sub("", i)
                if not result or result == ' ':  # empty string
                    continue
                if len(i) > 1:               # skip single characters so meaningless tokens are not counted
                    d[i] = d.get(i, 0) + 1
    kw_list = sorted(d, key=lambda x: d[x], reverse=True)
    size = int(len(kw_list) * 0.2)           # keep the top 20%
    mood = set(kw_list[:size])
    return list(mood - set(stop) - set('\u200b') - set(' ') - set('\u3000'))
Example 2: loadDataSet
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def loadDataSet(path):  # return the tokens and the label of each Weibo post
    line_cut = []
    label = []
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                sentence = temp[2:].lstrip()   # the post text
                label.append(int(temp[:2]))    # the label
                word_list = []
                sentence = str(sentence).replace('\u200b', '')
                for word in jieba.cut(sentence.strip()):
                    p = re.compile(r'\w', re.L)
                    result = p.sub("", word)
                    if not result or result == ' ':  # empty string
                        continue
                    word_list.append(word)
                word_list = list(set(word_list) - set(stop) - set('\u200b')
                                 - set(' ') - set('\u3000') - set('️'))
                line_cut.append(word_list)
            except Exception:
                continue
    return line_cut, label  # tokens and label for each post
Example 3: getmatch
# Required module: import re
# Or: from re import L
import re

# Method of a pattern-matching class (Python 2 code: `basestring`); `Match`,
# `self.pattern`, `self.flags` and `self.to` are defined elsewhere in that project.
def getmatch(self, haystack):
    if not isinstance(haystack, basestring):  # use `str` on Python 3
        return None
    flags = 0
    if self.flags is not None:
        if "i" in self.flags or "I" in self.flags:
            flags |= re.I
        if "l" in self.flags or "L" in self.flags:
            flags |= re.L
        if "m" in self.flags or "M" in self.flags:
            flags |= re.M
        if "s" in self.flags or "S" in self.flags:
            flags |= re.S
        if "u" in self.flags or "U" in self.flags:
            flags |= re.U
        if "x" in self.flags or "X" in self.flags:
            flags |= re.X
    if re.match(self.pattern, haystack, flags=flags) is None:
        return None
    elif self.to is None:
        return Match(haystack, haystack)
    else:
        return Match(haystack, re.sub(self.pattern, self.to, haystack, flags=flags))
Example 4: loadDataSet
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# Same as Example 2, but the locale-dependent pattern is compiled as bytes,
# the only form re.L accepts on Python 3.6+. `stop` is an external stopword list.
def loadDataSet(path):  # return the tokens and the label of each Weibo post
    line_cut = []
    label = []
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                sentence = temp[2:].lstrip()   # the post text
                label.append(int(temp[:2]))    # the label
                word_list = []
                sentence = str(sentence).replace('\u200b', '')
                for word in jieba.cut(sentence.strip()):
                    p = re.compile(rb'\w', re.L)
                    result = p.sub(b"", bytes(word, encoding="utf-8")).decode("utf-8")
                    if not result or result == ' ':  # empty string
                        continue
                    word_list.append(word)
                word_list = list(set(word_list) - set(stop) - set('\u200b')
                                 - set(' ') - set('\u3000') - set('️'))
                line_cut.append(word_list)
            except Exception:
                continue
    return line_cut, label  # tokens and label for each post
Example 5: preprocessing
# Required module: import re
# Or: from re import L
import re
import string
from gensim import parsing  # assumption: `parsing` is gensim.parsing, which provides strip_multiple_whitespaces

def preprocessing(content):
    remove_punc = ('。 ; 。 、 」 「 , ( ) —').split(' ')
    ## preprocessing #1 : remove XXenglishXX and numbers
    preprocessing_1 = re.compile(r'\d*', re.L)    ## only substitute numbers
    #preprocessing_1 = re.compile(r'\w*', re.L)   ## substitute numbers & English
    content = preprocessing_1.sub("", content)
    ## preprocessing #2 : remove punctuation
    preprocessing_2 = re.compile('[%s]' % re.escape(string.punctuation))
    content = preprocessing_2.sub("", content)
    ## preprocessing #3 : remove Chinese punctuation and multiple whitespaces
    content = content.replace('\n', '')
    for punc in remove_punc:
        content = content.replace(punc, '')
    try:
        content = parsing.strip_multiple_whitespaces(content)
    except Exception:
        print('Warning : failed to strip whitespaces @ ')
    return content
Example 6: _branch_flags
# Required module: import re
# Or: from re import L
import re

# Translate a string of single-letter flag names into the corresponding re flag bits.
def _branch_flags(flags):
    flagsbyte = 0
    for flag in flags:
        if flag == "i":
            flagsbyte += re.I
        elif flag == "L":
            flagsbyte += re.L
        elif flag == "m":
            flagsbyte += re.M
        elif flag == "s":
            flagsbyte += re.S
        elif flag == "u":
            flagsbyte += re.U
        elif flag == "x":
            flagsbyte += re.X
    return flagsbyte
Example 7: build_key_word
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def build_key_word(path):  # derive keywords from word frequency
    d = {}
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            for word in jieba.cut(line.strip()):
                p = re.compile(r'\w', re.L)
                result = p.sub("", word)
                if not result or result == ' ':  # empty string
                    continue
                if len(word) > 1:  # skip single characters so meaningless tokens are not counted
                    d[word] = d.get(word, 0) + 1
    kw_list = sorted(d, key=lambda x: d[x], reverse=True)
    size = int(len(kw_list) * 0.15)  # keep the top 15%
    mood = set(kw_list[:size])
    return list(mood - set(stop))
Example 8: get_word_feature
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def get_word_feature(sentence):
    wordlist = []
    sentence = str(sentence).replace('\u200b', '')
    for word in jieba.cut(sentence.strip()):
        p = re.compile(r'\w', re.L)
        result = p.sub("", word)
        if not result or result == ' ':  # empty string
            continue
        wordlist.append(word)
    return list(set(wordlist) - set(stop) - set(' '))
Example 9: build_key_word
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def build_key_word(path):  # derive features from word frequency
    d = {}
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            for word in jieba.cut(line.strip()):
                p = re.compile(r'\w', re.L)
                result = p.sub("", word)
                if not result or result == ' ':  # empty string
                    continue
                if len(word) > 1:  # skip single characters so meaningless tokens are not counted
                    d[word] = d.get(word, 0) + 1
    kw_list = sorted(d, key=lambda x: d[x], reverse=True)
    size = int(len(kw_list) * 0.2)  # keep the top 20%
    mood = set(kw_list[:size])
    return list(mood - set(stop))
Example 10: loadDataSet
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# `stop` is a stopword collection defined elsewhere in the source project.
def loadDataSet(path):  # return the tokens and the label of each Weibo post
    line_cut = []
    label = []
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                sentence = temp[2:].lstrip()   # the post text
                first_label = int(temp[:2])
                if first_label == 3:           # skip posts labelled 3
                    continue
                label.append(first_label)      # the label
                word_list = []
                sentence = str(sentence).replace('\u200b', '')
                for word in jieba.cut(sentence.strip()):
                    p = re.compile(r'\w', re.L)
                    result = p.sub("", word)
                    if not result or result == ' ':  # empty string
                        continue
                    word_list.append(word)
                word_list = list(set(word_list) - set(stop) - set('\u200b')
                                 - set(' ') - set('\u3000') - set('️'))
                line_cut.append(word_list)
            except Exception:
                continue
    return line_cut, label  # tokens and label for each post
Example 11: build_key_word
# Required module: import re
# Or: from re import L
import re
import jieba  # Chinese word segmenter

# Same as Example 9, but with a bytes pattern, the only form re.L accepts on
# Python 3.6+. `stop` is an external stopword list.
def build_key_word(path):  # derive features from word frequency
    d = {}
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            for word in jieba.cut(line.strip()):
                p = re.compile(rb'\w', re.L)
                result = p.sub(b"", bytes(word, encoding="utf-8")).decode("utf-8")
                if not result or result == ' ':  # empty string
                    continue
                if len(word) > 1:  # skip single characters so meaningless tokens are not counted
                    d[word] = d.get(word, 0) + 1
    kw_list = sorted(d, key=lambda x: d[x], reverse=True)
    size = int(len(kw_list) * 0.2)  # keep the top 20%
    mood = set(kw_list[:size])
    return list(mood - set(stop))
Example 12: test_constants
# Required module: import re
# Or: from re import L
import re

# Test method from a unittest.TestCase subclass.
def test_constants(self):
    self.assertEqual(re.I, re.IGNORECASE)
    self.assertEqual(re.L, re.LOCALE)
    self.assertEqual(re.M, re.MULTILINE)
    self.assertEqual(re.S, re.DOTALL)
    self.assertEqual(re.X, re.VERBOSE)
Example 13: test_flags
# Required module: import re
# Or: from re import L
import re

# Test method from a unittest.TestCase subclass.
def test_flags(self):
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
Example 14: testParseErrors
# Required module: import re
# Or: from re import L
import re
import sre_yield  # test method from the sre_yield project's own test suite

def testParseErrors(self):
    self.assertRaises(sre_yield.ParseError, sre_yield.AllStrings, 'a', re.I)
    self.assertRaises(sre_yield.ParseError, sre_yield.AllStrings, 'a', re.U)
    self.assertRaises(sre_yield.ParseError, sre_yield.AllStrings, 'a', re.L)
Example 15: test_flags
# Required module: import re
# Or: from re import L
import re

# Test method from a unittest.TestCase subclass.
def test_flags(self):
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertTrue(re.compile('^pattern$', flag))