本文整理汇总了Python中pinyin.get方法的典型用法代码示例。如果您正苦于以下问题：Python pinyin.get方法的具体用法？Python pinyin.get怎么用？Python pinyin.get使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pinyin的用法示例。
在下文中一共展示了pinyin.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_item
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def process_item(self, item, spider):
    """Drop duplicate Meituan restaurants and enrich the rest with codes.

    Items produced by any spider other than ``meituan`` are returned
    untouched. For Meituan items, a restaurant whose name is already
    registered in ``self.filter_dic`` with the same address is rejected;
    otherwise the name/address pair is recorded and the item gets converted
    coordinates plus pinyin codes for its administrative fields.

    Args:
        self: Object exposing ``filter_dic``, a dict mapping restaurant name
            to the last address seen for that name.
        item: Mapping-like scraped item. Reads ``restaurant_name``,
            ``address``, ``lng``, ``lat``, ``province``, ``city``, ``region``
            and ``area``.
        spider: Object whose ``name`` attribute identifies the spider.

    Returns:
        The same ``item`` object. Enrichment is best-effort: if it fails
        part-way the error is printed and the item is returned with whatever
        fields were already written.

    Raises:
        DropItem: If the restaurant name is already registered with an
            identical address.
    """
    if spider.name != 'meituan':
        return item

    name = item['restaurant_name']
    address = item['address']
    if self.filter_dic.get(name) == address:
        print(name)
        print(address)
        raise DropItem("Duplicate item found: %s" % item)

    self.filter_dic[name] = address
    try:
        # NOTE(review): presumably converts Gaode (AMap) coordinates to
        # Baidu's coordinate system -- confirm against the helper.
        item['lng'], item['lat'] = gaode_to_baidu(float(item['lng']), float(item['lat']))
        for field in ('province', 'city', 'region', 'area'):
            item[field + '_code'] = pinyin.get(item[field])
    except Exception as e:
        # Catch Exception rather than BaseException so that Ctrl-C and
        # SystemExit still stop the crawl instead of being printed and ignored.
        print(e)
    return item
示例2: segment_lyric_convert_pinyin_mir1k
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def segment_lyric_convert_pinyin_mir1k():
    """Write segmented-character and pinyin annotations for MIR-1K lyrics.

    For every distinct file name found under ``<mir1k_root>/Lyrics`` the
    first lyric line is run through OpenCC's ``tw2s`` configuration, then
    written twice under ``<mir1k_root>/annotation``: once as space-joined
    ``fool.cut`` segments (``*_phrase_char.txt``) and once as tone-stripped,
    space-delimited pinyin (``*_phrase_pinyin.txt``).

    Files that raise ``UnicodeDecodeError`` are skipped and their name is
    printed.
    """
    converter = OpenCC('tw2s')
    lyrics_dir = os.path.join(mir1k_root, 'Lyrics')
    unique_names = list(set(get_filenames_in_folder(lyrics_dir)))
    for name in unique_names:
        lyrics_path = os.path.join(lyrics_dir, name+'.txt')
        try:
            first_line = read_mir1k_lyrics(lyrics_path)[0]
            simplified = converter.convert(first_line)
            as_pinyin = pinyin.get(simplified, format='strip', delimiter=' ')
            as_chars = ' '.join(fool.cut(simplified)[0])
            outputs = (
                ('_phrase_char.txt', as_chars),
                ('_phrase_pinyin.txt', as_pinyin),
            )
            for suffix, text in outputs:
                write_lyrics_one_line(
                    filename=os.path.join(mir1k_root, 'annotation', name + suffix),
                    line=text)
        except UnicodeDecodeError:
            # Report the offending file and carry on with the next one.
            print(name)
示例3: pinyin
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def pinyin(value):
    """Return ``value`` as space-delimited pinyin with spacing tidied up.

    After conversion, the following regex substitutions are applied in order:
    a ``{...}`` span that contains a space is deleted, runs of spaces are
    collapsed to one, the space after ``(`` / ``[`` is removed, and the
    spaces around ``)`` / ``]`` are removed.
    """
    # Local import: inside this function the name refers to the module,
    # not to this function.
    import pinyin as pinyin_module

    text = pinyin_module.get(value, delimiter=' ')
    cleanup_rules = [
        (r'\{.*( ).*\}', ''),
        (r' +', ' '),
        (r'\( ', '('),
        (r' \) ', ')'),
        (r'\[ ', '['),
        (r' \] ', ']'),
    ]
    for regex, substitute in cleanup_rules:
        text = re.sub(regex, substitute, text)
    return text
示例4: request
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def request(cls, url: str, data: Dict[str, Any], method: str = "POST") -> Dict[str, Any]:
    """Send ``data`` to ``url`` through ``cls.session`` and return the JSON body.

    Args:
        url: Target URL.
        data: Sent as query parameters for ``"GET"``, as the request body for
            any other ``method`` value (which is issued as a POST).
        method: ``"GET"`` selects a GET request; anything else posts.

    Returns:
        The decoded JSON response, or ``{}`` when the request or decoding
        raises, when the status code is not OK, or when the body is empty.
    """
    ok_code = requests.codes.ok
    try:
        if method == "GET":
            response = cls.session.get(url, params=data, timeout=20)
        else:
            response = cls.session.post(url, data=data, timeout=20)
        payload = response.json()
        body = response.text
        code = response.status_code
    except Exception as e:
        # Any failure (network, timeout, invalid JSON) yields an empty result.
        logger.debug(e)
        return {}

    if code != ok_code or not body:
        return {}
    return payload
示例5: test
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def test(model, step_num, loss):
    """Synthesize a fixed sentence and return the resulting plot as an image.

    The model is switched to eval mode, a hard-coded Chinese sentence is
    converted to numerically-toned pinyin and encoded with
    ``text_to_sequence``, and ``model.inference`` is run on the GPU without
    gradients. The mel outputs, post-net mel outputs and transposed
    alignments are handed to ``plot_data``; the figure is titled with the
    step and loss, saved to ``images/temp.jpg`` and read back.

    Args:
        model: Object providing ``eval()`` and ``inference(sequence)``.
        step_num: Training step shown in the plot title.
        loss: Loss value shown in the plot title (five decimals).

    Returns:
        The saved figure re-read with OpenCV, converted from BGR to RGB and
        divided by 255.
    """
    model.eval()
    sentence = "相对论直接和间接的催生了量子力学的诞生 也为研究微观世界的高速运动确立了全新的数学模型"
    phonemes = pinyin.get(sentence, format="numerical", delimiter=" ")
    # [None, :] prepends an axis so the single sequence forms a batch of one.
    encoded = np.array(text_to_sequence(phonemes))[None, :]
    batch = torch.autograd.Variable(torch.from_numpy(encoded)).cuda().long()

    with torch.no_grad():
        mel_outputs, mel_outputs_postnet, _, alignments = model.inference(batch)

    def first_as_numpy(tensor):
        # First batch element as a float NumPy array on the CPU.
        return tensor.float().data.cpu().numpy()[0]

    plot_data((first_as_numpy(mel_outputs),
               first_as_numpy(mel_outputs_postnet),
               first_as_numpy(alignments).T))
    plt.title('step={0}, loss={1:.5f}'.format(step_num, loss))

    out_path = 'images/temp.jpg'
    plt.savefig(out_path)
    rgb = cv.cvtColor(cv.imread(out_path), cv.COLOR_BGR2RGB)
    return rgb / 255.
示例6: parse_start_url
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def parse_start_url(self, response):
    """Yield one category/area listing request per Meituan sub-area.

    The page embeds a JSON object inside a ``<script>`` element; its
    ``BizAreaList`` and first ``CateList`` entry's ``subCategories`` are
    combined so that every (category, region, sub-area) triple produces a
    request handled by ``self.parse_category_area``. Entries whose name is
    the catch-all "全部" (all) or "全城" (whole city) are skipped.

    The province and city come from the ``<meta name="location">`` tag.
    They, and the pinyin city slug used in the URL, are identical for every
    item, so they are computed once up front rather than once per sub-area.

    Args:
        response: Response for the start URL; ``xpath`` and ``encoding``
            are used.

    Yields:
        Request: GET request whose ``meta`` carries the partially filled
        ``MeituanItem`` under ``'item'`` and the target URL under ``'url'``.

    Raises:
        IndexError: If the script or location meta tag is missing.
        AttributeError: If no ``{...}`` object is found in the script text.
    """
    # NOTE(review): "commment" (three m's) presumably mirrors the page's own
    # markup -- confirm before "fixing" the spelling.
    js = response.xpath('//script[@type="commment"]').extract()[0]
    # Raw string: '\{' and '\s' are invalid escapes in a normal literal.
    data = json.loads(re.search(r'\{[\s\S]*\}', js).group(0))
    region_list = data['BizAreaList']
    category_list = data['CateList'][0]['subCategories']

    location = response.xpath('//meta[@name="location"]/@content').extract()[0]
    province, city = [s.split('=')[1] for s in location.split(';')]
    city_slug = pinyin.get(city)

    for category in category_list:
        if category['name'] == u'全部':
            continue
        for region in region_list:
            if region['name'] == u'全城':
                continue
            for area in region['subareas']:
                if area['name'] == u'全部':
                    continue
                item = MeituanItem()
                item['province'], item['city'] = province, city
                item['region'] = region['name'].strip()
                item['area'] = area['name'].strip()
                item['category'] = category['name'].strip()
                url = 'http://i.meituan.com/%s?cid=%d&bid=%d&cateType=poi&stid=_b1' % (
                    city_slug, category['id'], area['id'])
                yield Request(url,
                              method='GET',
                              meta={'item': item, 'url': url},
                              headers=headers,
                              cookies=None,
                              body=None,
                              priority=0,
                              errback=None,
                              encoding=response.encoding,
                              callback=self.parse_category_area)
示例7: prepare_corpus
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def prepare_corpus(input_files):
    """Read whitespace-tokenised text files and build pinyin token sentences.

    Each line is split on whitespace. A token in which ``ch_pattern`` finds
    at least one match is replaced by its tone-stripped pinyin syllables;
    any other token is kept as is. The first eleven lines processed are
    printed next to their conversion as a sanity check. Lines that are empty
    after stripping contribute no sentence.

    Two defects of the earlier version are fixed here: the conversion
    referenced an undefined name (``item``) instead of the current ``word``,
    and the space-delimited pinyin string was passed straight to
    ``list.extend``, which would have added single characters (spaces
    included) rather than whole syllables.

    Args:
        input_files: Iterable of paths readable through ``tf.gfile.GFile``.

    Returns:
        list[list[str]]: One token list per non-empty input line, in
        reading order.
    """
    sentences = []
    shown = 0
    for input_file in input_files:
        with tf.gfile.GFile(input_file, "r") as reader:
            while True:
                line = reader.readline()
                if not line:
                    # readline() returns an empty string only at end of file.
                    break
                line = line.strip().split()
                line_pinyin = []
                for word in line:
                    # NOTE(review): ch_pattern presumably matches Chinese
                    # characters -- confirm where it is defined.
                    if ch_pattern.findall(word):
                        syllables = pinyin.get(word, format="strip", delimiter=" ")
                        line_pinyin.extend(syllables.split())
                    else:
                        # Tokens produced by str.split() are non-empty and
                        # contain no whitespace, so they are kept whole.
                        line_pinyin.append(word)
                if shown <= 10:
                    print(line, line_pinyin)
                    shown += 1
                # Empty lines are used as document delimiters
                if not line:
                    continue
                sentences.append(line_pinyin)
    return sentences
示例8: pinyin
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def pinyin():
    """Command-line entry point: print the pinyin of the ``chars`` argument.

    Parses a single positional argument from the command line. When it is
    empty the usage help is printed instead; otherwise the argument is
    passed through ``u`` and ``get`` and the result is printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("chars", help="Input chinese words")
    options = cli.parse_args()
    if options.chars:
        print(get(u(options.chars)))
    else:
        cli.print_help()
示例9: BLOBParser_human
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def BLOBParser_human(blob):
    """Decode a bookmark blob into a ``/``-separated path string.

    Args:
        blob: Raw bytes accepted by ``Bookmark.from_bytes``.

    Returns:
        ``"/"`` followed by the ``/``-joined entries stored under key
        ``0x1004``. If anything raises (including the key being absent, in
        which case ``None`` cannot be joined), the exception is printed and
        ``None`` is returned implicitly.
    """
    # http://mac-alias.readthedocs.io/en/latest/bookmark_fmt.html
    try:
        bookmark = Bookmark.from_bytes(blob)
        # NOTE(review): 0x1004 is presumably the path-components record of
        # the bookmark format -- confirm against the linked documentation.
        components = bookmark.get(0x1004, default=None)
        return "/" + u"/".join(components)
    except Exception as e:
        print(e)
    # for 10.13
示例10: convert2Pinyin
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def convert2Pinyin(filename):
    """Replace every run of CJK ideographs in ``filename`` with pinyin.

    Each maximal run of characters in the range U+4E00-U+9FFF is replaced
    by its tone-stripped, space-delimited pinyin, padded with one space on
    each side (e.g. "你好" becomes " ni hao "). All other characters are
    left untouched.

    Args:
        filename: Text to convert.

    Returns:
        The converted text; identical to the input when it contains no
        characters in that range.
    """
    def c2p(matchObj):
        # convert "你好" to " ni hao "
        return " " + pinyin.get(matchObj.group(), format="strip", delimiter=" ") + " "

    # The former ur'...' prefix is a SyntaxError on Python 3; a plain u''
    # literal yields the same character class on both Python 2 and 3.
    return re.sub(u'[\u4e00-\u9fff]+', c2p, filename)
示例11: segmentation_conversion_helper
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def segmentation_conversion_helper(fn, list_line, sub_folder, phrase_syllable="phrase"):
    """Write segmented-character and pinyin versions of annotated lines.

    Entries whose third element is empty once spaces are removed are
    dropped. For the remaining entries the first two elements are kept and
    the third is replaced, in one output by the space-joined ``fool.cut``
    segmentation and in the other by tone-stripped, space-delimited pinyin.

    Args:
        fn: Base name of the output files.
        list_line: Sequence of entries indexable at 0, 1 and 2, where index
            2 holds the text.
        sub_folder: Folder under ``mandarin_kugou_root`` to write into.
        phrase_syllable: Tag inserted into the output file names, giving
            ``<fn>_<tag>_char.txt`` and ``<fn>_<tag>_pinyin.txt``.
    """
    kept = [entry for entry in list_line if entry[2].replace(" ", "")]

    segmented = [
        [entry[0], entry[1], ' '.join(fool.cut(entry[2])[0])]
        for entry in kept
    ]
    romanized = [
        [entry[0], entry[1], pinyin.get(entry[2], format='strip', delimiter=' ')]
        for entry in kept
    ]

    for suffix, rows in (('_char.txt', segmented), ('_pinyin.txt', romanized)):
        target = os.path.join(mandarin_kugou_root, sub_folder, fn + '_' + phrase_syllable + suffix)
        write_line(filename=target, list_line=rows)
示例12: pinyinfy
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def pinyinfy(name: str) -> str:
    """
    Convert a name to lower-case pinyin without tone marks or separators.

    Args:
        name (str): Name to convert.

    Returns:
        str: Lower-cased result of ``pinyin.get`` with ``format='strip'``
        and an empty delimiter.
    """
    romanized = pinyin.get(name, format='strip', delimiter='')
    return romanized.lower()
示例13: get_lyric
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def get_lyric(idx) -> str:
    """Fetch the lyric text for song ``idx`` from the NetEase lyric endpoint.

    Args:
        idx: Song identifier sent as the ``id`` field of the request.

    Returns:
        The value at ``["lrc"]["lyric"]`` of the response, or ``""`` when
        either key is missing.
    """
    payload = {"csrf_token": "", "id": idx, "lv": -1, "tv": -1}
    encrypted = NeteaseRequest.encrypted_request(payload)
    response = NeteaseRequest.request(
        url="https://music.163.com/weapi/song/lyric",
        method="POST",
        data=encrypted,
    )
    lrc = response.get("lrc", {})
    return lrc.get("lyric", "")
示例14: save_checkpoint
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def save_checkpoint(epoch, epochs_since_improvement, model, optimizer, loss, is_best):
    """Serialize the training state to ``checkpoint.tar``.

    Args:
        epoch: Stored under ``'epoch'``.
        epochs_since_improvement: Stored under ``'epochs_since_improvement'``.
        model: Stored under ``'model'`` (the object itself is saved).
        optimizer: Stored under ``'optimizer'``.
        loss: Stored under ``'loss'``.
        is_best: When truthy, the same state is also written to
            ``BEST_checkpoint.tar``.
    """
    snapshot = dict(
        epoch=epoch,
        epochs_since_improvement=epochs_since_improvement,
        loss=loss,
        model=model,
        optimizer=optimizer,
    )
    targets = ['checkpoint.tar']
    if is_best:
        # Keep a separate copy of the best state so that a later, worse
        # checkpoint cannot overwrite it.
        targets.append('BEST_checkpoint.tar')
    for path in targets:
        torch.save(snapshot, path)
示例15: parse_start_url
# 需要导入模块: import pinyin [as 别名]
# 或者: from pinyin import get [as 别名]
def parse_start_url(self, response):
    """Yield one ``/food`` listing request per city found on the start page.

    Two groups are processed in order: the cities in the first entry of the
    ``divArea`` list (stored with empty province fields), then the cities
    listed inside each ``dl.terms`` province block. A running count is
    printed for each group as requests are produced.

    The request construction, previously duplicated in both loops, now
    lives in one local helper. The province name and its pinyin code are
    computed once per province rather than once per city; they are only
    computed for provinces that list at least one city, as before.

    Args:
        response: Response for the start URL; ``xpath`` and ``encoding``
            are used.

    Yields:
        Request: GET request to ``http://www.dianping.com/<city_code>/food``
        with the ``DazhongdianpingItem`` in ``meta['item']``, handled by
        ``self.parse_city``.

    Raises:
        IndexError: If fewer hrefs than city names are found in a group, or
            a province block with cities has no ``dt`` text.
    """
    def food_request(item):
        # Build the per-city listing request shared by both groups.
        url = 'http://www.dianping.com/' + item['city_code'].strip('/') + '/food'
        return Request(url,
                       method='GET',
                       meta={'item': item},
                       headers=headers,
                       cookies=None,
                       body=None,
                       priority=0,
                       errback=None,
                       encoding=response.encoding,
                       callback=self.parse_city)

    city_cnt = 0
    big_city_list = response.xpath('//ul[@id="divArea"]/li[1]/div/a/strong/text()').extract()
    big_city_code_list = response.xpath('//ul[@id="divArea"]/li[1]/div/a/@href').extract()
    for index, city in enumerate(big_city_list):
        item = DazhongdianpingItem()
        item['province'] = ''
        item['province_code'] = ''
        item['city_code'] = big_city_code_list[index]
        item['city'] = city
        city_cnt += 1
        print('大城市数量:\t'+str(city_cnt))
        yield food_request(item)

    city_cnt = 0
    province_list = response.xpath('//li[@class="root"]//dl[@class="terms"]').extract()
    for province_html in province_list:
        province = Selector(text=province_html)
        city_list = province.xpath('//strong/text()').extract()
        city_code_list = province.xpath('//a/@href').extract()
        if not city_list:
            # No cities: nothing to yield, so the province name is not needed.
            continue
        province_name = province.xpath('//dt/text()').extract()[0]
        province_code = pinyin.get(province_name).strip()
        for index, city in enumerate(city_list):
            item = DazhongdianpingItem()
            item['province'] = province_name
            item['province_code'] = province_code
            item['city'] = city.strip()
            item['city_code'] = city_code_list[index].strip('\r\n\t/ ')
            city_cnt += 1
            print('小城市数量:\t'+str(city_cnt))
            yield food_request(item)