本文整理汇总了Python中Main.clean_data方法的典型用法代码示例。如果您正苦于以下问题:Python Main.clean_data方法的具体用法?Python Main.clean_data怎么用?Python Main.clean_data使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Main的用法示例。
在下文中一共展示了Main.clean_data方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: readShopAll
# 需要导入模块: import Main [as 别名]
# 或者: from Main import clean_data [as 别名]
def _split_shop_row(row_text, column_count):
    """Split one stripped data row into a list of fields.

    A row with exactly ``column_count - 1`` commas is split on commas.
    Otherwise, a row containing exactly two double quotes is treated as
    having one quoted field: the text between the quotes is replaced by the
    placeholder ``"NULL"`` and the parts before and after it are split on
    commas. This only works when the quoted field is neither the first nor
    the last field of the row.

    Returns the list of fields, or ``None`` when the row matches neither
    form.
    """
    if row_text.count(',') == column_count - 1:
        return row_text.split(',')
    if row_text.count('"') != 2:
        return None
    before, _quoted, after = row_text.split('"')
    # Drop the comma adjacent to each quote before splitting.
    fields = before[:-1].split(',')
    fields.append("NULL")
    fields.extend(after[1:].split(','))
    # Every field here came from split(',') (or is "NULL"), so none can
    # contain a comma; the original re-count of commas was unreachable.
    return fields


def readShopAll(fileShopName):
    """Read the shop CSV file, clean every field and store it on ``gloV``.

    Reads:
        gloV.shopDataType -- looked up by column name to pick the type code
            passed to ``Main.clean_data``; columns not present use code 6.
    Writes:
        gloV.shopVariableNameStr -- the stripped header line.
        gloV.shopVariableName -- the header split on commas.
        gloV.shopDataAll -- one cleaned row is appended per parsed line.
        gloV.SAVE -- ``[names, shopVariableNameStr, shopDataAll]`` where
            ``names`` is the string ``"shopVariableNameStr,shopDataAll,"``.

    Returns:
        0 on success, -1 if ``Main.try_encoding`` reports the file cannot
        be opened, -2 if it reports the encoding is unknown, -3 if the
        first two header columns are not ``经度`` and ``纬度``.
    """
    # --- detect the encoding
    fileShopEncoding = Main.try_encoding(fileShopName)
    if fileShopEncoding == "NO_FILE.":
        print('<%s> 文件无法打开. 程序退出*********' % fileShopName)
        return -1
    if fileShopEncoding == "FAILED.":
        print("<%s> 文件编码不明. 程序退出*********" % fileShopName)
        return -2

    readFailed = False
    shopRead = []
    # The with-block closes the file on every exit path, including the
    # early -3 return below.
    with open(fileShopName, encoding=fileShopEncoding) as fileShop:
        # --- read the column titles
        gloV.shopVariableNameStr = fileShop.readline().strip()
        gloV.shopVariableName = gloV.shopVariableNameStr.split(',')
        # Slice comparison also rejects headers with fewer than two columns
        # instead of raising IndexError.
        if gloV.shopVariableName[:2] != ['经度', '纬度']:
            print('<全国宠物店.csv> 文件前两列不是规定的 [经度, 纬度] 顺序, 程序退出******')
            return -3

        # --- read the data rows
        columnCount = len(gloV.shopVariableName)
        lineNumber = 1  # the header was line 1
        while True:
            lineNumber += 1
            try:
                rawLine = fileShop.readline()
            except UnicodeError:
                print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % lineNumber)
                readFailed = True
                continue
            if not rawLine:
                # readline() returns '' only at end of file; stop here so
                # the end of the file is not reported as a malformed row.
                break
            rowText = rawLine.strip()
            fields = _split_shop_row(rowText, columnCount)
            if fields is None:
                readFailed = True
                print('文件第 %d 行出错: %s' % (lineNumber, rowText))
            else:
                shopRead.append(fields)

    if readFailed:
        print("****** 以上内容无法解析, 列出的数据跳过 ******\n")

    # --- clean the data
    for row in shopRead:
        cleanedRow = []
        # NOTE(review): a quoted row may yield more fields than the header
        # has columns, in which case the name lookup below raises
        # IndexError (unchanged from the original behavior).
        for position, rawValue in enumerate(row):
            columnName = gloV.shopVariableName[position]
            if columnName in gloV.shopDataType:
                typeCode = gloV.shopDataType[columnName]
            else:
                typeCode = 6  # fallback type code for undeclared columns
            cleanedRow.append(Main.clean_data(rawValue, typeCode))
        gloV.shopDataAll.append(cleanedRow)

    # --- gloV.SAVE[0] holds the comma-terminated names of the saved
    # variables; the values follow in the same order.
    gloV.SAVE = [
        "shopVariableNameStr,shopDataAll,",
        gloV.shopVariableNameStr,
        gloV.shopDataAll,
    ]
    return 0
示例2: readCommunityAll
# 需要导入模块: import Main [as 别名]
# 或者: from Main import clean_data [as 别名]
def readCommunityAll(fileCommunityName):
"""
临时使用的变量:
gloV.communityVariableName
gloV.communityVariableNamePosition
gloV.communityDataType
存储的变量
gloV.communityVariableNameStr
gloV.communityDataAll
"""
# ---得到编码
fileCommunityEncoding = Main.try_encoding(fileCommunityName)
if fileCommunityEncoding == "NO_FILE.":
print('<%s> 文件无法打开. 程序退出*********' % fileCommunityName)
return -1
if fileCommunityEncoding == "FAILED.":
print("<%s> 文件编码不明. 程序退出*********" % fileCommunityName)
return -2
fileCommunity = open(fileCommunityName, encoding=fileCommunityEncoding)
# ---读取列标题
gloV.communityVariableNameStr = fileCommunity.readline().strip()
gloV.communityVariableName = gloV.communityVariableNameStr.split(',')
if not (gloV.communityVariableName[0] == "经度" and gloV.communityVariableName[1] == "纬度" and
gloV.communityVariableName[2] == "区县" and gloV.communityVariableName[3] == "容积率" and
gloV.communityVariableName[4] == "均价" and gloV.communityVariableName[5] == "现有户数"):
print("<Community.csv> 前6列不是规定的 [经度,纬度,区县,容积率,均价,现有户数] 顺序,程序退出******")
return -3
for i in range(0, len(gloV.communityVariableName)):
gloV.communityVariableNamePosition[gloV.communityVariableName[i]] = i
# ---数据读取
readFailed = False
communityRead = []
tmpReadLine = 'Begin'
tmpReadCount = 1
while tmpReadLine:
try:
tmpReadLine = fileCommunity.readline()
except Exception:
tmpReadCount += 1
print('文件第 %d 行包含未知汉字, 读取失败, 已跳过.' % tmpReadCount)
readFailed = True
continue
tmpReadCount += 1
i = tmpReadLine
# 检测 i 中是否有<引号">,如果有则必须成对出现,而且中间部分必须不以<逗号,>切分
tmpRead = i.strip()
tmpCountDouHao = tmpRead.count(',')
tmpCountYinHao = tmpRead.count('"')
if tmpCountDouHao == (len(gloV.communityVariableName) - 1):
tmpReadResult = tmpRead.split(",")
communityRead.append(tmpReadResult)
elif tmpCountYinHao == 2:
# 当恰有一对引号出现, 而且没有引号出现在结尾or开头处时, 这段程序可以解析
tmpReadList = tmpRead.split('"')
tmpReadHead = tmpReadList[0][:-1].split(",")
tmpReadTail = tmpReadList[2][1:].split(',')
tmpReadHead.append("NULL")
tmpReadHead.extend(tmpReadTail)
tmpReadResult = tmpReadHead
tmpCountDouHaoAgain = 0
for k in tmpReadResult:
tmpCountDouHaoAgain += k.count(',')
if tmpCountDouHaoAgain == 0:
communityRead.append(tmpReadResult)
else:
readFailed = True
print('文件第 %d 行不符合CSV文件格式: %s' % (tmpReadCount, tmpRead))
else:
readFailed = True
print('文件第 %d 行不符合CSV文件格式: %s' % (tmpReadCount, tmpRead))
fileCommunity.close()
if readFailed:
print("****** 以上内容无法解析, 列出的数据跳过 ******\n")
print('全国社区数据已读入 <%d> 行数据.\n' % len(communityRead))
# ---数据清洗
for i in communityRead:
tmpData1 = []
for j in range(0, len(i)):
tmpData0 = i[j]
if gloV.communityVariableName[j] in gloV.communityDataType:
tmpData = Main.clean_data(tmpData0, gloV.communityDataType[gloV.communityVariableName[j]])
else:
tmpData = Main.clean_data(tmpData0, 6)
tmpData1.append(tmpData)
gloV.communityDataAll.append(tmpData1)
# ---Save
gloV.SAVE_COMMUNITY = [""] # [0]
gloV.SAVE_COMMUNITY[0] += "communityVariableNameStr,"
gloV.SAVE_COMMUNITY.append(gloV.communityVariableNameStr) # [1]
#.........这里部分代码省略.........