本文整理汇总了Python中table.Table.tableStartIndex方法的典型用法代码示例。如果您正苦于以下问题：Python Table.tableStartIndex方法的具体用法？Python Table.tableStartIndex怎么用？Python Table.tableStartIndex使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类table.Table的用法示例。
在下文中一共展示了Table.tableStartIndex方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: saveTable
# 需要导入模块: from table import Table [as 别名]
# 或者: from table.Table import tableStartIndex [as 别名]
def saveTable(self, tmpTable, rowNum, columnNum, pageElementNum):
    """Copy a scratch table into a new ``Table`` and append it to ``self.tableList``.

    A ``Table(rowNum, columnNum)`` is created and given ``tmpTable``'s start
    index, end index and page number, together with the supplied row and
    column counts. Two 0/1 flags are then set on it, and every cell in the
    ``rowNum`` x ``columnNum`` range is copied from ``tmpTable``.

    Args:
        tmpTable: Source table; its ``tableStartIndex``, ``tableEndIndex``,
            ``pageNum`` and ``getCellValue(row, column)`` are read.
        rowNum: Number of rows to copy.
        columnNum: Number of columns to copy.
        pageElementNum: Value compared against ``tmpTable.tableEndIndex``
            to decide the ``aftExtend`` flag.

    Returns:
        None. The new table is appended to ``self.tableList``.
    """
    saved = Table(rowNum, columnNum)
    saved.tableStartIndex = tmpTable.tableStartIndex
    saved.tableEndIndex = tmpTable.tableEndIndex
    saved.rowNum = rowNum
    saved.columnNum = columnNum
    saved.pageNum = tmpTable.pageNum

    # Decide whether there is a preceding / following continuation table:
    # a start index below 5 sets preExtend, and fewer than 5 elements between
    # the table end and pageElementNum sets aftExtend.
    saved.preExtend = 1 if tmpTable.tableStartIndex < 5 else 0
    saved.aftExtend = 1 if (pageElementNum - tmpTable.tableEndIndex) < 5 else 0

    # Copy the table content cell by cell, logging source and stored values.
    cellLogFormat = "Save cell (row, column, tmpTablevalue, myTablevalue) (%s, %s, %s ,%s)"
    for r in range(rowNum):
        for c in range(columnNum):
            saved.setCellValue(r, c, tmpTable.getCellValue(r, c))
            self.logger.debug(
                cellLogFormat
                % (r, c, tmpTable.getCellValue(r, c), saved.getCellValue(r, c))
            )

    self.tableList.append(saved)
示例2: getTablesinPage
# 需要导入模块: from table import Table [as 别名]
# 或者: from table.Table import tableStartIndex [as 别名]
def getTablesinPage(self, pageNum, startElementIndex=0, endElementIndex=65535):
self.logger.info("Page %s fetch start" % pageNum)
pageContent = self.getPageContent(pageNum)
if pageContent == None:
self.logger.error("Get page content None")
return None
pageElement = pageContent.div.div
pageElementNum = self.getPageElementIndexOrTotalNum(pageNum)
elementIndex = 0
#table信息相关临时标量
rowNum = 1 #保存有多少行 兼 rowIndex
columnNum = 1 #保存有多少列
columnIndex = 1 #记录列的序号
tableStartIndex = 0 #保存table开始时的元素位置
tableEndIndex = 0 #保存table结束时的元素位置
tmpTable = None
while True:
if elementIndex >= startElementIndex:
if (None != pageElement and elementIndex <= endElementIndex):
#找到连续class C,判断是不是table开始
if 'c' == pageElement['class'][0] and 'c' == pageElement.previous_sibling['class'][0]:
if tmpTable == None:
tmpTable = Table(50,20)
#得到前列个元素
compareColumnElemnt = self.getCompareColumnElemnt(pageElement, columnNum)
self.logger.debug("pageElement= %s, pre-pageElement= %s, compareColumnElemnt= %s" % (pageElement.get_text(), pageElement.previous_sibling.get_text(), compareColumnElemnt.get_text()))
self.logger.debug("pageElement Y= %s, pre-pageElement Y= %s" % (pageElement['class'][2], pageElement.previous_sibling['class'][2]))
self.logger.debug("pageElement X= %s, compareColumnElemnt X= %s" % (pageElement['class'][1], compareColumnElemnt['class'][1]))
# 与前一个元素的Y坐标比较
if pageElement.previous_sibling['class'][2] == pageElement['class'][2]:
columnIndex += 1
#与前一个元素的Y等,与前列个元素的 X不等 ----第一行元素
if pageElement['class'][1] != compareColumnElemnt['class'][1]:
columnNum = columnIndex
# 如果table没开始,则找到第一个1/2cell
if tableStartIndex == 0:
tableStartIndex = elementIndex
self.logger.info("Find a table in page:%s, table start index:%s" % (pageNum, tableStartIndex))
# Save cur-cell, pre-cell, tableStartIndex
tableSavedFlag = False
tmpTable.tableStartIndex = tableStartIndex
tmpTable.setCellValue(rowNum-1, columnIndex-2, pageElement.previous_sibling.get_text())
tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
self.logger.debug("Find 1st/2nd cell (rowIndex,columnIndex,value) (%s,%s,%s), (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex-1,pageElement.previous_sibling.get_text(),rowNum, columnIndex,pageElement.get_text()))
#找到第一行其他cell
else:
tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
self.logger.debug("Find other 1st row cell (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex,pageElement.get_text()))
# Save cur-cell
#与前一个元素的Y等,与前列个元素的 X相等----除第一行第一列以外的元素
else:
tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
self.logger.debug("Find Cell row>1,column>1 (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex,pageElement.get_text()))
else:
#与前一个元素的Y不等,与前列个元素的X相等----除(1,1)以外的第一列元素
if pageElement['class'][1] == compareColumnElemnt['class'][1]:
if tableStartIndex != 0:
rowNum += 1
columnNum = columnIndex
columnIndex = 1
self.logger.debug("Find first column cell(rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex, pageElement.get_text()))
tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
else:
# 与前一个Y坐标不等,与前列个X坐标不等----table结束
if tableStartIndex != 0:
tableEndIndex = elementIndex
self.logger.info("Table is ended due to Y!=,X!= with info(rowNum,columnNum,tableEndIndex,content,pageNum) (%s,%s,%s,%s,%s)" % (rowNum, columnIndex, tableEndIndex, pageElement.get_text(),pageNum))
#Save tableEndIndex, rowNum, columnNum append tableList
if tableSavedFlag == False and rowNum >= 1 and columnNum > 1:
tmpTable.tableEndIndex = tableEndIndex
tmpTable.rowNum = rowNum
tmpTable.columnNum = columnNum
self.saveTable(tmpTable,rowNum,columnNum,pageElementNum)
tableSavedFlag = True
# 结束表清空tmptable相关变量
tableStartIndex = 0
rowNum = 1
columnNum = 1
columnIndex = 1
tableEndIndex = 0
tmpTable.reset()
elif 't' == pageElement['class'][0]:
if tableStartIndex != 0:
tableEndIndex = elementIndex
self.logger.info("Table is ended due to find a class t info(rowNum,columnNum,tableEndIndex,pageNum) (%s,%s,%s,%s)" % (rowNum, columnIndex, tableEndIndex,pageNum))
#Save tableEndIndex, rowNum, columnNum
if tableSavedFlag == False and rowNum >= 1 and columnNum > 1:
tmpTable.tableEndIndex = tableEndIndex
tmpTable.rowNum = rowNum
#.........这里部分代码省略.........