当前位置: 首页>>代码示例>>Python>>正文


Python Table.tableStartIndex方法代码示例

本文整理汇总了 Python 中 table.Table.tableStartIndex 方法的典型用法代码示例。如果您正苦于以下问题:Python Table.tableStartIndex 方法的具体用法?Python Table.tableStartIndex 怎么用?Python Table.tableStartIndex 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 table.Table 的用法示例。


在下文中一共展示了Table.tableStartIndex方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: saveTable

# 需要导入模块: from table import Table [as 别名]
# 或者: from table.Table import tableStartIndex [as 别名]
    def saveTable(self, tmpTable, rowNum, columnNum, pageElementNum):
        """Copy a scratch table into a new Table and append it to self.tableList.

        A new ``Table(rowNum, columnNum)`` is created, the start/end indices and
        page number are taken from ``tmpTable``, and every cell in the
        ``rowNum`` x ``columnNum`` range is copied across.

        Args:
            tmpTable: Source table providing ``tableStartIndex``,
                ``tableEndIndex``, ``pageNum`` and ``getCellValue``.
            rowNum: Number of rows to copy; also stored on the new table.
            columnNum: Number of columns to copy; also stored on the new table.
            pageElementNum: Value compared against ``tmpTable.tableEndIndex``
                to set ``aftExtend`` (presumably the number of elements on
                the page -- confirm against the caller).

        Returns:
            None. The new table is appended to ``self.tableList``.
        """
        savedTable = Table(rowNum, columnNum)
        savedTable.tableStartIndex = tmpTable.tableStartIndex
        savedTable.tableEndIndex = tmpTable.tableEndIndex
        savedTable.rowNum = rowNum
        savedTable.columnNum = columnNum
        savedTable.pageNum = tmpTable.pageNum

        # Decide whether the table may have a preceding / following
        # continuation table: the flag is 1 when the table starts within the
        # first 5 element indices, or ends fewer than 5 short of pageElementNum.
        savedTable.preExtend = 1 if tmpTable.tableStartIndex < 5 else 0
        savedTable.aftExtend = 1 if (pageElementNum - tmpTable.tableEndIndex) < 5 else 0

        # Copy the table contents cell by cell, logging source and copied value.
        for rowIdx in range(rowNum):
            for colIdx in range(columnNum):
                savedTable.setCellValue(rowIdx, colIdx, tmpTable.getCellValue(rowIdx, colIdx))
                cellInfo = (
                    rowIdx,
                    colIdx,
                    tmpTable.getCellValue(rowIdx, colIdx),
                    savedTable.getCellValue(rowIdx, colIdx),
                )
                self.logger.debug("Save cell (row, column, tmpTablevalue, myTablevalue) (%s, %s, %s ,%s)" % cellInfo)

        self.tableList.append(savedTable)
开发者ID:huyou963,项目名称:pdfhtmlextract,代码行数:29,代码来源:pdfhtmlextractor.py

示例2: getTablesinPage

# 需要导入模块: from table import Table [as 别名]
# 或者: from table.Table import tableStartIndex [as 别名]
 def getTablesinPage(self, pageNum, startElementIndex=0, endElementIndex=65535):
     
     self.logger.info("Page %s fetch start" % pageNum)
     pageContent = self.getPageContent(pageNum)
     if pageContent == None:
         self.logger.error("Get page content None")
         return None
     
     pageElement = pageContent.div.div
     pageElementNum =  self.getPageElementIndexOrTotalNum(pageNum)
     elementIndex = 0
     
     #table信息相关临时标量
     rowNum = 1            #保存有多少行 兼 rowIndex
     columnNum = 1         #保存有多少列
     columnIndex = 1       #记录列的序号
     tableStartIndex = 0   #保存table开始时的元素位置
     tableEndIndex = 0     #保存table结束时的元素位置
     tmpTable = None
     while True:
         if elementIndex >= startElementIndex:
             if (None != pageElement and elementIndex <= endElementIndex):
                 #找到连续class C,判断是不是table开始
                 if 'c' == pageElement['class'][0] and 'c' == pageElement.previous_sibling['class'][0]:
                     if tmpTable == None:
                         tmpTable = Table(50,20)
                     #得到前列个元素
                     compareColumnElemnt = self.getCompareColumnElemnt(pageElement, columnNum)
                     
                     self.logger.debug("pageElement= %s, pre-pageElement= %s, compareColumnElemnt= %s" % (pageElement.get_text(), pageElement.previous_sibling.get_text(), compareColumnElemnt.get_text()))                    
                     self.logger.debug("pageElement Y= %s, pre-pageElement Y= %s" % (pageElement['class'][2], pageElement.previous_sibling['class'][2]))
                     self.logger.debug("pageElement X= %s, compareColumnElemnt X= %s" % (pageElement['class'][1], compareColumnElemnt['class'][1]))
 
                     # 与前一个元素的Y坐标比较
                     if pageElement.previous_sibling['class'][2] == pageElement['class'][2]:    
                         columnIndex += 1
                         #与前一个元素的Y等,与前列个元素的 X不等 ----第一行元素
                         if pageElement['class'][1] != compareColumnElemnt['class'][1]:
                             columnNum = columnIndex
                             # 如果table没开始,则找到第一个1/2cell
                             if tableStartIndex == 0:
                                 tableStartIndex = elementIndex
                                 self.logger.info("Find a table in page:%s, table start index:%s" % (pageNum, tableStartIndex))
                                 
                                 # Save cur-cell, pre-cell, tableStartIndex
                                 tableSavedFlag = False
                                  
                                 tmpTable.tableStartIndex = tableStartIndex
                                 tmpTable.setCellValue(rowNum-1, columnIndex-2, pageElement.previous_sibling.get_text())
                                 tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
                                
                                 self.logger.debug("Find 1st/2nd cell (rowIndex,columnIndex,value) (%s,%s,%s), (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex-1,pageElement.previous_sibling.get_text(),rowNum, columnIndex,pageElement.get_text()))
                             #找到第一行其他cell
                             else:
                                 tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
                                 self.logger.debug("Find other 1st row cell (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex,pageElement.get_text()))
                                 # Save cur-cell
                         #与前一个元素的Y等,与前列个元素的 X相等----除第一行第一列以外的元素        
                         else:
                             tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())
                             self.logger.debug("Find Cell row>1,column>1 (rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex,pageElement.get_text()))
                             
                     else:
                         #与前一个元素的Y不等,与前列个元素的X相等----除(1,1)以外的第一列元素
                         if pageElement['class'][1] == compareColumnElemnt['class'][1]:
                             if tableStartIndex != 0:
                                 rowNum += 1
                                 columnNum = columnIndex
                                 columnIndex = 1
                             self.logger.debug("Find first column cell(rowIndex,columnIndex,value) (%s,%s,%s)" % (rowNum, columnIndex, pageElement.get_text()))
                             tmpTable.setCellValue(rowNum-1, columnIndex-1, pageElement.get_text())                            
                            
                         else:
                             # 与前一个Y坐标不等,与前列个X坐标不等----table结束
                             if tableStartIndex != 0:
                                 tableEndIndex = elementIndex
                                 self.logger.info("Table is ended due to Y!=,X!= with info(rowNum,columnNum,tableEndIndex,content,pageNum) (%s,%s,%s,%s,%s)" % (rowNum, columnIndex, tableEndIndex, pageElement.get_text(),pageNum))
                                 #Save tableEndIndex, rowNum, columnNum append tableList
                                 if tableSavedFlag == False and rowNum >= 1 and columnNum > 1:
                                     tmpTable.tableEndIndex = tableEndIndex
                                     tmpTable.rowNum = rowNum
                                     tmpTable.columnNum = columnNum
                                     self.saveTable(tmpTable,rowNum,columnNum,pageElementNum)
                                     tableSavedFlag = True
                                 # 结束表清空tmptable相关变量
                                 tableStartIndex = 0
                                 rowNum = 1
                                 columnNum = 1
                                 columnIndex = 1
                                 tableEndIndex = 0
                                 tmpTable.reset()
                                 
                 elif 't' == pageElement['class'][0]:
                     if tableStartIndex != 0:
                         tableEndIndex = elementIndex
                         self.logger.info("Table is ended due to find a class t info(rowNum,columnNum,tableEndIndex,pageNum) (%s,%s,%s,%s)" % (rowNum, columnIndex, tableEndIndex,pageNum))
                         #Save tableEndIndex, rowNum, columnNum
                         if tableSavedFlag == False and rowNum >= 1 and columnNum > 1:
                             tmpTable.tableEndIndex = tableEndIndex
                             tmpTable.rowNum = rowNum
#.........这里部分代码省略.........
开发者ID:huyou963,项目名称:pdfhtmlextract,代码行数:103,代码来源:pdfhtmlextractor.py


注:本文中的table.Table.tableStartIndex方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。