当前位置: 首页>>代码示例>>Python>>正文


Python OCO_Matrix.dims方法代码示例

本文整理汇总了Python中OCO_Matrix.OCO_Matrix.dims方法的典型用法代码示例。如果您正苦于以下问题：Python OCO_Matrix.dims方法的具体用法？Python OCO_Matrix.dims怎么用？Python OCO_Matrix.dims使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在OCO_Matrix.OCO_Matrix的用法示例。


在下文中一共展示了OCO_Matrix.dims方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_data_object

# 需要导入模块: from OCO_Matrix import OCO_Matrix [as 别名]
# 或者: from OCO_Matrix.OCO_Matrix import dims [as 别名]
def get_data_object(data_filename):
    """Load ``data_filename`` and return it as an ``OCO_Matrix`` object.

    The file is first handed to the ``OCO_Matrix`` constructor.  If that
    raises, the file is re-read as a plain whitespace-separated table:

    * blank lines and any line containing ``#`` are skipped;
    * every remaining line is split on whitespace and stored, as strings,
      in one row of a matrix as wide as the longest row (cells missing
      from shorter rows keep the value ``numpy.zeros`` put there);
    * column labels are generated from the file's base name (extension
      removed) and the column index, formatted with
      ``get_column_format(max_cols)``.

    Args:
        data_filename: Path of the file to load.

    Returns:
        An ``OCO_Matrix`` instance, either as built by the constructor or
        populated by hand with ``dims``, ``labels`` and ``data``.
    """

    # Try to load data using OCO_Matrix class.  This is a deliberate
    # best-effort attempt: any ordinary failure falls through to the plain
    # table reader below (KeyboardInterrupt/SystemExit still propagate).
    try:
        return OCO_Matrix(data_filename)
    except Exception:
        pass

    # Now load file as tabled data.  Separate each kept line by whitespace;
    # the context manager guarantees the file is closed even if reading fails.
    with open(data_filename, 'r') as table_file_obj:
        file_rows = [
            line.split()
            for line in table_file_obj
            if line.find('#') < 0 and len(line.strip()) != 0
        ]

    # Widest row decides how to size the resulting matrix
    max_cols = 0
    for row_tokens in file_rows:
        max_cols = max(max_cols, len(row_tokens))

    # Values are stored as the raw string tokens, not converted to float.
    # NOTE(review): numpy.chararray is an array class rather than a dtype;
    # presumably NumPy coerces it to an object array -- confirm before
    # changing it.
    data_mat = numpy.zeros((len(file_rows), max_cols), dtype=numpy.chararray)

    for row_idx, row_tokens in enumerate(file_rows):
        for col_idx, col_value in enumerate(row_tokens):
            data_mat[row_idx][col_idx] = col_value

    # Create label names based on filename and index or else can
    # not select specific columns.  splitext leaves a name without an
    # extension intact (slicing at rfind('.') == -1 cut off its last
    # character).
    label_base = os.path.splitext(os.path.basename(data_filename))[0]

    data_labels = [
        get_column_format(max_cols) % (label_base, col_idx)
        for col_idx in range(max_cols)
    ]

    # Save data into OCO Matrix object
    data_obj = OCO_Matrix()
    data_obj.dims = [len(file_rows), max_cols]
    data_obj.labels = data_labels
    data_obj.data = data_mat

    return data_obj
开发者ID:E-LLP,项目名称:RtRetrievalFramework,代码行数:56,代码来源:gather_data.py

示例2: remove_bad_data_last

# 需要导入模块: from OCO_Matrix import OCO_Matrix [as 别名]
# 或者: from OCO_Matrix.OCO_Matrix import dims [as 别名]
def remove_bad_data_last(input_file, output_file, check_col, check_val):
    """Write ``input_file`` to ``output_file`` without its trailing bad rows.

    Rows are scanned from the last one backwards.  A row counts as bad when
    the lower-cased text of ``check_val``, used as a regular expression,
    is found in the lower-cased text of that row's ``check_col`` cell.  The
    first row found that is not bad becomes the last row kept; everything
    after it is dropped by shrinking the row dimension before writing.  If
    every row is bad, zero rows are kept.

    Args:
        input_file: Path of the matrix file to read with ``OCO_Matrix``.
        output_file: Path the truncated matrix is written to.
        check_col: Column to inspect, as a string: either a column index
            made of digits or a column label (matched case-insensitively
            against ``labels_lower``).
        check_val: Value marking bad data; converted with ``str`` and used
            as a regular-expression pattern.

    Raises:
        ValueError: If ``check_col`` is a label not present in the file
            (raised by ``list.index``).
    """

    # Load existing file
    file_obj = OCO_Matrix(input_file)
    num_rows = file_obj.dims[0]

    if check_col.isdigit():
        check_col = int(check_col)
    else:
        check_col = file_obj.labels_lower.index(check_col.lower())

    # Loop-invariant: the pattern text does not depend on the row
    bad_pattern = str(check_val).lower()

    # Walk backwards over *all* rows, including rows 1 and 0 (the stop
    # value of range() is exclusive, so it must be -1 to reach index 0).
    last_good_index = -1
    for row_idx in range(num_rows - 1, -1, -1):
        cell_text = str(file_obj.data[row_idx, check_col]).lower()
        if not re.search(bad_pattern, cell_text):
            last_good_index = row_idx
            break

    # Same output as the Python 2 statement `print "Last good index = ", x`
    # (two spaces before the value), but valid under Python 3 as well.
    print("Last good index =  %s" % last_good_index)

    # Keep rows 0..last_good_index; column count is unchanged
    file_obj.dims = [last_good_index + 1, file_obj.dims[1]]
    file_obj.write(output_file, use_set_dims=True, auto_size_cols=False)
开发者ID:E-LLP,项目名称:RtRetrievalFramework,代码行数:23,代码来源:remove_bad_data.py

示例3: standalone_main

# 需要导入模块: from OCO_Matrix import OCO_Matrix [as 别名]
# 或者: from OCO_Matrix.OCO_Matrix import dims [as 别名]

#.........这里部分代码省略.........
                continue        

            # Get list of columns to take data from
            used_columns = []
            row_specifiers = []
            renamed_columns = []
            if options.columns == None or len(options.columns) <= 0:
                used_columns = data_obj.labels
            else:
                for col_option in options.columns:
                    col_parts = col_option.split('#')
                    col_name = col_parts[0]

                    if len(col_parts) >= 2:
                        row_spec = col_parts[1]
                        if row_spec.isdigit():
                            row_spec = '%d' % int(row_spec)
                    else:
                        row_spec = ':'

                    if len(col_parts) >= 3:
                        col_new_name = col_parts[2]
                    else:
                        col_new_name = col_name

                    if col_name.isdigit():
                        # If column name is a integer then try and look it up
                        # in the labels, failing that use the index if it is not
                        # larger than the number of columns
                        if int(col_name) >= 0 and int(col_name) < len(data_obj.labels):
                            used_columns.append(data_obj.labels[int(col_name)])
                            if col_new_name == col_name:
                                col_new_name = data_obj.labels[int(col_name)]
                        elif int(col_name) < data_obj.dims[1]:
                            used_columns.append(col_name)
                        row_specifiers.append(row_spec)

                        renamed_columns.append(col_new_name)
                    elif col_name in data_obj.labels:
                        # Use the column name as is since it appears in the file's label list
                        used_columns.append(col_name)
                        row_specifiers.append(row_spec)

                        renamed_columns.append(col_new_name)

            # Get data for each used column
            for (col_orig_name, row_spec, col_new_name) in zip(used_columns, row_specifiers, renamed_columns):
                # Find the index for the column so we know how to extract it
                if col_orig_name.isdigit():
                    col_index = int(col_orig_name)
                else:
                    col_index = data_obj.labels.index(col_orig_name)

                col_data = data_obj.data[:, col_index]

                # Try the row_spec as a range for an array otherwise use as a filter
                all_data_range = range(0, data_obj.dims[0])
                try:
                    used_data_range = eval('all_data_range[' + row_spec + ']')
                except:
                    used_data_range = []
                    for row_index in all_data_range:
                        for col_index in range(data_obj.dims[1]):
                            if re.search(row_spec, str(data_obj.data[row_index, col_index])):
                                used_data_range.append(row_index)
                                break
开发者ID:E-LLP,项目名称:RtRetrievalFramework,代码行数:70,代码来源:gather_data.py


注:本文中的OCO_Matrix.OCO_Matrix.dims方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。