当前位置: 首页>>代码示例>>Python>>正文


Python csvkit.CSVKitReader类代码示例

本文整理汇总了Python中csvkit.CSVKitReader的典型用法代码示例。如果您正苦于以下问题：Python CSVKitReader类的具体用法？Python CSVKitReader怎么用？Python CSVKitReader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CSVKitReader类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

    def main(self):
        """
        Split the input CSV into numbered ``<input path>.part.N`` files.

        Every part file starts with the column-name row. When
        ``self.args.lines`` is greater than zero, a new part is started each
        time that many data rows have been written; otherwise every row goes
        into part 0.

        Column names given in ``self.args.columns`` (comma separated) take
        precedence; without them the names come from the input's header row,
        or from ``make_default_headers`` when ``self.args.no_header_row`` is
        set.
        """
        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        # Only override the names taken from the file when names were
        # actually supplied; calling .split() on a missing value would crash.
        if self.args.columns:
            column_names = self.args.columns.split(',')

        # NOTE(review): the output path is derived from the private
        # _lazy_args attribute of the input file object -- presumably the
        # path the lazy file was created with; confirm against its class.
        base_path = self.args.file._lazy_args[0]

        def open_part(index):
            # Open part file number `index` and write the header row to it.
            handle = open(base_path + ".part.%d" % index, 'w')
            writer = CSVKitWriter(handle, **self.writer_kwargs)
            writer.writerow(column_names)
            return handle, writer

        part_count = 0
        part_file, output = open_part(part_count)

        try:
            count = 0
            for row in rows:
                if (self.args.lines > 0) and (count == self.args.lines):
                    part_count += 1
                    count = 0
                    # Close the finished part explicitly instead of relying
                    # on garbage collection of the writer.
                    part_file.close()
                    part_file, output = open_part(part_count)

                output.writerow(row)
                count += 1
        finally:
            part_file.close()
开发者ID:Mistobaan,项目名称:csvkit,代码行数:31,代码来源:csvsplit.py

示例2: main

    def main(self):
        """
        Concatenate the rows of all input CSV files into a single output CSV.

        The header row of the first file is written once; the header rows of
        the remaining files are read and discarded. When grouping values are
        available (the input file names, or the comma-separated
        ``self.args.groups``), a grouping column is prepended to the header
        and to every data row.
        """
        if len(self.args.files) < 2:
            self.argparser.error('You must specify at least two files to stack.')

        groups = None

        if self.args.group_by_filenames:
            groups = [os.path.basename(f.name) for f in self.args.files]
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.args.files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')

        # Name of the grouping column; falls back to 'group' when not given.
        group_name = self.args.group_name or 'group'

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        for index, f in enumerate(self.args.files):
            reader = CSVKitReader(f, **self.reader_kwargs)
            header = reader.next()

            # Only the first file contributes a header row to the output.
            if index == 0:
                if groups:
                    header.insert(0, group_name)

                output.writerow(header)

            for record in reader:
                if groups:
                    record.insert(0, groups[index])

                output.writerow(record)
开发者ID:binarydud,项目名称:csvkit,代码行数:33,代码来源:csvstack.py

示例3: main

    def main(self):
        """
        Write the header row followed by the rows FilteringCSVReader yields.

        With ``self.args.names_only`` set, only the column names are printed.
        Otherwise one matcher is built -- a compiled regex from
        ``self.args.regex``, a membership test against the right-stripped
        lines of ``self.args.matchfile``, or the raw ``self.args.pattern``
        value -- and associated with every selected column.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not (self.args.regex or self.args.pattern or self.args.matchfile):
            self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

        reader = CSVKitReader(self.args.file, **self.reader_kwargs)
        header = reader.next()

        selected_ids = parse_column_identifiers(self.args.columns, header, self.args.zero_based)

        if self.args.regex:
            matcher = re.compile(self.args.regex)
        elif self.args.matchfile:
            wanted = set(line.rstrip() for line in self.args.matchfile)
            matcher = lambda value: value in wanted
        else:
            matcher = self.args.pattern

        # Every selected column is checked against the same matcher.
        patterns = dict.fromkeys(selected_ids, matcher)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(header)

        matching_rows = FilteringCSVReader(reader, header=False, patterns=patterns, inverse=self.args.inverse)

        for record in matching_rows:
            output.writerow(record)
开发者ID:mattdudys,项目名称:csvkit,代码行数:30,代码来源:csvgrep.py

示例4: main

    def main(self):
        """
        Group the input rows and write the aggregated result as CSV.

        With ``self.args.names_only`` set, only the column names are printed.
        Otherwise ``self.args.aggregations`` is read as a sequence of
        ``(function_name, column_spec)`` pairs; one aggregator is created per
        resolved column by calling ``aggregate_functions[function_name]`` with
        the column id. An unknown function name is reported through
        ``self.argparser.error``.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        # Read in header and rows
        reader = CSVKitReader(self.input_file, **self.reader_kwargs)
        column_names = next(reader)

        # Determine columns to group by; without a column option an empty
        # list is handed to group_rows.
        if self.args.columns is None:
            grouped_columns_ids = []
        else:
            grouped_columns_ids = parse_column_identifiers(self.args.columns,
                                                           column_names,
                                                           self.args.zero_based)

        aggregations = []
        for fun, cols in self.args.aggregations:
            col_ids = parse_column_identifiers(cols, column_names, self.args.zero_based)

            # Keep the try body to the lookup itself so that a KeyError raised
            # elsewhere is not misreported as an unknown aggregator.
            try:
                make_aggregator = aggregate_functions[fun]
            except KeyError:
                self.argparser.error("Wrong aggregator function. Available: " + ', '.join(aggregate_functions.keys()))
            else:
                aggregations.extend(make_aggregator(col) for col in col_ids)

        # Write the output
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        for row in group_rows(column_names, reader, grouped_columns_ids,
                              aggregations):
            output.writerow(row)
开发者ID:dcreado,项目名称:csvkit,代码行数:31,代码来源:csvgroup.py

示例5: from_csv

    def from_csv(cls, f, name='from_csv_table', **kwargs):
        """
        Build a Table from a file-like object holding CSV data.

        The whole input is read into memory, its dialect is sniffed from the
        full contents, and the data is then parsed from an in-memory copy.
        The first row supplies the column headers. Each remaining cell is
        stripped and stored column-wise; cells beyond the number of headers
        are ignored.
        """
        # Inputs such as stdin cannot be rewound, so buffer everything once
        # and parse from the buffer.
        contents = f.read()
        dialect = sniffer.sniff_dialect(contents, **kwargs)

        reader = CSVKitReader(StringIO(contents), dialect=dialect, **kwargs)
        headers = reader.next()

        data_columns = [[] for _ in headers]

        for row in reader:
            # zip() stops at the shorter sequence, so surplus cells in
            # non-rectangular rows are dropped.
            for bucket, cell in zip(data_columns, row):
                bucket.append(cell.strip())

        columns = [
            Column(index, headers[index], values)
            for index, values in enumerate(data_columns)
        ]

        return Table(columns, name=name)
开发者ID:thatmattbone,项目名称:csvkit,代码行数:32,代码来源:table.py

示例6: main

    def main(self):
        """
        Write only the selected columns of the input CSV to the output.

        With ``self.args.names_only`` set, only the column names are printed.
        Column names come from the header row, or from
        ``make_default_headers`` when ``self.args.no_header_row`` is set.
        Rows shorter than a selected column index get ``None`` in that
        position. With ``self.args.delete_empty`` set, rows whose selected
        cells are all empty or missing are skipped.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for row in rows:
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # any() treats both '' and the None placeholders of short rows as
            # empty; joining the cells would raise TypeError on None.
            if self.args.delete_empty and not any(out_row):
                continue

            output.writerow(out_row)
开发者ID:GMADIGITAL,项目名称:csvkit,代码行数:30,代码来源:csvcut.py

示例7: load

    def load(self):
        '''
            Loads the cleaned up csv files into the database
            Checks record count against csv line count

            For every file in self.csv_dir whose name (with '.csv' removed,
            upper-cased) equals a table reported by SHOW TABLES, the table is
            emptied and refilled with LOAD DATA LOCAL INFILE, using the CSV
            header row as the column list. The value returned by
            cursor.execute() is then compared with the number of lines in the
            file minus the header line, and the outcome is printed.
        '''
        ## get a list of tables in the database
        c = connection.cursor()
        c.execute('SHOW TABLES')
        table_list = [t[0] for t in c.fetchall()]

        ### build a dictionary of tables and the paths to the csvs for loading
        table_dict = {}
        for name in os.listdir(self.csv_dir):
            csv_path = os.path.join(self.csv_dir, name)
            candidate = name.replace('.csv', '').upper()

            for table in table_list:
                if table == candidate:
                    table_dict[name] = {'table_name': table, 'csv_path': csv_path}

        ## load up the data
        for csv_name, query_dict in table_dict.items():
            table_name = query_dict['table_name']
            csv_path = query_dict['csv_path']

            # NOTE(review): table and column names are interpolated into the
            # SQL text because identifiers cannot be bound as parameters; the
            # column names come straight from the CSV header row.
            c.execute('DELETE FROM %s' % table_name)

            bulk_sql_load_part_1 = '''
                LOAD DATA LOCAL INFILE '%s'
                INTO TABLE %s
                FIELDS TERMINATED BY ','
                OPTIONALLY ENCLOSED BY '"'
                IGNORE 1 LINES
                (
            ''' % (csv_path, table_name)

            # The header is parsed with the CSV reader; the line count is
            # taken from a separate raw pass over the file.
            with open(csv_path) as infile:
                headers = next(CSVKitReader(infile))

            with open(csv_path) as infile:
                # Count lines without holding the whole file in memory;
                # subtract one for the header line.
                csv_record_cnt = sum(1 for _ in infile) - 1

            sql_fields = ['`%s`' % h for h in headers]
            bulk_sql_load = bulk_sql_load_part_1 + ','.join(sql_fields) + ')'
            # presumably execute() returns the number of affected rows here --
            # TODO confirm for the database backend in use
            cnt = c.execute(bulk_sql_load)
            transaction.commit_unless_managed()

            # check load, make sure record count matches
            if cnt == csv_record_cnt:
                print("record counts match\t\t\t\t%s" % csv_name)
            else:
                print('table_cnt: %s\tcsv_lines: %s\t\t%s' % (cnt, csv_record_cnt, csv_name))
开发者ID:paigestjohn,项目名称:django-calaccess-parser,代码行数:59,代码来源:downloadcalaccess.py

示例8: __init__

    def __init__(self, schema):
        """
        Populate ``self.fields`` from a CSV schema.

        The first row of ``schema`` is passed to ``SchemaDecoder``; the
        resulting decoder is then called on each remaining row and its
        results are collected in order.
        """
        # A list of FixedWidthFields
        self.fields = []

        reader = CSVKitReader(schema)
        decode_row = SchemaDecoder(reader.next())

        self.fields.extend(decode_row(row) for row in reader)
开发者ID:bsilverthorn,项目名称:csvkit,代码行数:8,代码来源:fixed.py

示例9: infer_types

def infer_types(f, sample_size=100):
    """
    Pair each header of a CSV file with the name of its inferred type.

    Only the first ``sample_size`` rows after the header are passed to
    ``normalize_table``; the normalized values it returns are discarded.
    """
    reader = CSVKitReader(f)
    headers = reader.next()

    normal_types, _ = normalize_table(islice(reader, sample_size))
    type_names = [t.__name__ for t in normal_types]

    return zip(headers, type_names)
开发者ID:netconstructor,项目名称:panda,代码行数:8,代码来源:utils.py

示例10: main

    def main(self):
        """
        Render the input CSV as a fixed-width text table on the output file.

        All rows are read into memory so that each column can be padded to
        its widest cell. A divider line is written before the header, after
        the header and after the last row. Output is encoded as UTF-8.
        """
        reader = CSVKitReader(self.input_file, **self.reader_kwargs)

        # Make a default header row if none exists
        if self.args.no_header_row:
            first_row = next(reader)
            header = make_default_headers(len(first_row))

            # Put the peeked row back in front of the remaining rows
            reader = itertools.chain([first_row], reader)
        else:
            header = next(reader)

        header = list(header)

        # Prepend a 'line_number' column when line numbers were requested
        if self.args.line_numbers:
            header.insert(0, 'line_number')
            table = [
                list(itertools.chain([str(number)], record))
                for number, record in enumerate(reader, 1)
            ]
        else:
            table = list(reader)

        # The header is rendered like any other row
        table.insert(0, header)

        # Widest cell seen so far for each column position
        widths = []

        for record in table:
            for col, cell in enumerate(record):
                cell_len = len(cell)

                if col < len(widths):
                    if cell_len > widths[col]:
                        widths[col] = cell_len
                else:
                    widths.append(cell_len)

        # Dashes span each width with '+' character at intersection of
        # horizontal and vertical dividers.
        divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'

        def emit(text):
            return self.output_file.write(text.encode('utf-8'))

        emit('%s\n' % divider)

        last_index = len(table) - 1

        for index, record in enumerate(table):
            cells = [
                ' %s ' % six.text_type('' if cell is None else cell).ljust(widths[col])
                for col, cell in enumerate(record)
            ]

            emit('| %s |\n' % ('|'.join(cells)))

            # Divider goes under the header row and under the final row
            if index in (0, last_index):
                emit('%s\n' % divider)
开发者ID:Trawnee,项目名称:csvkit,代码行数:58,代码来源:csvlook.py

示例11: from_csv

    def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
        """
        Creates a new Table from a file-like object containing CSV data.

        ``snifflimit`` controls dialect sniffing: ``None`` sniffs the whole
        input, a positive number sniffs only that many leading characters,
        and ``0`` disables sniffing. With ``no_header_row`` the first row is
        treated as data and headers come from ``make_default_headers``.
        Remaining keyword arguments are passed on to ``CSVKitReader``.

        Note: the column_ids argument will cause only those columns with a matching identifier
        to be parsed, type inferred, etc. However, their order/index property will reflect the
        original data (e.g. column 8 will still be "order" 7, even if it's the third column
        in the resulting Table).
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        # snifflimit == 0 means do not sniff
        if snifflimit is None:
            kwargs['dialect'] = sniffer.sniff_dialect(contents)
        elif snifflimit > 0:
            kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

        f = StringIO(contents)
        rows = CSVKitReader(f, **kwargs)

        if no_header_row:
            # Peek at a row to infer column names from
            row = next(rows)

            headers = make_default_headers(len(row))
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]

            # Put row back on top
            rows = itertools.chain([row], rows)
        else:
            headers = next(rows)

            if column_ids:
                column_ids = parse_column_identifiers(column_ids, headers, zero_based)
                headers = [headers[c] for c in column_ids]
            else:
                column_ids = range(len(headers))

        data_columns = [[] for c in headers]

        for row in rows:
            # Walk the selected columns rather than the cells of the row, so
            # a selection with more entries than the row has cells is not cut
            # short while the referenced cells exist.
            for j, column_id in enumerate(column_ids):
                try:
                    data_columns[j].append(row[column_id].strip())
                except IndexError:
                    # Non-rectangular data is truncated
                    break

        columns = []

        for i, c in enumerate(data_columns):
            columns.append(Column(column_ids[i], headers[i], c, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

        return Table(columns, name=name)
开发者ID:pnaimoli,项目名称:csvkit,代码行数:58,代码来源:table.py

示例12: print_column_names

def print_column_names(f, output, **reader_kwargs):
    """
    Write one line per column of the CSV header row to ``output``.

    Each line holds the 1-based column position, right-aligned in three
    characters, followed by the column name. ``output`` is any object with a
    ``write`` method (usually sys.stdout).
    """
    header = CSVKitReader(f, **reader_kwargs).next()

    for position, column in enumerate(header, 1):
        output.write('%3i: %s\n' % (position, column))
开发者ID:brwilson,项目名称:csvkit,代码行数:9,代码来源:cli.py

示例13: extract_column_names

def extract_column_names(path, dialect_parameters, encoding='utf-8'):
    """
    Return the first row of the CSV file at ``path``.

    The file is read with the given ``encoding`` and dialect parameters.
    A ``UnicodeDecodeError`` raised while reading that row is converted into
    a ``DataSamplingError`` carrying a translated message that names the
    encoding.
    """
    with open(path, 'r') as csv_file:
        reader = CSVKitReader(csv_file, encoding=encoding, **dialect_parameters)

        try:
            return reader.next()
        except UnicodeDecodeError:
            message = _('This CSV file contains characters that are not %s encoded. You need to input the correct encoding in order to import data from this file.')
            raise DataSamplingError(message % encoding)
开发者ID:Rawadx,项目名称:panda,代码行数:10,代码来源:csvdata.py

示例14: main

 def main(self):
   """
   Pass the input CSV through sed.CsvFilter and write the result.

   The header row is written unchanged. Each selected column id is mapped
   to ``self.args.expr``, and that mapping is handed to ``sed.CsvFilter``
   together with the remaining rows.
   """
   source = CSVKitReader(self.args.file, **self.reader_kwargs)
   header = source.next()
   targets = parse_column_identifiers(self.args.columns, header, self.args.zero_based)
   modifiers = dict((column_id, self.args.expr) for column_id in targets)
   output = CSVKitWriter(self.output_file, **self.writer_kwargs)
   filtered = sed.CsvFilter(source, modifiers, header=False)
   output.writerow(header)
   for record in filtered:
     output.writerow(record)
开发者ID:pombredanne,项目名称:csvsed,代码行数:10,代码来源:cli.py

示例15: __init__

    def __init__(self, schema):
        """
        Populate ``self.fields`` from a CSV schema.

        The first row of ``schema`` is passed to ``SchemaDecoder``; the
        resulting decoder is then called on every following row.

        Raises:
            ValueError: if decoding a row fails. The message contains the
                1-based line number (the header row is line 1) and the
                original error.
        """
        self.fields = [] # A list of FixedWidthFields

        schema_reader = CSVKitReader(schema)
        schema_decoder = SchemaDecoder(next(schema_reader))

        for i, row in enumerate(schema_reader):
            try:
                self.fields.append(schema_decoder(row))
            except Exception as e:
                # i counts data rows from 0; +2 accounts for the header row
                # and for 1-based line numbering.
                raise ValueError("Error reading schema at line %i: %s" % (i + 2, e))
开发者ID:binarydud,项目名称:csvkit,代码行数:11,代码来源:fixed.py


注:本文中的csvkit.CSVKitReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。