当前位置: 首页>>代码示例>>Python>>正文


Python CSVKitReader.next方法代码示例

本文整理汇总了Python中csvkit.CSVKitReader.next方法的典型用法代码示例。如果您正苦于以下问题:Python CSVKitReader.next方法的具体用法?Python CSVKitReader.next怎么用?Python CSVKitReader.next使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在csvkit.CSVKitReader的用法示例。


在下文中一共展示了CSVKitReader.next方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def main(self):
        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = rows.next()

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = rows.next()

        column_names = self.args.columns.split(',')

        part_count = 0
        output = CSVKitWriter( open(self.args.file._lazy_args[0]+".part.%d" % part_count, 'w'), **self.writer_kwargs)
        output.writerow(column_names)

        count = 0
        for row in rows:
            if (self.args.lines > 0) and (count == self.args.lines):
                part_count += 1
                count = 0
                # couldn't find a better way to close the file
                del output
                output = CSVKitWriter( open(self.args.file._lazy_args[0]+".part.%d" % part_count, 'w'), **self.writer_kwargs)
                output.writerow(column_names)

            output.writerow(row)
            count += 1
开发者ID:Mistobaan,项目名称:csvkit,代码行数:33,代码来源:csvsplit.py

示例2: main

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def main(self):
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = rows.next()

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = rows.next()

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for i, row in enumerate(rows):
            out_row = [row[c] if c < len(row) else None for c in column_ids] 

            if self.args.delete_empty:
                if ''.join(out_row) == '':
                    continue
            
            output.writerow(out_row)
开发者ID:GMADIGITAL,项目名称:csvkit,代码行数:32,代码来源:csvcut.py

示例3: sample_data

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def sample_data(path, dialect_parameters, sample_size, encoding='utf-8'):
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect_parameters)

        try:
            reader.next() # skip headers
            samples = []

            for row in islice(reader, sample_size):
                samples.append(row)
        except UnicodeDecodeError:
            raise DataSamplingError(_('This CSV file contains characters that are not %s encoded. You need to input the correct encoding in order to import data from this file.') % (encoding))

        return samples
开发者ID:Rawadx,项目名称:panda,代码行数:16,代码来源:csvdata.py

示例4: test_no_header_row

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def test_no_header_row(self):
        # stack two CSV files
        args = ['--no-header-row', 'examples/no_header_row.csv', 'examples/no_header_row2.csv']
        output_file = StringIO.StringIO()
        utility = CSVStack(args, output_file)

        utility.main()

        # verify the stacked file's contents
        input_file = StringIO.StringIO(output_file.getvalue())
        reader = CSVKitReader(input_file)

        self.assertEqual(reader.next()[0], 'column1')
        self.assertEqual(reader.next()[0], '1')
        self.assertEqual(reader.next()[0], '4')
开发者ID:GMADIGITAL,项目名称:csvkit,代码行数:17,代码来源:test_csvstack.py

示例5: test_explicit_grouping

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def test_explicit_grouping(self):
        # stack two CSV files
        args = ['--groups', 'asd,sdf', '-n', 'foo', 'examples/dummy.csv', 'examples/dummy2.csv']
        output_file = StringIO.StringIO()
        utility = CSVStack(args, output_file)

        utility.main()

        # verify the stacked file's contents
        input_file = StringIO.StringIO(output_file.getvalue())
        reader = CSVKitReader(input_file)

        self.assertEqual(reader.next(), ['foo', 'a', 'b', 'c'])
        self.assertEqual(reader.next()[0], 'asd')
        self.assertEqual(reader.next()[0], 'sdf')
开发者ID:tthibo,项目名称:csvkit,代码行数:17,代码来源:test_csvstack.py

示例6: main

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def main(self):
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.regex and not self.args.pattern and not self.args.matchfile:
            self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        column_names = rows.next()

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)
        
        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            lines = set(line.rstrip() for line in self.args.matchfile)
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern
            
        patterns = dict((c, pattern) for c in column_ids)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        for i, row in enumerate(filter_reader):
            output.writerow(row)
开发者ID:mattdudys,项目名称:csvkit,代码行数:32,代码来源:csvgrep.py

示例7: main

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def main(self):
        if self.args.names_only:
            self.print_column_names()
            return

        #Read in header and rows
        reader = CSVKitReader(self.input_file, **self.reader_kwargs)
        column_names = reader.next()
        if self.args.columns is None:
            grouped_columns_ids = []
        else:
            grouped_columns_ids = parse_column_identifiers(self.args.columns,
                                                       column_names,
                                                       self.args.zero_based)
        aggregations = []
        try:
            for (fun, cols) in map(lambda (f, cols): (
            f, parse_column_identifiers(cols, column_names, self.args.zero_based)),
                                   self.args.aggregations):
                for col in cols:
                    aggregations.append(aggregate_functions[fun](col))
        except KeyError:
            self.argparser.error("Wrong aggregator function. Available: " + ', '.join(aggregate_functions.keys()))
        #Determine columns to group by, default to all columns


        #Write the output
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        for row in group_rows(column_names, reader, grouped_columns_ids,
                              aggregations):
            output.writerow(row)
开发者ID:dcreado,项目名称:csvkit,代码行数:33,代码来源:csvgroup.py

示例8: from_csv

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def from_csv(cls, f, name='from_csv_table', **kwargs):
        """
        Creates a new Table from a file-like object containing CSV data.
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        sample = contents
        dialect = sniffer.sniff_dialect(sample, **kwargs)

        f = StringIO(contents) 
        reader = CSVKitReader(f, dialect=dialect, **kwargs)

        headers = reader.next()

        data_columns = [[] for c in headers] 

        for row in reader:
            for i, d in enumerate(row):
                try:
                    data_columns[i].append(d.strip())
                except IndexError:
                    # Non-rectangular data is truncated
                    break

        columns = []

        for i, c in enumerate(data_columns): 
            columns.append(Column(i, headers[i], c))

        return Table(columns, name=name)
开发者ID:thatmattbone,项目名称:csvkit,代码行数:34,代码来源:table.py

示例9: main

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def main(self):
        if len(self.args.files) < 2:
            self.argparser.error('You must specify at least two files to stack.')

        if self.args.group_by_filenames:
            groups = [os.path.split(f.name)[1] for f in self.args.files] 
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.args.files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
        else:
            groups = None
                
        group_name = self.args.group_name if self.args.group_name else 'group'

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        for i, f in enumerate(self.args.files):
            rows = CSVKitReader(f, **self.reader_kwargs)
            headers = rows.next()

            if i == 0:
                if groups:
                    headers.insert(0, group_name)
                
                output.writerow(headers)

            for row in rows:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)
开发者ID:binarydud,项目名称:csvkit,代码行数:35,代码来源:csvstack.py

示例10: load

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def load(self):
        '''
            Loads the cleaned up csv files into the database
            Checks record count against csv line count
        '''
        ## get a list of tables in the database
        c = connection.cursor()
        c.execute('SHOW TABLES')
        table_list = [t[0] for t in c.fetchall()]

        ### build a dictionary of tables and the paths to the csvs for loading
        table_dict = {}
        for name in os.listdir(self.csv_dir):

            csv_path = os.path.join(
                self.csv_dir,
                name
            )

            for table in table_list:
                if table ==  name.replace('.csv', '').upper():
                    table_dict[name] = {'table_name': table, 'csv_path': csv_path}

        ## load up the data
        for csv_name, query_dict in table_dict.items():
            #print 'working on %s' % csv_name
            table_name = query_dict['table_name']
            csv_path = query_dict['csv_path']

            c.execute('DELETE FROM %s' % table_name)
            #print 'deleted records from %s' % table_name

            bulk_sql_load_part_1 = '''
                LOAD DATA LOCAL INFILE '%s'
                INTO TABLE %s
                FIELDS TERMINATED BY ','
                OPTIONALLY ENCLOSED BY '"'
                IGNORE 1 LINES
                (
            ''' % (csv_path, table_name)
            infile = open(csv_path)
            csv_reader = CSVKitReader(infile)
            headers = csv_reader.next()

            infile.close()
            infile = open(csv_path)
            csv_record_cnt = len(infile.readlines()) - 1
            infile.close()

            sql_fields = ['`%s`' % h for h in headers]
            bulk_sql_load =  bulk_sql_load_part_1 + ','.join(sql_fields) + ')'
            cnt = c.execute(bulk_sql_load)
            transaction.commit_unless_managed()

            # check load, make sure record count matches
            if cnt == csv_record_cnt:
                print "record counts match\t\t\t\t%s" % csv_name
            else:
                print 'table_cnt: %s\tcsv_lines: %s\t\t%s' % (cnt, csv_record_cnt, csv_name)
开发者ID:paigestjohn,项目名称:django-calaccess-parser,代码行数:61,代码来源:downloadcalaccess.py

示例11: infer_types

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def infer_types(f, sample_size=100):
    reader = CSVKitReader(f)
    headers = reader.next()

    sample = islice(reader, sample_size)
    normal_types, normal_values = normalize_table(sample)

    return zip(headers, [t.__name__ for t in normal_types])
开发者ID:netconstructor,项目名称:panda,代码行数:10,代码来源:utils.py

示例12: __init__

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def __init__(self, schema):
        self.fields = [] # A list of FixedWidthFields

        schema_reader = CSVKitReader(schema)
        schema_decoder = SchemaDecoder(schema_reader.next())

        for row in schema_reader:
            self.fields.append(schema_decoder(row))
开发者ID:bsilverthorn,项目名称:csvkit,代码行数:10,代码来源:fixed.py

示例13: from_csv

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
    def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
        """
        Creates a new Table from a file-like object containing CSV data.

        Note: the column_ids argument will cause only those columns with a matching identifier
        to be parsed, type inferred, etc. However, their order/index property will reflect the
        original data (e.g. column 8 will still be "order" 7, even if it's the third column
        in the resulting Table.
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        # snifflimit == 0 means do not sniff
        if snifflimit is None:
            kwargs['dialect'] = sniffer.sniff_dialect(contents)
        elif snifflimit > 0:
            kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

        f = StringIO(contents)
        rows = CSVKitReader(f, **kwargs)

        if no_header_row:
            # Peek at a row to infer column names from
            row = next(rows) 

            headers = make_default_headers(len(row))
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]
            data_columns = [[] for c in headers]

            # Put row back on top
            rows = itertools.chain([row], rows)
        else:
            headers = rows.next()
            
            if column_ids:
                column_ids = parse_column_identifiers(column_ids, headers, zero_based)
                headers = [headers[c] for c in column_ids]
            else:
                column_ids = range(len(headers))
        
            data_columns = [[] for c in headers]

        for i, row in enumerate(rows):
            for j, d in enumerate(row):
                try:
                    data_columns[j].append(row[column_ids[j]].strip())
                except IndexError:
                    # Non-rectangular data is truncated
                    break

        columns = []

        for i, c in enumerate(data_columns):
            columns.append(Column(column_ids[i], headers[i], c, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

        return Table(columns, name=name)
开发者ID:pnaimoli,项目名称:csvkit,代码行数:60,代码来源:table.py

示例14: print_column_names

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def print_column_names(f, output, **reader_kwargs):
    """
    Pretty-prints the names and indices of all columns to a file-like object (usually sys.stdout).
    """
    rows = CSVKitReader(f, **reader_kwargs)
    column_names = rows.next()

    for i, c in enumerate(column_names):
        output.write('%3i: %s\n' % (i + 1, c))
开发者ID:brwilson,项目名称:csvkit,代码行数:11,代码来源:cli.py

示例15: extract_column_names

# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def extract_column_names(path, dialect_parameters, encoding='utf-8'):
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect_parameters)

        try:
            headers = reader.next()
        except UnicodeDecodeError:
            raise DataSamplingError(_('This CSV file contains characters that are not %s encoded. You need to input the correct encoding in order to import data from this file.') % encoding)

        return headers
开发者ID:Rawadx,项目名称:panda,代码行数:12,代码来源:csvdata.py


注:本文中的csvkit.CSVKitReader.next方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。