本文整理汇总了Python中csvkit.CSVKitReader.next方法的典型用法代码示例。如果您正苦于以下问题:Python CSVKitReader.next方法的具体用法?Python CSVKitReader.next怎么用?Python CSVKitReader.next使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类csvkit.CSVKitReader
的用法示例。
在下文中一共展示了CSVKitReader.next方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def main(self):
    """
    Split the input CSV into multiple ``<name>.part.N`` files of at most
    ``self.args.lines`` data rows each, repeating the header in every part.
    """
    rows = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.no_header_row:
        # Peek at the first row just to count columns, then put it back.
        row = next(rows)
        column_names = make_default_headers(len(row))
        rows = itertools.chain([row], rows)
    else:
        column_names = next(rows)

    # NOTE(review): this unconditionally replaces the header names read above
    # with the user-supplied --columns value -- confirm --columns is mandatory
    # for this tool, otherwise this raises AttributeError on None.
    column_names = self.args.columns.split(',')

    part_count = 0
    # Keep an explicit handle on the underlying file so each part can be
    # closed deterministically (the original relied on `del output`).
    part_file = open(self.args.file._lazy_args[0] + ".part.%d" % part_count, 'w')
    output = CSVKitWriter(part_file, **self.writer_kwargs)
    output.writerow(column_names)

    count = 0

    for row in rows:
        if (self.args.lines > 0) and (count == self.args.lines):
            part_count += 1
            count = 0
            part_file.close()
            part_file = open(self.args.file._lazy_args[0] + ".part.%d" % part_count, 'w')
            output = CSVKitWriter(part_file, **self.writer_kwargs)
            output.writerow(column_names)
        output.writerow(row)
        count += 1

    part_file.close()
示例2: main
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def main(self):
    """
    Write the selected columns of the input CSV to the output file
    (csvcut-style), optionally dropping rows whose selected cells are
    all empty (``--delete-empty``).
    """
    if self.args.names_only:
        self.print_column_names()
        return

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.no_header_row:
        # Peek at a row to infer the column count, then put it back on top.
        row = next(rows)
        column_names = make_default_headers(len(row))
        rows = itertools.chain([row], rows)
    else:
        column_names = next(rows)

    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow([column_names[c] for c in column_ids])

    for i, row in enumerate(rows):
        # Pad short rows with None so every output row has one cell per
        # selected column.
        out_row = [row[c] if c < len(row) else None for c in column_ids]

        if self.args.delete_empty:
            # Fix: the original used ''.join(out_row), which raises
            # TypeError when a short row produced a None placeholder.
            # Treat None and '' alike: skip rows with no truthy cell.
            if not any(out_row):
                continue

        output.writerow(out_row)
示例3: sample_data
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def sample_data(path, dialect_parameters, sample_size, encoding='utf-8'):
    """
    Return up to ``sample_size`` data rows (header excluded) from the CSV
    file at ``path``, raising DataSamplingError when the file is not valid
    in the given ``encoding``.
    """
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect_parameters)

        try:
            next(reader)  # discard the header row
            samples = [record for record in islice(reader, sample_size)]
        except UnicodeDecodeError:
            raise DataSamplingError(_('This CSV file contains characters that are not %s encoded. You need to input the correct encoding in order to import data from this file.') % (encoding))

    return samples
示例4: test_no_header_row
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def test_no_header_row(self):
    """Stacking two headerless CSVs should synthesize a shared header row."""
    arguments = ['--no-header-row', 'examples/no_header_row.csv', 'examples/no_header_row2.csv']
    out = StringIO.StringIO()
    CSVStack(arguments, out).main()

    # Re-read what was written and verify the stacked contents.
    stacked = CSVKitReader(StringIO.StringIO(out.getvalue()))
    self.assertEqual(next(stacked)[0], 'column1')
    self.assertEqual(next(stacked)[0], '1')
    self.assertEqual(next(stacked)[0], '4')
示例5: test_explicit_grouping
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def test_explicit_grouping(self):
    """Stacking with --groups should prepend the named grouping column."""
    arguments = ['--groups', 'asd,sdf', '-n', 'foo', 'examples/dummy.csv', 'examples/dummy2.csv']
    out = StringIO.StringIO()
    CSVStack(arguments, out).main()

    # Re-read what was written and verify the stacked contents.
    stacked = CSVKitReader(StringIO.StringIO(out.getvalue()))
    self.assertEqual(next(stacked), ['foo', 'a', 'b', 'c'])
    self.assertEqual(next(stacked)[0], 'asd')
    self.assertEqual(next(stacked)[0], 'sdf')
示例6: main
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def main(self):
    """
    Filter input CSV rows by matching selected columns against a regex
    (-r), a fixed pattern (-m), or a file of literal lines (-f), writing
    matching (or, with --inverse, non-matching) rows to the output.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.regex and not self.args.pattern and not self.args.matchfile:
        self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

    if self.args.regex:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile:
        lines = set(line.rstrip() for line in self.args.matchfile)

        # PEP 8 (E731): use a def, not an assigned lambda, for the
        # membership-test predicate.
        def pattern(x):
            return x in lines
    else:
        pattern = self.args.pattern

    # Apply the same pattern to every selected column.
    patterns = dict((c, pattern) for c in column_ids)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    for row in filter_reader:
        output.writerow(row)
示例7: main
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def main(self):
    """
    Group input CSV rows by the requested columns and apply the requested
    aggregate functions, writing the grouped result to the output.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    # Read in the header row.
    reader = CSVKitReader(self.input_file, **self.reader_kwargs)
    column_names = next(reader)

    # Determine columns to group by; default to no grouping columns.
    if self.args.columns is None:
        grouped_columns_ids = []
    else:
        grouped_columns_ids = parse_column_identifiers(self.args.columns,
                                                       column_names,
                                                       self.args.zero_based)

    aggregations = []

    try:
        # Fix: the original used a tuple-parameter lambda
        # (`lambda (f, cols): ...`), which is Python-2-only syntax
        # (removed by PEP 3113). Unpack explicitly instead.
        for fun, cols in self.args.aggregations:
            for col in parse_column_identifiers(cols, column_names, self.args.zero_based):
                aggregations.append(aggregate_functions[fun](col))
    except KeyError:
        self.argparser.error("Wrong aggregator function. Available: " + ', '.join(aggregate_functions.keys()))

    # Write the output.
    output = CSVKitWriter(self.output_file, **self.writer_kwargs)

    for row in group_rows(column_names, reader, grouped_columns_ids,
                          aggregations):
        output.writerow(row)
示例8: from_csv
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def from_csv(cls, f, name='from_csv_table', **kwargs):
    """
    Build a new Table from a file-like object containing CSV data.
    """
    # Buffer the whole stream first: "files" arriving via stdin are not
    # seekable, and the dialect sniffer needs to see the data up front.
    contents = f.read()
    dialect = sniffer.sniff_dialect(contents, **kwargs)

    reader = CSVKitReader(StringIO(contents), dialect=dialect, **kwargs)
    headers = next(reader)

    # One accumulator list per header column.
    data_columns = [[] for _ in headers]

    for row in reader:
        for idx, value in enumerate(row):
            try:
                data_columns[idx].append(value.strip())
            except IndexError:
                # Non-rectangular data is truncated
                break

    columns = [Column(idx, headers[idx], cells)
               for idx, cells in enumerate(data_columns)]

    return Table(columns, name=name)
示例9: main
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def main(self):
    """
    Stack two or more CSV files vertically (csvstack-style). The first
    file's header is written once; optionally a grouping column is
    prepended, populated per-file from --groups or from the filenames.
    """
    if len(self.args.files) < 2:
        self.argparser.error('You must specify at least two files to stack.')

    if self.args.group_by_filenames:
        groups = [os.path.split(f.name)[1] for f in self.args.files]
    elif self.args.groups:
        groups = self.args.groups.split(',')

        if len(groups) != len(self.args.files):
            self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
    else:
        groups = None

    group_name = self.args.group_name if self.args.group_name else 'group'

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)

    for i, f in enumerate(self.args.files):
        rows = CSVKitReader(f, **self.reader_kwargs)
        # Builtin next() instead of the Python-2-only .next() method, so
        # this works on both Python 2.6+ and Python 3.
        # Every file's header row is consumed; only the first is written.
        headers = next(rows)

        if i == 0:
            if groups:
                headers.insert(0, group_name)

            output.writerow(headers)

        for row in rows:
            if groups:
                row.insert(0, groups[i])

            output.writerow(row)
示例10: load
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def load(self):
    '''
    Loads the cleaned up csv files into the database
    Checks record count against csv line count
    '''
    ## get a list of tables in the database
    c = connection.cursor()
    c.execute('SHOW TABLES')
    table_list = [t[0] for t in c.fetchall()]

    ### build a dictionary of tables and the paths to the csvs for loading
    table_dict = {}
    for name in os.listdir(self.csv_dir):
        csv_path = os.path.join(
            self.csv_dir,
            name
        )
        for table in table_list:
            # CSVs are matched to tables by upper-cased basename,
            # e.g. "foo.csv" -> table "FOO".
            if table == name.replace('.csv', '').upper():
                table_dict[name] = {'table_name': table, 'csv_path': csv_path}

    ## load up the data
    for csv_name, query_dict in table_dict.items():
        #print 'working on %s' % csv_name
        table_name = query_dict['table_name']
        csv_path = query_dict['csv_path']

        # Replace rather than append: clear the table before bulk loading.
        c.execute('DELETE FROM %s' % table_name)
        #print 'deleted records from %s' % table_name

        # MySQL bulk load. The column list "(" is completed below with
        # backtick-quoted names taken from the CSV header, so the file's
        # column order drives the mapping.
        bulk_sql_load_part_1 = '''
            LOAD DATA LOCAL INFILE '%s'
            INTO TABLE %s
            FIELDS TERMINATED BY ','
            OPTIONALLY ENCLOSED BY '"'
            IGNORE 1 LINES
            (
        ''' % (csv_path, table_name)

        # First pass over the file: read only the header row.
        infile = open(csv_path)
        csv_reader = CSVKitReader(infile)
        headers = csv_reader.next()
        infile.close()

        # Second pass: count data lines (minus the header) so the load
        # can be verified below.
        infile = open(csv_path)
        csv_record_cnt = len(infile.readlines()) - 1
        infile.close()

        sql_fields = ['`%s`' % h for h in headers]
        bulk_sql_load = bulk_sql_load_part_1 + ','.join(sql_fields) + ')'
        # cursor.execute returns the affected-row count for the load.
        cnt = c.execute(bulk_sql_load)
        transaction.commit_unless_managed()

        # check load, make sure record count matches
        if cnt == csv_record_cnt:
            print "record counts match\t\t\t\t%s" % csv_name
        else:
            print 'table_cnt: %s\tcsv_lines: %s\t\t%s' % (cnt, csv_record_cnt, csv_name)
示例11: infer_types
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def infer_types(f, sample_size=100):
    """
    Infer a type for each column of the CSV in file-like object ``f`` by
    normalizing a sample of at most ``sample_size`` data rows.

    Returns pairs of (header, inferred type name).
    """
    reader = CSVKitReader(f)
    # Builtin next() instead of the Python-2-only .next() method.
    headers = next(reader)

    sample = islice(reader, sample_size)
    normal_types, normal_values = normalize_table(sample)

    return zip(headers, [t.__name__ for t in normal_types])
示例12: __init__
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def __init__(self, schema):
    """
    Parse a CSV schema file: the first row configures the decoder, each
    remaining row describes one fixed-width field.
    """
    schema_reader = CSVKitReader(schema)
    decode = SchemaDecoder(next(schema_reader))

    # A list of FixedWidthFields, one per remaining schema row.
    self.fields = [decode(row) for row in schema_reader]
示例13: from_csv
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
    """
    Creates a new Table from a file-like object containing CSV data.

    Note: the column_ids argument will cause only those columns with a matching identifier
    to be parsed, type inferred, etc. However, their order/index property will reflect the
    original data (e.g. column 8 will still be "order" 7, even if it's the third column
    in the resulting Table.
    """
    # This bit of nonsense is to deal with "files" from stdin,
    # which are not seekable and thus must be buffered
    contents = f.read()

    # snifflimit == 0 means do not sniff
    if snifflimit is None:
        kwargs['dialect'] = sniffer.sniff_dialect(contents)
    elif snifflimit > 0:
        kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

    f = StringIO(contents)
    rows = CSVKitReader(f, **kwargs)

    if no_header_row:
        # Peek at a row to infer column names from
        row = next(rows)

        headers = make_default_headers(len(row))
        column_ids = parse_column_identifiers(column_ids, headers, zero_based)
        headers = [headers[c] for c in column_ids]
        data_columns = [[] for c in headers]

        # Put row back on top
        rows = itertools.chain([row], rows)
    else:
        headers = rows.next()

        if column_ids:
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]
        else:
            column_ids = range(len(headers))

        data_columns = [[] for c in headers]

    for i, row in enumerate(rows):
        # `j` indexes positions in the raw row, but the cell actually kept
        # comes from row[column_ids[j]] -- only the selected columns are
        # accumulated.
        for j, d in enumerate(row):
            try:
                # NOTE: the IndexError below fires both when the raw row is
                # shorter than the selected column ids (non-rectangular
                # data) and when j runs past len(column_ids); either way we
                # stop reading cells from this row.
                data_columns[j].append(row[column_ids[j]].strip())
            except IndexError:
                # Non-rectangular data is truncated
                break

    columns = []

    for i, c in enumerate(data_columns):
        columns.append(Column(column_ids[i], headers[i], c, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

    return Table(columns, name=name)
示例14: print_column_names
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def print_column_names(f, output, **reader_kwargs):
    """
    Pretty-prints the names and indices of all columns to a file-like object (usually sys.stdout).
    """
    reader = CSVKitReader(f, **reader_kwargs)
    header = next(reader)

    # Columns are numbered from 1 for display.
    for index, column_name in enumerate(header, start=1):
        output.write('%3i: %s\n' % (index, column_name))
示例15: extract_column_names
# 需要导入模块: from csvkit import CSVKitReader [as 别名]
# 或者: from csvkit.CSVKitReader import next [as 别名]
def extract_column_names(path, dialect_parameters, encoding='utf-8'):
    """
    Return the header row of the CSV file at ``path``, raising
    DataSamplingError when the file is not valid in ``encoding``.
    """
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect_parameters)

        try:
            headers = next(reader)
        except UnicodeDecodeError:
            raise DataSamplingError(_('This CSV file contains characters that are not %s encoded. You need to input the correct encoding in order to import data from this file.') % encoding)

    return headers