本文整理汇总了Python中csvkit.CSVKitWriter类的典型用法代码示例。如果您正苦于以下问题:Python CSVKitWriter类的具体用法?Python CSVKitWriter怎么用?Python CSVKitWriter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了CSVKitWriter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(self):
    """Sort the rows of the input CSV on the selected columns and write the result."""
    if self.args.names_only:
        self.print_column_names()
        return

    # Derive a table name from the input filename; fall back to a
    # generic name when reading from stdin.
    if self.input_file.name != '<stdin>':
        table_name = os.path.splitext(os.path.split(self.input_file.name)[1])[0]
    else:
        table_name = 'csvsql_table'

    tab = table.Table.from_csv(
        self.input_file,
        name=table_name,
        snifflimit=self.args.snifflimit,
        no_header_row=self.args.no_header_row,
        infer_types=(not self.args.no_inference),
        **self.reader_kwargs
    )

    column_ids = parse_column_identifiers(
        self.args.columns, tab.headers(), self.args.zero_based)
    rows = tab.to_rows(serialize_dates=True)

    # None does not compare cleanly against other types, so treat it
    # as an empty string for sorting purposes.
    def sort_key(row):
        return [row[c] if row[c] is not None else '' for c in column_ids]

    rows.sort(key=sort_key, reverse=self.args.reverse)

    # Re-attach the header row before writing.
    rows.insert(0, tab.headers())

    writer = CSVKitWriter(self.output_file, **self.writer_kwargs)
    for row in rows:
        writer.writerow(row)
示例2: handle
def handle(self, *args, **options):
    """Export the distinct candidate/office/filer join to ./candidates.csv."""
    self.cursor = connection.cursor()
    # Table names are interpolated from Django model metadata, not user
    # input, so plain %-formatting is not an injection risk here.
    sql = """
        SELECT DISTINCT
            o.name,
            o.seat,
            f.filer_id_raw,
            f.xref_filer_id,
            f.name,
            f.party
        FROM %(candidate)s as c
        INNER JOIN %(office)s as o
        ON c.office_id = o.id
        INNER JOIN %(filer)s as f
        ON c.filer_id = f.id
    """ % dict(
        candidate=models.Candidate._meta.db_table,
        office=models.Office._meta.db_table,
        filer=models.Filer._meta.db_table,
    )
    self.cursor.execute(sql)
    # The original opened the file and never closed it; a context
    # manager guarantees the handle is released even if writing fails.
    with open("./candidates.csv", 'wb') as out:
        writer = CSVKitWriter(out)
        writer.writerow([
            'office_name',
            'office_seat',
            'filer_id',
            'xref_filer_id',
            'name',
            'party'
        ])
        writer.writerows(self.cursor.fetchall())
开发者ID:california-civic-data-coalition,项目名称:django-calaccess-campaign-browser,代码行数:31,代码来源:exportcalaccesscampaigncandidates.py
示例3: main
def main(self):
    """Group rows on the selected columns and apply aggregate functions."""
    if self.args.names_only:
        self.print_column_names()
        return

    # Read in header and rows.
    reader = CSVKitReader(self.input_file, **self.reader_kwargs)
    # next() instead of .next(): the latter is Python 2-only, and the
    # sibling commands in this codebase already use the builtin.
    column_names = next(reader)

    # Determine columns to group by; default to no grouping columns.
    if self.args.columns is None:
        grouped_columns_ids = []
    else:
        grouped_columns_ids = parse_column_identifiers(
            self.args.columns, column_names, self.args.zero_based)

    aggregations = []
    try:
        # Tuple-parameter lambdas (`lambda (f, cols): ...`) are a syntax
        # error on Python 3; unpack explicitly in the loop instead.
        for fun, cols in self.args.aggregations:
            col_ids = parse_column_identifiers(cols, column_names, self.args.zero_based)
            for col in col_ids:
                aggregations.append(aggregate_functions[fun](col))
    except KeyError:
        self.argparser.error(
            "Wrong aggregator function. Available: " + ', '.join(aggregate_functions.keys()))

    # Write the output: one row per group, produced by group_rows().
    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    for row in group_rows(column_names, reader, grouped_columns_ids, aggregations):
        output.writerow(row)
示例4: main
def main(self):
    """Write only the rows whose selected columns match a regex, literal
    pattern, or a line from a match file."""
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.columns:
        self.argparser.error('You must specify at least one column to search using the -c option.')

    if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
        self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

    rows = CSVKitReader(self.input_file, **self.reader_kwargs)
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

    # Build the matcher: a compiled regex, a membership test against
    # the match file's lines, or a literal pattern string.
    if self.args.regex:
        matcher = re.compile(self.args.regex)
    elif self.args.matchfile:
        wanted = set(line.rstrip() for line in self.args.matchfile)
        matcher = lambda value: value in wanted
    else:
        matcher = self.args.pattern

    # Apply the same matcher to every selected column.
    patterns = {c: matcher for c in column_ids}

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    for row in FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse):
        output.writerow(row)
示例5: xlsx2csv
def xlsx2csv(f, output=None, **kwargs):
    """
    Convert an Excel .xlsx workbook to CSV, one output per sheet.

    Note: Unlike other convertor's, this one allows output columns to
    contain mixed data types. Blank headers are also possible.

    If ``output`` is given, every sheet is written to that stream and an
    empty string is returned; otherwise each sheet is written to
    "<input basename>_<sheet name>.csv" and closed deterministically.
    """
    streaming = True if output else False
    book = load_workbook(f, use_iterators=True, data_only=True)
    sheets = book.get_sheet_names()
    fname = os.path.splitext(f.name)[0]

    for name in sheets:
        sheet = book.get_sheet_by_name(name)
        if not streaming:
            output = six.StringIO()
        writer = CSVKitWriter(output)

        for i, row in enumerate(sheet.iter_rows()):
            if i == 0:
                # First row is the header; emit cell values verbatim.
                writer.writerow([c.value for c in row])
                continue
            out_row = []
            for c in row:
                value = c.value
                if value.__class__ is datetime.datetime:
                    # Handle default XLSX date as 00:00 time
                    if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(c):
                        value = value.time()
                        value = normalize_datetime(value)
                    elif value.time() == NULL_TIME:
                        value = value.date()
                    else:
                        value = normalize_datetime(value)
                elif value.__class__ is float:
                    if value % 1 == 0:
                        value = int(value)
                if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
                    value = value.isoformat()
                out_row.append(value)
            writer.writerow(out_row)

        if not streaming:
            # The original opened the per-sheet file unconditionally —
            # leaking the handle when streaming — and never used a
            # context manager. Open only when actually writing a file.
            outputfname = fname + "_" + name.replace(" ", "") + ".csv"
            with open(outputfname, 'w') as of:
                of.write(output.getvalue())

    # Return empty string when streaming
    return ''
示例6: main
def main(self):
    """Write only the rows whose selected columns match a regex, literal
    pattern, or a line from a match file."""
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.regex and not self.args.pattern and not self.args.matchfile:
        self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)
    # next() instead of .next(): the latter is Python 2-only and the
    # sibling implementation of this command already uses the builtin.
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

    # Build the matcher: compiled regex, membership test against the
    # match file's lines, or a literal pattern string.
    if self.args.regex:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile:
        lines = set(line.rstrip() for line in self.args.matchfile)
        pattern = lambda x: x in lines
    else:
        pattern = self.args.pattern

    patterns = dict((c, pattern) for c in column_ids)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    # The enumerate() index in the original was never used; iterate directly.
    for row in filter_reader:
        output.writerow(row)
示例7: main
def main(self):
    """Select the requested columns from the input CSV and write them out."""
    if self.args.names_only:
        self.print_column_names()
        return

    rows = CSVKitReader(self.input_file, **self.reader_kwargs)

    if self.args.no_header_row:
        row = next(rows)
        column_names = make_default_headers(len(row))
        # Put the consumed row back on top of the stream.
        rows = itertools.chain([row], rows)
    else:
        column_names = next(rows)

    column_ids = parse_column_identifiers(
        self.args.columns, column_names, self.args.zero_based, self.args.not_columns)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow([column_names[c] for c in column_ids])

    for row in rows:
        # Pad short rows with None so every selected column is present.
        out_row = [row[c] if c < len(row) else None for c in column_ids]

        if self.args.delete_empty:
            # ''.join(out_row) raises TypeError on the None padding
            # above; any() treats both None and '' as empty instead.
            if not any(out_row):
                continue

        output.writerow(out_row)
示例8: main
def main(self):
    """Stack two or more CSV files, optionally prepending a grouping column."""
    if len(self.args.files) < 2:
        self.argparser.error('You must specify at least two files to stack.')

    # Grouping values come from filenames, an explicit list, or nowhere.
    if self.args.group_by_filenames:
        groups = [os.path.split(f.name)[1] for f in self.args.files]
    elif self.args.groups:
        groups = self.args.groups.split(',')
        if len(groups) != len(self.args.files):
            self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
    else:
        groups = None

    group_name = self.args.group_name if self.args.group_name else 'group'

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)

    for i, f in enumerate(self.args.files):
        rows = CSVKitReader(f, **self.reader_kwargs)
        # next() instead of .next(): the latter is Python 2-only and the
        # sibling commands in this codebase already use the builtin.
        headers = next(rows)

        # Only the first file contributes the (possibly augmented) header.
        if i == 0:
            if groups:
                headers.insert(0, group_name)
            output.writerow(headers)

        for row in rows:
            if groups:
                row.insert(0, groups[i])
            output.writerow(row)
示例9: fixed2csv
def fixed2csv(f, schema, output=None, **kwargs):
    """
    Convert a fixed-width file to csv using a CSV-formatted schema description.

    A schema CSV must start with a header row with (at least) columns
    labeled "column", "start", and "length". (Other columns will be
    ignored.) Each subsequent row identifies a column name, the starting
    index of the column (an integer), and the length of the column (also
    an integer). Values in the 'start' column are assumed to be
    zero-based, unless the first value for 'start' is 1, in which case
    all values are assumed to be one-based.

    If output is specified, rows will be written to that object,
    otherwise the complete data will be returned.
    """
    streaming = bool(output)
    if not streaming:
        output = six.StringIO()

    # Only the 'encoding' keyword is honoured; absent means default.
    encoding = kwargs.get('encoding')

    writer = CSVKitWriter(output)
    reader = FixedWidthReader(f, schema, encoding=encoding)
    writer.writerows(reader)

    if not streaming:
        return output.getvalue()

    # Return empty string when streaming
    return ''
示例10: log_errors
def log_errors(self, rows):
    """
    Log any errors to a csv file under self.log_dir.
    """
    # Make sure the log directory exists. A plain `if` is clearer than
    # the original's `exists() or makedirs()` expression-statement.
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)

    # Derive the error-log path from the source file name.
    log_path = os.path.join(
        self.log_dir,
        self.file_name.lower().replace("tsv", "errors.csv")
    )

    # The original never closed the handle if writerows() raised; the
    # context manager closes it on every path.
    with open(log_path, 'w') as log_file:
        log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)
        # Add the headers
        log_writer.writerow([
            'Line number',
            'Headers len',
            'Fields len',
            'Line value'
        ])
        # Log out the rows
        log_writer.writerows(rows)
示例11: main
def main(self):
    """Run a SQL query against a database and write the result set as CSV."""
    try:
        engine, metadata = sql.get_connection(self.args.connection_string)
    except ImportError:
        raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

    conn = engine.connect()

    try:
        if self.args.query:
            query = self.args.query.strip()
        else:
            # join() instead of repeated += — avoids quadratic string copies.
            query = ''.join(self.args.file)

        rows = conn.execute(query)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        if not self.args.no_header_row:
            output.writerow(rows._metadata.keys)

        for row in rows:
            output.writerow(row)
    finally:
        # The original leaked the connection if the query or any write
        # raised; always close it.
        conn.close()
示例12: geojson2csv
def geojson2csv(f, key=None, **kwargs):
    """
    Convert a GeoJSON document into CSV format.
    """
    js = json.load(f, object_pairs_hook=OrderedDict)

    # Validate the top-level structure before touching features.
    if not isinstance(js, dict):
        raise TypeError('JSON document is not valid GeoJSON: Root element is not an object.')
    if 'type' not in js:
        raise TypeError('JSON document is not valid GeoJSON: No top-level "type" key.')
    if js['type'] != 'FeatureCollection':
        raise TypeError('Only GeoJSON with root FeatureCollection type is supported. Not %s' % js['type'])
    if 'features' not in js:
        raise TypeError('JSON document is not a valid FeatureCollection: No top-level "features" key.')

    # Collect (id, properties, geometry) per feature while accumulating
    # the union of property names in first-seen order.
    parsed_features = []
    property_fields = []

    for feature in js['features']:
        feature_id = feature.get('id', None)
        props = feature.get('properties') or {}

        for prop_name in props.keys():
            if prop_name not in property_fields:
                property_fields.append(prop_name)

        parsed_features.append((feature_id, props, json.dumps(feature['geometry'])))

    header = ['id'] + property_fields + ['geojson']

    buf = six.StringIO()
    writer = CSVKitWriter(buf)
    writer.writerow(header)

    for feature_id, props, geometry in parsed_features:
        writer.writerow(
            [feature_id]
            + [props.get(field, None) for field in property_fields]
            + [geometry]
        )

    result = buf.getvalue()
    buf.close()

    return result
示例13: xlsx2csv
def xlsx2csv(f, output=None, **kwargs):
    """
    Convert an Excel .xlsx file to csv.

    Note: Unlike other convertor's, this one allows output columns to
    contain mixed data types. Blank headers are also possible.
    """
    streaming = bool(output)
    if not streaming:
        output = StringIO()

    writer = CSVKitWriter(output)
    book = load_workbook(f, use_iterators=True, data_only=True)

    # Convert a named sheet when requested, otherwise the active one.
    if 'sheet' in kwargs:
        sheet = book.get_sheet_by_name(kwargs['sheet'])
    else:
        sheet = book.get_active_sheet()

    for i, row in enumerate(sheet.iter_rows()):
        if i == 0:
            # Header row: emit cell values verbatim.
            writer.writerow([c.value for c in row])
            continue

        converted = []
        for c in row:
            value = c.value
            if value.__class__ is datetime.datetime:
                # Handle default XLSX date as 00:00 time
                if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(c):
                    value = normalize_datetime(value.time())
                elif value.time() == NULL_TIME:
                    value = value.date()
                else:
                    value = normalize_datetime(value)
            elif value.__class__ is float and value % 1 == 0:
                value = int(value)

            # Serialize any remaining temporal value as ISO 8601 text.
            if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
                value = value.isoformat()

            converted.append(value)
        writer.writerow(converted)

    if not streaming:
        return output.getvalue()

    # Return empty string when streaming
    return ''
示例14: main
def main(self):
    """Validate a CSV file: report problems on a dry run, or write the
    cleaned rows and the errors to separate output files."""
    reader = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.dryrun:
        # Dry run: scan every row but write nothing to disk.
        checker = RowChecker(reader)

        for _ in checker.checked_rows():
            pass

        if checker.errors:
            for e in checker.errors:
                self.output_file.write("Line %i: %s\n" % (e.line_number, e.msg))
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
    else:
        base, ext = splitext(self.args.file.name)

        # Cleaned rows go to <base>_out.csv.
        with open("%s_out.csv" % base, "w") as f:
            clean_writer = CSVKitWriter(f, **self.writer_kwargs)
            checker = RowChecker(reader)
            clean_writer.writerow(checker.column_names)
            for row in checker.checked_rows():
                clean_writer.writerow(row)

        if checker.errors:
            # Errors go to <base>_err.csv with line number and message
            # prepended to the original columns.
            error_filename = "%s_err.csv" % base

            with open(error_filename, "w") as f:
                error_writer = CSVKitWriter(f, **self.writer_kwargs)
                error_writer.writerow(["line_number", "msg"] + checker.column_names)
                for e in checker.errors:
                    error_writer.writerow(self._format_error_row(e))

            error_count = len(checker.errors)
            self.output_file.write(
                "%i error%s logged to %s\n" % (error_count, "" if error_count == 1 else "s", error_filename)
            )
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
示例15: main
def main(self):
    """Apply a sed-style expression to the selected columns of the input CSV."""
    reader = CSVKitReader(self.args.file, **self.reader_kwargs)
    # next() instead of .next(): the latter is Python 2-only and the
    # sibling commands in this codebase already use the builtin.
    cnames = next(reader)

    cids = parse_column_identifiers(self.args.columns, cnames, self.args.zero_based)
    # Map each target column index to the substitution expression.
    mods = {idx: self.args.expr for idx in cids}

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    reader = sed.CsvFilter(reader, mods, header=False)

    output.writerow(cnames)
    for row in reader:
        output.writerow(row)