当前位置: 首页>>代码示例>>Python>>正文


Python csvkit.CSVKitWriter类代码示例

本文整理汇总了Python中csvkit.CSVKitWriter的典型用法代码示例。如果您正苦于以下问题:Python CSVKitWriter类的具体用法?Python CSVKitWriter怎么用?Python CSVKitWriter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CSVKitWriter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

    def main(self):
        """
        Sort the input CSV on the selected columns and write the result.

        When ``names_only`` is set, only the column names are printed.
        Otherwise the input is loaded into a table, its rows are sorted on the
        columns identified by ``self.args.columns`` (``None`` cells are keyed
        as the empty string) and written, header first, to the output file.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        # The table is named after the input file unless the data comes from stdin.
        if self.input_file.name == '<stdin>':
            table_name = 'csvsql_table'
        else:
            file_part = os.path.split(self.input_file.name)[1]
            table_name = os.path.splitext(file_part)[0]

        tab = table.Table.from_csv(
            self.input_file,
            name=table_name,
            snifflimit=self.args.snifflimit,
            no_header_row=self.args.no_header_row,
            infer_types=(not self.args.no_inference),
            **self.reader_kwargs
        )

        sort_columns = parse_column_identifiers(self.args.columns, tab.headers(), self.args.zero_based)

        def sort_key(record):
            # Substitute '' for None in the key built from the sort columns.
            return ['' if record[c] is None else record[c] for c in sort_columns]

        body = tab.to_rows(serialize_dates=True)
        body.sort(key=sort_key, reverse=self.args.reverse)

        header_row = tab.headers()

        writer = CSVKitWriter(self.output_file, **self.writer_kwargs)
        writer.writerow(header_row)

        for record in body:
            writer.writerow(record)
开发者ID:NickolasLapp,项目名称:csvkit,代码行数:32,代码来源:csvsort.py

示例2: handle

 def handle(self, *args, **options):
     """
     Export the distinct candidate/office/filer combinations to a CSV file.

     Runs a SELECT DISTINCT joining the Candidate, Office and Filer tables
     and writes a header row followed by every result row to
     ``./candidates.csv``.

     ``args`` and ``options`` are accepted but not used here.
     """
     self.cursor = connection.cursor()
     # Only table names taken from the model metadata are interpolated.
     sql = """
     SELECT DISTINCT
         o.name,
         o.seat,
         f.filer_id_raw,
         f.xref_filer_id,
         f.name,
         f.party
     FROM %(candidate)s as c
     INNER JOIN %(office)s as o
     ON c.office_id = o.id
     INNER JOIN %(filer)s as f
     ON c.filer_id = f.id
     """ % dict(
         candidate=models.Candidate._meta.db_table,
         office=models.Office._meta.db_table,
         filer=models.Filer._meta.db_table,
     )
     self.cursor.execute(sql)
     # The context manager guarantees the export file is flushed and closed,
     # even if writing a row raises.
     with open("./candidates.csv", 'wb') as csv_file:
         writer = CSVKitWriter(csv_file)
         writer.writerow([
             'office_name',
             'office_seat',
             'filer_id',
             'xref_filer_id',
             'name',
             'party'
         ])
         writer.writerows(self.cursor.fetchall())
开发者ID:california-civic-data-coalition,项目名称:django-calaccess-campaign-browser,代码行数:31,代码来源:exportcalaccesscampaigncandidates.py

示例3: main

    def main(self):
        """
        Group the input rows and write the aggregated result as CSV.

        When ``names_only`` is set, only the column names are printed.
        Otherwise the header is read, the grouping columns and the requested
        aggregations are resolved, and every row produced by ``group_rows``
        is written to the output file.

        An unknown aggregator name is reported through ``argparser.error``.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        # Read in header and rows
        reader = CSVKitReader(self.input_file, **self.reader_kwargs)
        # Built-in next() works with both Python 2 and Python 3 iterators.
        column_names = next(reader)

        # Determine columns to group by; an empty list is passed on when no
        # columns were requested.
        if self.args.columns is None:
            grouped_columns_ids = []
        else:
            grouped_columns_ids = parse_column_identifiers(self.args.columns,
                                                           column_names,
                                                           self.args.zero_based)

        # Build one aggregation per (function, column) combination. A plain
        # loop replaces the tuple-parameter lambda, which is a syntax error
        # on Python 3.
        aggregations = []
        try:
            for fun, cols in self.args.aggregations:
                col_ids = parse_column_identifiers(cols, column_names, self.args.zero_based)
                for col in col_ids:
                    aggregations.append(aggregate_functions[fun](col))
        except KeyError:
            self.argparser.error("Wrong aggregator function. Available: " + ', '.join(aggregate_functions.keys()))

        # Write the output
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        for row in group_rows(column_names, reader, grouped_columns_ids,
                              aggregations):
            output.writerow(row)
开发者ID:dcreado,项目名称:csvkit,代码行数:31,代码来源:csvgroup.py

示例4: main

    def main(self):
        """
        Write the header plus every input row selected by the search options.

        The value to match against is a compiled regular expression (``regex``),
        a membership test against the stripped lines of ``matchfile``, or the
        literal ``pattern``; it is applied to each column selected by
        ``columns``. ``inverse`` is forwarded to the filtering reader.
        """
        args = self.args

        if args.names_only:
            self.print_column_names()
            return

        if not args.columns:
            self.argparser.error('You must specify at least one column to search using the -c option.')

        if args.regex is None and args.pattern is None and args.matchfile is None:
            self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

        reader = CSVKitReader(self.input_file, **self.reader_kwargs)
        header = next(reader)

        column_ids = parse_column_identifiers(args.columns, header, args.zero_based)

        if args.regex:
            matcher = re.compile(args.regex)
        elif args.matchfile:
            wanted = set(line.rstrip() for line in args.matchfile)

            def matcher(value):
                # True when the cell equals one of the lines of the match file.
                return value in wanted
        else:
            matcher = args.pattern

        # The same matcher is used for every selected column.
        patterns = {column_id: matcher for column_id in column_ids}

        writer = CSVKitWriter(self.output_file, **self.writer_kwargs)
        writer.writerow(header)

        matching_rows = FilteringCSVReader(reader, header=False, patterns=patterns, inverse=args.inverse)

        for matched in matching_rows:
            writer.writerow(matched)
开发者ID:DATAQC,项目名称:csvkit,代码行数:33,代码来源:csvgrep.py

示例5: xlsx2csv

def xlsx2csv(f, output=None, **kwargs):
    """
    Convert every sheet of an Excel .xlsx file to csv.

    Note: Unlike other converters, this one allows output columns to contain mixed data types.
    Blank headers are also possible.

    When ``output`` is given, the rows of all sheets are written to it, one
    sheet after another. Otherwise each sheet is written to its own file named
    ``<input name without extension>_<sheet name without spaces>.csv``.

    :param f: the .xlsx file to read; its ``name`` attribute is used to build
        the per-sheet file names when not streaming.
    :param output: optional file-like object that receives the CSV rows.
    :param kwargs: accepted but not used here.
    :returns: always the empty string.
    """
    streaming = True if output else False

    book = load_workbook(f, use_iterators=True, data_only=True)

    # In streaming mode one writer on the caller's object serves all sheets.
    # Previously no writer was created in this mode, so the first row raised
    # a NameError.
    stream_writer = CSVKitWriter(output) if streaming else None

    for name in book.get_sheet_names():
        sheet = book.get_sheet_by_name(name)

        if streaming:
            sheet_buffer = None
            writer = stream_writer
        else:
            sheet_buffer = six.StringIO()
            writer = CSVKitWriter(sheet_buffer)

        for i, row in enumerate(sheet.iter_rows()):
            if i == 0:
                # The first row is written with its raw cell values.
                writer.writerow([c.value for c in row])
                continue

            out_row = []

            for c in row:
                value = c.value

                if value.__class__ is datetime.datetime:
                    # Handle default XLSX date as 00:00 time
                    if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(c):
                        value = value.time()

                        value = normalize_datetime(value)
                    elif value.time() == NULL_TIME:
                        value = value.date()
                    else:
                        value = normalize_datetime(value)
                elif value.__class__ is float:
                    # Whole-number floats are written as integers.
                    if value % 1 == 0:
                        value = int(value)

                if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
                    value = value.isoformat()

                out_row.append(value)

            writer.writerow(out_row)

        if not streaming:
            # Open the per-sheet file only when it is needed, and let the
            # context manager close it even if the write fails.
            fname = os.path.splitext(f.name)[0]
            outputfname = fname + "_" + name.replace(" ", "") + ".csv"
            with open(outputfname, 'w') as of:
                of.write(sheet_buffer.getvalue())

    # The data has been written to ``output`` or to the per-sheet files.
    return ''
开发者ID:srkgupta,项目名称:csvkit,代码行数:60,代码来源:xlsx.py

示例6: main

    def main(self):
        """
        Write the header plus every input row selected by the search options.

        The value to match against is a compiled regular expression (``regex``),
        a membership test against the stripped lines of ``matchfile``, or the
        literal ``pattern``; it is applied to each column selected by
        ``columns``. ``inverse`` is forwarded to the filtering reader.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.regex and not self.args.pattern and not self.args.matchfile:
            self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        # Built-in next() works with both Python 2 and Python 3 iterators.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            lines = set(line.rstrip() for line in self.args.matchfile)
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern

        # The same pattern is applied to every selected column.
        patterns = dict((c, pattern) for c in column_ids)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        # The row index was never used, so iterate the reader directly.
        for row in filter_reader:
            output.writerow(row)

示例7: main

    def main(self):
        """
        Write only the selected columns of the input CSV.

        When ``names_only`` is set, only the column names are printed. With
        ``no_header_row``, default headers are generated from the width of the
        first row, which is then treated as data. Rows shorter than a selected
        column index are padded with ``None``. With ``delete_empty``, output
        rows whose selected cells are all empty are skipped.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.no_header_row:
            row = next(rows)

            column_names = make_default_headers(len(row))

            # Put the row back on top
            rows = itertools.chain([row], rows)
        else:
            column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based, self.args.not_columns)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for row in rows:
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # any() treats both '' and the None padding as empty. The previous
            # ''.join(out_row) raised TypeError as soon as a short row
            # introduced a None.
            if self.args.delete_empty and not any(out_row):
                continue

            output.writerow(out_row)
开发者ID:gepuro,项目名称:csvkit,代码行数:30,代码来源:csvcut.py

示例8: main

    def main(self):
        """
        Stack the rows of several CSV files into one output.

        At least two input files are required. The header of the first file
        is written once; the header row of every later file is read and
        discarded. When grouping is requested (by file name or through an
        explicit comma-separated list), a grouping column is inserted as the
        first column of the header and of every row.
        """
        if len(self.args.files) < 2:
            self.argparser.error('You must specify at least two files to stack.')

        if self.args.group_by_filenames:
            groups = [os.path.split(f.name)[1] for f in self.args.files]
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.args.files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
        else:
            groups = None

        group_name = self.args.group_name if self.args.group_name else 'group'

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        for i, f in enumerate(self.args.files):
            rows = CSVKitReader(f, **self.reader_kwargs)
            # Built-in next() works with both Python 2 and Python 3 iterators.
            headers = next(rows)

            # Only the first file contributes the header row.
            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            for row in rows:
                if groups:
                    # Prefix each row with the grouping value of its file.
                    row.insert(0, groups[i])

                output.writerow(row)
开发者ID:binarydud,项目名称:csvkit,代码行数:33,代码来源:csvstack.py

示例9: fixed2csv

def fixed2csv(f, schema, output=None, **kwargs):
    """
    Convert a fixed-width file to csv, driven by a CSV-formatted schema.

    The schema CSV must begin with a header row containing (at least) the
    columns "column", "start" and "length"; other columns are ignored. Each
    following row gives a column name, the starting index of the column (an
    integer) and the length of the column (also an integer).

    'start' values are assumed to be zero-based, unless the first 'start'
    value is 1, in which case all values are assumed to be one-based.

    If output is specified, rows are written to that object and the empty
    string is returned; otherwise the complete CSV data is returned. An
    optional ``encoding`` keyword argument is forwarded to the reader.
    """
    streaming = bool(output)

    if not streaming:
        output = six.StringIO()

    # Defaults to None when the caller did not supply an encoding.
    encoding = kwargs.get('encoding')

    writer = CSVKitWriter(output)
    writer.writerows(FixedWidthReader(f, schema, encoding=encoding))

    if streaming:
        # Return empty string when streaming
        return ''

    return output.getvalue()
开发者ID:DATAQC,项目名称:csvkit,代码行数:31,代码来源:fixed.py

示例10: log_errors

    def log_errors(self, rows):
        """
        Log any errors to a csv file

        The log is written inside ``self.log_dir`` (created if missing) under
        the lower-cased ``self.file_name`` with "tsv" replaced by
        "errors.csv". A header row is written first, followed by ``rows``,
        with every field quoted.
        """
        # Make sure the log directory exists
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        # Log writer
        log_path = os.path.join(
            self.log_dir,
            self.file_name.lower().replace("tsv", "errors.csv")
        )

        # The context manager closes the log even if writing a row raises.
        with open(log_path, 'w') as log_file:
            log_writer = CSVKitWriter(log_file, quoting=csv.QUOTE_ALL)

            # Add the headers
            log_writer.writerow([
                'Line number',
                'Headers len',
                'Fields len',
                'Line value'
            ])

            # Log out the rows
            log_writer.writerows(rows)
开发者ID:alabarga,项目名称:django-calaccess-raw-data,代码行数:28,代码来源:cleancalaccessrawfile.py

示例11: main

    def main(self):
        """
        Run a SQL query and write its result set as CSV.

        The query comes from ``self.args.query`` (stripped) or, when that is
        empty, from the full contents of ``self.args.file``. A header row
        with the result's column keys is written unless ``no_header_row`` is
        set.

        Raises ImportError with installation hints when obtaining the
        connection fails with an ImportError.
        """
        try:
            engine, metadata = sql.get_connection(self.args.connection_string)
        except ImportError:
            raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n')

        conn = engine.connect()

        # Release the connection even if the query or the CSV writing fails.
        try:
            if self.args.query:
                query = self.args.query.strip()
            else:
                # Join the lines in one pass instead of repeated concatenation.
                query = ''.join(self.args.file)

            rows = conn.execute(query)
            output = CSVKitWriter(self.output_file, **self.writer_kwargs)

            if not self.args.no_header_row:
                # NOTE(review): this reads a private attribute of the result
                # object; confirm whether a public accessor should be used.
                output.writerow(rows._metadata.keys)

            for row in rows:
                output.writerow(row)
        finally:
            conn.close()
开发者ID:haginara,项目名称:csvkit,代码行数:26,代码来源:sql2csv.py

示例12: geojson2csv

def geojson2csv(f, key=None, **kwargs):
    """
    Convert a GeoJSON FeatureCollection document into CSV text.

    The output has an ``id`` column, one column per property name (in order
    of first appearance across the features) and a final ``geojson`` column
    holding each feature's geometry serialized as JSON. A property missing
    from a feature is written as ``None``.

    ``key`` and ``kwargs`` are accepted but not used here.

    Raises TypeError when the document is not an object, has no "type" key,
    is not a FeatureCollection, or has no "features" key.
    """
    document = json.load(f, object_pairs_hook=OrderedDict)

    if not isinstance(document, dict):
        raise TypeError('JSON document is not valid GeoJSON: Root element is not an object.')

    if 'type' not in document:
        raise TypeError('JSON document is not valid GeoJSON: No top-level "type" key.')

    root_type = document['type']
    if root_type != 'FeatureCollection':
        raise TypeError('Only GeoJSON with root FeatureCollection type is supported. Not %s' % root_type)

    if 'features' not in document:
        raise TypeError('JSON document is not a valid FeatureCollection: No top-level "features" key.')

    # First pass: collect (id, properties, geometry) triples and the ordered
    # list of every property name seen.
    parsed = []
    property_fields = []

    for feature in document['features']:
        feature_id = feature.get('id', None)
        props = feature.get('properties') or {}

        unseen = [name for name in props if name not in property_fields]
        property_fields.extend(unseen)

        geometry_json = json.dumps(feature['geometry'])
        parsed.append((feature_id, props, geometry_json))

    # Second pass: emit the header and one row per feature.
    buffer = six.StringIO()
    writer = CSVKitWriter(buffer)

    writer.writerow(['id'] + property_fields + ['geojson'])

    for feature_id, props, geometry_json in parsed:
        cells = [props.get(name, None) for name in property_fields]
        writer.writerow([feature_id] + cells + [geometry_json])

    text = buffer.getvalue()
    buffer.close()

    return text
开发者ID:gepuro,项目名称:csvkit,代码行数:59,代码来源:geojs.py

示例13: xlsx2csv

def xlsx2csv(f, output=None, **kwargs):
    """
    Convert one sheet of an Excel .xlsx workbook to csv.

    The sheet named by the ``sheet`` keyword argument is converted when that
    keyword is supplied; otherwise the workbook's active sheet is used.
    Output columns may contain mixed data types and blank headers are
    possible.

    When ``output`` is given the rows are written to it and the empty string
    is returned; otherwise the complete CSV text is returned.
    """
    streaming = bool(output)

    if not streaming:
        output = StringIO()

    writer = CSVKitWriter(output)

    book = load_workbook(f, use_iterators=True, data_only=True)

    if 'sheet' not in kwargs:
        sheet = book.get_active_sheet()
    else:
        sheet = book.get_sheet_by_name(kwargs['sheet'])

    temporal_types = (datetime.datetime, datetime.date, datetime.time)

    for index, cells in enumerate(sheet.iter_rows()):
        if index == 0:
            # The first row is written with its raw cell values.
            writer.writerow([cell.value for cell in cells])
            continue

        converted = []

        for cell in cells:
            value = cell.value
            kind = value.__class__

            if kind is datetime.datetime:
                # Handle default XLSX date as 00:00 time
                if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(cell):
                    value = normalize_datetime(value.time())
                elif value.time() == NULL_TIME:
                    value = value.date()
                else:
                    value = normalize_datetime(value)
            elif kind is float and value % 1 == 0:
                # Whole-number floats are written as integers.
                value = int(value)

            # Exact class match on the (possibly converted) value.
            if value.__class__ in temporal_types:
                value = value.isoformat()

            converted.append(value)

        writer.writerow(converted)

    # Return empty string when streaming
    return '' if streaming else output.getvalue()
开发者ID:GMADIGITAL,项目名称:csvkit,代码行数:58,代码来源:xlsx.py

示例14: main

    def main(self):
        """
        Check the input CSV and report, or write out, the result.

        In dry-run mode the rows are only checked and a summary of errors and
        joins is written to the output file. Otherwise the checked rows are
        written to ``<base>_out.csv``; if errors were found they are written
        to ``<base>_err.csv``, where ``<base>`` is the input file name without
        its extension. A summary is written to the output file in both modes.
        """
        reader = CSVKitReader(self.args.file, **self.reader_kwargs)
        report = self.output_file.write

        if self.args.dryrun:
            checker = RowChecker(reader)

            # Drain the checker; the rows themselves are discarded in a dry run.
            for _ in checker.checked_rows():
                pass

            if not checker.errors:
                report("No errors.\n")
            else:
                for e in checker.errors:
                    report("Line %i: %s\n" % (e.line_number, e.msg))

            if checker.joins:
                report(
                    "%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                    % (checker.rows_joined, checker.joins)
                )
            return

        base = splitext(self.args.file.name)[0]

        with open("%s_out.csv" % base, "w") as clean_file:
            clean_writer = CSVKitWriter(clean_file, **self.writer_kwargs)

            checker = RowChecker(reader)
            clean_writer.writerow(checker.column_names)

            for checked in checker.checked_rows():
                clean_writer.writerow(checked)

        if not checker.errors:
            report("No errors.\n")
        else:
            error_filename = "%s_err.csv" % base

            with open(error_filename, "w") as error_file:
                error_writer = CSVKitWriter(error_file, **self.writer_kwargs)

                # The error log prefixes the original columns with the line
                # number and the message.
                error_writer.writerow(["line_number", "msg"] + list(checker.column_names))

                error_count = len(checker.errors)

                for e in checker.errors:
                    error_writer.writerow(self._format_error_row(e))

            plural = "" if error_count == 1 else "s"
            report("%i error%s logged to %s\n" % (error_count, plural, error_filename))

        if checker.joins:
            report(
                "%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
开发者ID:higs4281,项目名称:csvkit,代码行数:58,代码来源:csvclean.py

示例15: main

 def main(self):
   """
   Apply ``self.args.expr`` to the selected columns and write the result.

   The header row is written unchanged. Every data row is passed through
   ``sed.CsvFilter``, which receives the same expression for each column
   selected by ``self.args.columns``.
   """
   reader = CSVKitReader(self.args.file, **self.reader_kwargs)
   # Built-in next() works with both Python 2 and Python 3 iterators.
   cnames = next(reader)
   cids   = parse_column_identifiers(self.args.columns, cnames, self.args.zero_based)
   mods   = {idx: self.args.expr for idx in cids}
   output = CSVKitWriter(self.output_file, **self.writer_kwargs)
   # A separate name keeps the raw reader and the filtering wrapper apart.
   filtered = sed.CsvFilter(reader, mods, header=False)
   output.writerow(cnames)
   for row in filtered:
     output.writerow(row)
开发者ID:pombredanne,项目名称:csvsed,代码行数:10,代码来源:cli.py


注:本文中的csvkit.CSVKitWriter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。