本文整理汇总了 Python 中 pybabe.Babe.filterColumns 方法的典型用法代码示例。如果您正苦于以下问题:Python Babe.filterColumns 方法的具体用法?Python Babe.filterColumns 怎么用?Python Babe.filterColumns 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pybabe.Babe 的用法示例。
在下文中一共展示了 Babe.filterColumns 方法的 4 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_twitter
# 需要导入模块: from pybabe import Babe [as 别名]
# 或者: from pybabe.Babe import filterColumns [as 别名]
def test_twitter(self):
    """Run a Twitter pull through column filtering and a CSV push.

    The stream returned by ``Babe().pull_twitter()`` is restricted to a
    fixed list of fields, passed through ``typedetect()``, and pushed as
    CSV into an in-memory buffer. No assertion is made on the output;
    the test only checks that the whole chain runs without raising.
    """
    wanted_fields = [
        "author_name",
        "author_id",
        "author_screen_name",
        "created_at",
        "hashtags",
        "text",
        "in_reply_to_status_id_str",
    ]
    tweets = Babe().pull_twitter()
    typed = tweets.filterColumns(keep_fields=wanted_fields).typedetect()
    sink = StringIO()
    typed.push(stream=sink, format='csv')
示例2: test_filter2
# 需要导入模块: from pybabe import Babe [as 别名]
# 或者: from pybabe.Babe import filterColumns [as 别名]
def test_filter2(self):
    """Check that ``filterColumns(remove_fields=['a'])`` drops column ``a``.

    A two-column CSV (``a``, ``b``) is pulled from an in-memory stream
    and type-detected; after removing ``a`` the string rendering must
    contain only the ``b`` header and its three values.
    """
    a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
    a = a.filterColumns(remove_fields=['a'])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
示例3: test_gs_load_from_kontagent
# 需要导入模块: from pybabe import Babe [as 别名]
# 或者: from pybabe.Babe import filterColumns [as 别名]
def test_gs_load_from_kontagent(self):
# export 1 full day
bucket = 'bertrandtest'
game = 'wordox'
day = '20151021'
hour = '14'
table_name = '{}_{}'.format(game, day)
filename = '{}.csv'.format(table_name + hour)
result = time.strptime(day + ' ' + hour, '%Y%m%d %H')
start_time = datetime(result.tm_year,
result.tm_mon,
result.tm_mday,
result.tm_hour)
end_time = start_time + timedelta(hours=1)
a = Babe()
a = a.pull_kontagent(start_time=start_time,
sample_mode=False,
end_time=end_time,
KT_APPID='869fb4a24faa4c61b702ea137cbe16ad',
discard_names=["PointSend"])
a = a.mapTo(decode_data, insert_fields=["decoded_data"])
a = a.filterColumns(keep_fields=v1)
a = a.filter(lambda row: uid_type_check(row) is True)
a.push(filename=filename,
format='csv',
delimiter='\t',
quotechar='|',
encoding='utf8',
bucket=bucket,
protocol='gs')
a.push_bigquery(filename=filename,
bucket=bucket,
project_id='bigquery-testing-1098',
dataset_id='ladata',
table_name=table_name,
schema=[
{
"name": "date",
"type": "STRING",
"mode": "REQUIRED"
},
{
"name": "hour",
"type": "INTEGER",
"mode": "REQUIRED"
},
{
"name": "time",
"type": "TIMESTAMP",
"mode": "REQUIRED"
},
{
"name": "name",
"type": "STRING",
"mode": "REQUIRED"
},
{
"name": "uid",
"type": "INTEGER"
},
{
"name": "st1",
"type": "STRING"
},
{
"name": "st2",
"type": "STRING"
},
{
"name": "st3",
"type": "STRING"
},
{
"name": "channel_type",
"type": "STRING"
},
{
"name": "value",
"type": "INTEGER"
},
{
"name": "level",
"type": "INTEGER"
},
{
"name": "recipients",
"type": "STRING"
},
{
"name": "tracking_data",
"type": "STRING"
},
{
"name": "data",
"type": "STRING"
}
],
job_id='{}_{}'.format(start_time, end_time),
#.........这里部分代码省略.........
示例4: test_filter2
# 需要导入模块: from pybabe import Babe [as 别名]
# 或者: from pybabe.Babe import filterColumns [as 别名]
def test_filter2(self):
    """Check that ``filterColumns(remove_fields=['a'])`` drops column ``a``.

    A two-column CSV (``a``, ``b``) is pulled from an in-memory stream
    and type-detected; after removing ``a`` the stream is pushed as CSV
    into a buffer, which must contain only the ``b`` header and its
    three values.
    """
    a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
    a = a.filterColumns(remove_fields=['a'])
    buf = StringIO()
    a.push(stream=buf, format="csv")
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(buf.getvalue(), "b\n2\n4\n4\n")