本文整理汇总了Python中pandas.io.data.DataReader.to_csv方法的典型用法代码示例。如果您正苦于以下问题:Python DataReader.to_csv方法的具体用法?Python DataReader.to_csv怎么用?Python DataReader.to_csv使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.io.data.DataReader的用法示例。
在下文中一共展示了DataReader.to_csv方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pull_stocks_data
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def pull_stocks_data(retries=2, start_date=None, end_date=None):
    """
    Pull raw data for every stock in the symbol list and save one CSV per symbol.

    Each symbol is requested from the 'yahoo' source. A symbol whose request
    raises ``IOError`` is kept for the next pass; at most ``retries`` passes
    are made over the symbols that are still missing.

    :param retries: number of passes made over the symbols that are still missing
    :param start_date: the first day of the data (datetime format), default value is 2 years before end_date.
    :param end_date: the last day of the data (datetime format), default value is today
    """
    symbols = get_stocks_symbols(write_to_files=False)
    log.notice("Starting to pull stocks data")
    end_date = datetime.today() if end_date is None else end_date
    if start_date is None:
        # take as a default 2 years backwards; an explicit start_date is honoured
        start_date = end_date - timedelta(365*2)
    for retry in range(retries):
        # Collect failures in a separate list instead of removing items from
        # the list being iterated, which would skip the following symbol.
        failed = []
        for symbol in symbols:
            filepath = make_filepath(DATA_PATH+"symbols", symbol, 'csv')  # optimize by avoiding calling this function every time
            try:
                data = DataReader(symbol, 'yahoo', start_date, end_date, retry_count=1)
            except IOError as e:
                log.error("IOError for data query of symbol: {}\n\tError msg: {}".format(symbol, e))
                failed.append(symbol)
                continue
            data.to_csv(filepath)
        symbols = failed
        if not symbols:
            # Everything was fetched; nothing left to retry or to report.
            break
        log.warning("Unable to get {} symbols on try #{}".format(len(symbols), retry+1))
    if symbols:
        log.error("Unable to get {} symbols after {} retries:\n{}".format(len(symbols), retries, symbols))
示例2: _load_raw_yahoo_data
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
"""Load closing prices from yahoo finance.
:Optional:
indexes : dict (Default: {'SPX': '^GSPC'})
Financial indexes to load.
stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
Stock closing prices to load.
start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices from start date on.
end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices until end date.
:Note:
This is based on code presented in a talk by Wes McKinney:
http://wesmckinney.com/files/20111017/notebook_output.pdf
"""
assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""
if start is None:
start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)
if start is not None and end is not None:
assert start < end, "start date is later than end date."
data = OrderedDict()
if stocks is not None:
for stock in stocks:
print(stock)
stock_pathsafe = stock.replace(os.path.sep, '--')
cache_filename = "{stock}-{start}-{end}.csv".format(
stock=stock_pathsafe,
start=start,
end=end).replace(':', '-')
cache_filepath = get_cache_filepath(cache_filename)
if os.path.exists(cache_filepath):
stkd = pd.DataFrame.from_csv(cache_filepath)
else:
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
stkd.to_csv(cache_filepath)
data[stock] = stkd
if indexes is not None:
for name, ticker in iteritems(indexes):
print(name)
stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
data[name] = stkd
return data
示例3: fetch_timeseries
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """
    Return the time series for ``symbol``, read back from its CSV cache file.

    The cache file is ``<dir_name>/<symbol>.csv``; ``dir_name`` is created
    when it does not exist. The data is downloaded from the 'yahoo' source
    (starting 1900-01-01) and written to the cache file unless the file
    already exists and ``use_cache`` is true. In every case the result is
    read from the cache file and passed through ``_adj_column_names``.
    """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    cache_file = os.path.join(dir_name, symbol + '.csv')
    have_cached_copy = os.path.isfile(cache_file) and use_cache
    if not have_cached_copy:
        downloaded = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        downloaded.to_csv(cache_file, encoding='utf-8')

    cached = pd.read_csv(cache_file, index_col='Date', parse_dates=True)
    return _adj_column_names(cached)
示例4: import_data_yahoo_to_files
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def import_data_yahoo_to_files(list_symbols, path, startdate):
    """Download prices for each symbol and write them to ``<path>/<symbol>.csv``.

    Columns are renamed to lower-case names ('Close' becomes 'actualclose',
    'Adj Close' becomes 'close') and a 'symbol' column holding the upper-cased
    symbol (with any '\\r' removed) is added before writing.

    A symbol that yields no rows, or whose download/write raises, is logged
    and skipped; processing continues with the next symbol.

    :param list_symbols: iterable of symbol strings
    :param path: directory the CSV files are written to
    :param startdate: first date to request from the 'yahoo' source
    :return: list of the symbols that failed
    """
    list_error = []
    logger.info("importing from "+str(startdate))
    for symbol in list_symbols:
        try:
            prices_df = DataReader(symbol, "yahoo", startdate)
            count_newdata = len(prices_df)
            # Single pre-formatted string: prints the same text on Python 2 and 3.
            print("%s   %s" % (symbol, count_newdata))
            if count_newdata <= 0:
                raise Exception("NO DATA for Dates for %s" % symbol)
            prices_df = prices_df.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high',
                                                  'Low': 'low', 'Close': 'actualclose', 'Adj Close': 'close',
                                                  'Volume': 'volume', 'Symbol': 'symbol'})
            # The value is the same for every row, so compute it once instead
            # of running a Python lambda per row.
            prices_df['symbol'] = symbol.replace('\r', '').upper()
            prices_df.to_csv(path + "/" + symbol + '.csv')
        except Exception as ex:
            logger.error(ex)
            list_error.append(symbol)
            logger.error(traceback.format_exc())
    # Previously the failures were collected but never handed back to the caller.
    return list_error
示例5: DataReader
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
#http://pandas.pydata.org/pandas-docs/dev/io.html
import datetime as dt
import pandas as pd
from pandas.io.data import DataReader
import numpy as np
# Fetch GooG prices from the 'yahoo' source for 2013-02-02 .. 2013-03-02.
data = DataReader('GooG', "yahoo", '2013-02-02','2013-03-02')
print(data)
# Raw string: in a normal literal "\U" starts a unicode escape, which is a
# SyntaxError on Python 3, and any backslash could silently become an escape.
data.to_csv(r"C:\Users\oskar\Documents\doc_no_backup\python_crap\excel\deol.csv")
# Wait for a line of input before the script ends (presumably to keep the
# console window open). raw_input only exists on Python 2; input is its
# Python 3 equivalent.
try:
    _read_line = raw_input
except NameError:
    _read_line = input
lastline = _read_line(">")
示例6: LabeledPoint
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
# Pull the Open, Close and Volume columns of `df` out as plain Python lists.
# NOTE(review): `df`, `np`, `sc` and `LabeledPoint` are defined before this
# fragment; `sc` is presumably a SparkContext - confirm.
lst_open = df.Open.tolist()
lst_close = df.Close.tolist()
lst_volume = df.Volume.tolist()
# f1: each row's Open minus the previous row's Close.
lst_f1 = np.array(lst_open[1:]) - np.array(lst_close[:-1])
# f2: each row's Volume minus the previous row's Volume.
lst_f2 = np.array(lst_volume[1:]) - np.array(lst_volume[:-1])
# remove first row (it has no previous row, so f1/f2 have no value for it)
df = df[1:]
df['f1'] = lst_f1
df['f2'] = lst_f2
# save data
file_path = 'ibm.csv'
df.to_csv(file_path)
# create rdd from text file
ibm_rdd = sc.textFile(file_path)
ibm_rdd.take(5)
# remove header
header = ibm_rdd.first()
# Drop every line equal to the header, split the remaining lines on ',' and
# build a LabeledPoint from field 7 (label) and fields 8 and 9 (features).
# NOTE(review): which columns fields 7-9 are depends on the columns `df` had
# before f1/f2 were appended; the fields are passed as strings - verify.
ibm_data_rdd = ibm_rdd.filter(lambda x: x != header) \
.map(lambda x: x.split(',')) \
.map(lambda x: LabeledPoint(x[7],[x[8],x[9]]))
ibm_data_rdd.take(5)
# train and test model for 10 times
示例7: p03_AAPL
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def p03_AAPL(tomtom):
    """Download 'AAPL' from the 'yahoo' source (start '01/01/2000') and save it as CSV.

    The file is written to the path returned by
    ``tomtom.get_tmp_name('p03_AAPL.csv')``.
    """
    quotes = DataReader('AAPL', 'yahoo', start='01/01/2000')
    csv_target = tomtom.get_tmp_name('p03_AAPL.csv')
    quotes.to_csv(csv_target)
示例8: p03_DAX
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def p03_DAX(tomtom):
    """Download '^GDAXI' from the 'yahoo' source (start '01/01/2000') and save it as CSV.

    The file is written to the path returned by
    ``tomtom.get_tmp_name('p03_DAX.csv')``.
    """
    quotes = DataReader('^GDAXI', 'yahoo', start='01/01/2000')
    csv_target = tomtom.get_tmp_name('p03_DAX.csv')
    quotes.to_csv(csv_target)
示例9: DataReader
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
#http://pandas.pydata.org/pandas-docs/dev/io.html
import datetime as dt
import pandas as pd
from pandas.io.data import DataReader
import numpy as np
# Fetch GooG prices from the 'yahoo' source for 2013-02-02 .. 2013-03-02.
data = DataReader('GooG', "yahoo", '2013-02-02','2013-03-02')
print(data)
# Raw string: in a normal literal "\U" starts a unicode escape, which is a
# SyntaxError on Python 3, and any backslash could silently become an escape.
data.to_csv(r"C:\Users\oo\Documents\python_none_pythonfiles\excel\pp.csv")
示例10: _load_raw_yahoo_data
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
"""Load closing prices from yahoo finance.
:Optional:
indexes : dict (Default: {'SPX': '^GSPC'})
Financial indexes to load.
stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
Stock closing prices to load.
start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices from start date on.
end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices until end date.
:Note:
This is based on code presented in a talk by Wes McKinney:
http://wesmckinney.com/files/20111017/notebook_output.pdf
"""
assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""
if start is None:
start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)
if not start is None and not end is None:
assert start < end, "start date is later than end date."
data = OrderedDict()
if stocks is not None:
for stock in stocks:
print(stock)
cache_filename = "{stock}-{start}-{end}.csv".format(
stock=stock,
start=start,
end=end).replace(':', '-')
cache_filepath = get_cache_filepath(cache_filename)
if os.path.exists(cache_filepath):
stkd = pd.DataFrame.from_csv(cache_filepath)
else:
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
stkd.to_csv(cache_filepath)
data[stock] = stkd
if indexes is not None:
for name, ticker in iteritems(indexes):
print(name)
stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
data[name] = stkd
# Find common dates
base = set(data[data.keys()[0]]['Close'].keys())
to_del = []
for k in data.keys()[1:]:
here = set(data[k]['Close'].keys())
if abs(len(base) - len(here)) > 0:
print 'Date mismatch for', k
to_del.append(k)
else:
base = base & here
for k in to_del:
del data[k]
print len(data), 'safe symbols loaded of', len(stocks)
return data
示例11:
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
# Downloading data Task 3
import pandas as pd
from pandas.io.data import DataReader as DR
from datetime import datetime as dt
# Date window shared by both downloads below.
start = dt(2011, 1, 1)
end = dt(2015, 6, 1)

data = DR("4162.KL", 'yahoo', start, end)
# Write the prices to a CSV file and read them back with pandas, using the
# 'Date' column (parsed as dates) as the index.
data.to_csv('data.ohlc.scv')
df = pd.read_csv('data.ohlc.scv', index_col='Date', parse_dates=True)

# Window-5 rolling mean of "Open", "High", "Low" and "Close", stored in the
# columns '5OPEN', '5HIGH', '5LOW' and '5CLOSE' (created in that order).
for price_column in ('Open', 'High', 'Low', 'Close'):
    df['5' + price_column.upper()] = pd.rolling_mean(df[price_column], 5)

# Plot the four moving averages using the pandas plotting wrapper.
import matplotlib.pyplot as plt
moving_average_columns = ['5OPEN', '5HIGH', '5LOW', '5CLOSE']
df[moving_average_columns].plot()
plt.title('British American Tobacco Malaysia Bhd Moving Average Stock Prices')
plt.show()

# Download the FTSE KLCI data for the same window and save it as well.
dataKLCI = DR("^KLSE", 'yahoo', start, end)
dataKLCI.to_csv('dataKLCI.ohlc.scv')
示例12: retrieve_stock
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
def retrieve_stock(symbol,from_date,to_date):
    '''Retrieve a stock's prices as a DataFrame indexed by date.

    Steps, in order:

    1. If the symbol's local CSV file exists and covers ``from_date`` ..
       ``to_date``, return its rows with non-zero Volume. If it exists but
       does not cover the range, the requested range is widened to include
       what the file already holds (the whole range is re-fetched, not just
       the missing part).
    2. Fetch the range from the 'yahoo' source; if that raises, fall back to
       scraping the google historical-price pages, 200 rows per page.
    3. If ``to_date`` is today, try to add a row for today scraped from the
       yahoo quote page. Any error in this step is printed and ignored.
    4. Write the result to the local CSV file and return it.

    NOTE(review): the nesting of this function was reconstructed from
    flattened source; confirm the extent of the two ``with`` blocks.
    '''
    import pandas
    from pandas.io.data import DataReader
    from math import ceil
    # try read it from local file
    csv_path = get_csv_path(symbol)
    if isfile(csv_path):
        with local_file_pool:
            data = pandas.read_csv(csv_path)
        if not data.empty:
            if data.columns[0] != 'Date':
                data.rename(columns={data.columns[0]: 'Date'}, inplace=True)
            data.set_index('Date', inplace=True)
            data.index = pandas.to_datetime(data.index)
            if data.index.min() <= from_date and to_date <= data.index.max():
                return data[data.Volume != 0]
            else:
                # request more data ( not incremental)
                from_date = min(data.index.min(), from_date)
                to_date = max(data.index.max(), to_date)
    # Fetch from yahoo finance or google finance
    with network_pool:
        yahoo_sym = convert_to_yahoo_symbol(symbol).lower()
        try:
            prices = DataReader(yahoo_sym, 'yahoo', start=from_date, end=to_date)
            prices = prices[prices.Volume != 0]
        except Exception:
            # Cannot fetch from yahoo finance.
            # `except Exception` (not a bare except) so Ctrl-C / SystemExit
            # are not mistaken for a failed download.
            prices = pandas.DataFrame()
            for page in range(0, ceil((to_date-from_date).days / 200)):
                url = google_historical_price_page.format(symbol.replace(':','%3A'),from_date.strftime('%b+%d%%2C+%Y'),to_date.strftime('%b+%d%%2C+%Y'),page * 200)
                raw = pandas.read_html(url)
                if len(raw) <= 2:
                    break
                data = raw[-1]
                if len(data.keys()) >= 6:
                    data.rename(columns={0: 'Date', 1: 'Open', 2: 'High', 3: 'Low', 4: 'Close', 5: 'Volume'}, inplace=True)
                    data = data[data.Volume != '0']
                    data = data[data.Volume != '-']
                else:
                    data.rename(columns={0: 'Date', 1: 'Close'}, inplace=True)
                data.set_index('Date', inplace=True)
                # Skip the first row (presumably the table's header row - confirm).
                data = data.ix[1:]
                data.index = pandas.to_datetime(data.index)
                prices = prices.append(data.astype('float'))
        if to_date.date() == datetime.today().date():
            import urllib.request
            from bs4 import BeautifulSoup
            try:
                # Context manager so the HTTP response is always closed.
                with urllib.request.urlopen(yahoo_daliy_price_page.format(yahoo_sym)) as response:
                    html = response.read()
                soup = BeautifulSoup(html)
                if soup.title.string.lower().startswith(yahoo_sym):
                    span = soup.find(attrs={'class': 'time_rtq_ticker'})
                    if span is not None:
                        close = float(span.contents[0].string.replace(',',''))
                        if len(prices.columns) == 6:
                            # `open_` rather than `open`, to leave the builtin alone.
                            open_, high, low, vol = 0, 0, 0, 0
                            for elem in soup.find_all(attrs={'class': 'yfnc_tabledata1'}):
                                if elem.previous.startswith('Open'):
                                    open_ = float(elem.contents[0].string.replace(',',''))
                                elif elem.previous.startswith('Day'):  # Day's Range
                                    low = float(elem.contents[0].string.replace(',',''))
                                    high = float(elem.contents[2].string.replace(',',''))
                                elif elem.previous.startswith('Volume'):
                                    vol = float(elem.contents[0].string.replace(',',''))
                            prices.ix[to_date] = open_, high, low, close, vol, 0
                        else:
                            prices.ix[to_date] = close
            except Exception:
                # Best effort: today's row is optional, so report and carry on.
                import traceback
                traceback.print_exc()
    # save to local
    prices.to_csv(csv_path, index_label='Date')
    return prices
示例13: get_all_stock_symbols
# 需要导入模块: from pandas.io.data import DataReader [as 别名]
# 或者: from pandas.io.data.DataReader import to_csv [as 别名]
from pandas.io.data import DataReader
from datetime import datetime
import pandas as pd
def get_all_stock_symbols(symbol_file):
    """Return the 'Symbol' column of a '|'-separated file as an array.

    Only the first column of ``symbol_file`` is read, and it must be headed
    'Symbol'.
    """
    first_column = pd.read_csv(symbol_file, usecols=[0], sep='|')
    return first_column.loc[:, 'Symbol'].values
if __name__ == '__main__':
    import logging
    import os

    # Download 2010-01-01 .. 2015-01-01 prices for every symbol listed in
    # symbol_file and write one '<symbol>.csv' per symbol to stock_data_dir.
    symbol_file = '../../data/stock_symbols.txt'
    stock_data_dir = '../../data/stock_data/'
    symbols = get_all_stock_symbols(symbol_file)
    for sym in symbols:
        try:
            historical_data = DataReader(sym, 'yahoo', datetime(2010, 1, 1),
                                         datetime(2015, 1, 1))
            stk_filename = '{0}.csv'.format(sym)
            stk_filepath = os.path.join(stock_data_dir, stk_filename)
            historical_data.to_csv(stk_filepath)
        except Exception:
            # Still best effort (one bad symbol must not stop the run), but
            # the failure is reported instead of vanishing, and Ctrl-C is no
            # longer swallowed as it was by the bare `except: pass`.
            logging.exception("Skipping %s: download or save failed", sym)