本文整理汇总了Python中pandas.io.common.urlopen方法的典型用法代码示例。如果您正苦于以下问题:Python common.urlopen方法的具体用法?Python common.urlopen怎么用?Python common.urlopen使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.io.common
的用法示例。
在下文中一共展示了common.urlopen方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: can_connect
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def can_connect(url, error_classes=_network_error_classes):
    """Check whether *url* accepts a connection.

    Parameters
    ----------
    url : basestring
        The URL to try to connect to.
    error_classes : tuple of Exception classes
        Exception types that indicate an unusable URL
        (defaults to the module's network error classes).

    Returns
    -------
    connectable : bool
        True when the URL could be opened; False when any of
        *error_classes* was raised while connecting.
    """
    try:
        # Open and immediately close the connection; we only care
        # about whether it succeeds.
        with urlopen(url):
            pass
    except error_classes:
        return False
    return True
示例2: _get_data
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US',
              start=2002, end=2005):
    """Fetch one World Bank indicator series for a country and date range.

    Parameters
    ----------
    indicator : str
        World Bank indicator code.
    country : str
        ISO country code.
    start, end : int
        First and last year of the query window.

    Returns
    -------
    pandas.DataFrame
        One row per observation; columns are country name, ISO-2 code,
        year and value (unnamed, positions 0-3).
    """
    # Build URL for api call
    url = ("http://api.worldbank.org/countries/" + country + "/indicators/" +
           indicator + "?date=" + str(start) + ":" + str(end) +
           "&per_page=25000&format=json")
    # Download the raw JSON payload.
    with urlopen(url) as response:
        payload = response.read()
    # The API wraps results in a two-element list; index 1 holds the records.
    records = json.loads(payload)[1]
    columns = [
        [rec['country']['value'] for rec in records],
        [rec['country']['id'] for rec in records],
        [rec['date'] for rec in records],
        [rec['value'] for rec in records],
    ]
    # Transpose so each observation becomes a row.
    return pandas.DataFrame(columns).T
示例3: _read
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _read(io):
    """Try to read from a url, file or string.

    Parameters
    ----------
    io : str, unicode, or file-like

    Returns
    -------
    raw_text : str

    Raises
    ------
    TypeError
        If *io* is none of the supported input kinds.
    """
    if _is_url(io):
        # Remote resource: download the body.
        with urlopen(io) as url:
            return url.read()
    if hasattr(io, 'read'):
        # Already an open file-like object.
        return io.read()
    if os.path.isfile(io):
        # Local path on disk.
        with open(io) as f:
            return f.read()
    if isinstance(io, string_types):
        # Plain string: the string itself is the raw text.
        return io
    raise TypeError("Cannot read object of type %r" % type(io).__name__)
示例4: _retry_read_url
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _retry_read_url(url, retry_count, pause, name):
    """Fetch a CSV from *url*, retrying on network errors.

    Sleeps *pause* seconds before each of up to *retry_count* attempts.
    On success the body is parsed into a DataFrame with the row order
    reversed (oldest first).

    Raises
    ------
    IOError
        If every attempt failed.
    """
    for _ in range(retry_count):
        time.sleep(pause)
        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                payload = resp.read()
        except _network_error_classes:
            continue
        frame = read_csv(StringIO(bytes_to_str(payload)), index_col=0,
                         parse_dates=True)[::-1]
        # Yahoo! Finance sometimes does this awesome thing where they
        # return 2 rows for the most recent business day
        if len(frame) > 2 and frame.index[-1] == frame.index[-2]:  # pragma: no cover
            frame = frame[:-1]
        return frame
    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))
示例5: get_dividends_yahoo
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_dividends_yahoo(sid, start, end):
    """Fetch the dividend history for ticker *sid* from Yahoo! Finance.

    Adapted from pandas' ``get_data_yahoo``; only the ``g=v`` query
    parameter differs, which switches the endpoint to dividend data.

    Parameters
    ----------
    sid : str
        Ticker symbol.
    start, end : datetime-like
        Date range (anything ``pd.to_datetime`` accepts).

    Returns
    -------
    DataFrame indexed by date, oldest row first.
    """
    from pandas.compat import StringIO, bytes_to_str
    from pandas.io.common import urlopen

    start, end = pd.to_datetime(start), pd.to_datetime(end)
    # Yahoo! encodes months zero-based, hence the ``month - 1`` terms.
    url = ('http://ichart.finance.yahoo.com/table.csv?' + 's=%s' % sid +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v' +  # THE CHANGE
           '&ignore=.csv')
    with urlopen(url) as resp:
        payload = resp.read()
    frame = pd.read_csv(StringIO(bytes_to_str(payload)), index_col=0,
                        parse_dates=True, na_values='-')[::-1]
    # Yahoo! Finance sometimes does this awesome thing where they
    # return 2 rows for the most recent business day
    if len(frame) > 2 and frame.index[-1] == frame.index[-2]:  # pragma: no cover
        frame = frame[:-1]
    return frame
示例6: _read
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _read(obj):
    """Try to read from a url, file or string.

    Parameters
    ----------
    obj : str, unicode, or file-like

    Returns
    -------
    raw_text : str

    Raises
    ------
    TypeError
        If *obj* is none of the supported input kinds.
    """
    if _is_url(obj):
        # Remote resource: download the body.
        with urlopen(obj) as url:
            raw = url.read()
    elif hasattr(obj, 'read'):
        # Already an open file-like object.
        raw = obj.read()
    elif isinstance(obj, char_types):
        raw = obj
        # If the string happens to name a file on disk, prefer its
        # contents; isfile() can raise on odd inputs (e.g. embedded
        # NULs), in which case the string itself is returned below.
        try:
            if os.path.isfile(raw):
                with open(raw, 'rb') as f:
                    return f.read()
        except (TypeError, ValueError):
            pass
    else:
        raise TypeError("Cannot read object of type %r" % type(obj).__name__)
    return raw
示例7: get_countries
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_countries():
    '''Query the World Bank API for metadata on all countries.

    Returns
    -------
    pandas.DataFrame
        One row per country; the nested region/income/lending fields
        are flattened to their display values and the id columns are
        renamed to iso3c / iso2c.
    '''
    url = 'http://api.worldbank.org/countries/?per_page=1000&format=json'
    with urlopen(url) as response:
        payload = response.read()
    table = pandas.DataFrame(json.loads(payload)[1])
    # Each of these columns holds {'id': ..., 'value': ...} mappings;
    # keep only the human-readable value.
    for col in ('adminregion', 'incomeLevel', 'lendingType', 'region'):
        table[col] = [entry['value'] for entry in table[col]]
    return table.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
示例8: get_indicators
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_indicators():
    '''Download metadata for every World Bank data series.

    Returns
    -------
    pandas.DataFrame
        One row per indicator, sorted by indicator id, with the nested
        source/topics fields flattened to plain strings.
    '''
    url = 'http://api.worldbank.org/indicators?per_page=50000&format=json'
    with urlopen(url) as response:
        data = response.read()
    # The API wraps results in a two-element list; index 1 holds the records.
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    # Clean fields: 'source' holds {'id', 'value'} mappings.
    data.source = [x['value'] for x in data.source]
    fun = lambda x: x.encode('ascii', 'ignore')
    data.sourceOrganization = data.sourceOrganization.apply(fun)

    # Clean topic field: entries may be missing/malformed, fall back to ''.
    def get_value(x):
        try:
            return x['value']
        except (TypeError, KeyError):  # was a bare except; narrowed
            return ''

    fun = lambda x: [get_value(y) for y in x]
    data.topics = data.topics.apply(fun)
    data.topics = data.topics.apply(lambda x: ' ; '.join(x))
    # Clean output
    data = data.sort(columns='id')
    data.index = pandas.Index(lrange(data.shape[0]))
    return data
示例9: get_data_fred
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_data_fred(name, start=dt.datetime(2010, 1, 1),
                  end=None):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indicies of each series.

    Parameters
    ----------
    name : str or list-like of str
        FRED series name(s).
    start : datetime, default 2010-01-01
        Start of the date window.
    end : datetime, optional
        End of the date window; defaults to "today" *at call time*.
        (The previous default, ``dt.datetime.today()`` in the signature,
        was evaluated once at import and froze the date for the life of
        the process.)

    Raises
    ------
    IOError
        If FRED reports an error for a requested series.
    """
    if end is None:
        end = dt.datetime.today()
    start, end = _sanitize_dates(start, end)
    if not is_list_like(name):
        names = [name]
    else:
        names = name
    urls = [_FRED_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for
            n in names]

    def fetch_data(url, name):
        # Download one series and clip it to the requested window.
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1, names=["DATE", name],
                            na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError:
            # FRED returns an HTML error page instead of CSV for bad
            # series names; detect it and raise something meaningful.
            if data.ix[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise

    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df
示例10: get_data_famafrench
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_data_famafrench(name):
    """Download a Fama/French zip file and split it into DataFrames.

    Parameters
    ----------
    name : str
        Base name of the remote zip file; the archive is expected to
        contain a ``<name>.txt`` member.

    Returns
    -------
    dict of int -> DataFrame
        One entry per sub-table found in the text file, keyed by its
        position in the file.
    """
    # path of zip files
    zip_file_path = '{0}/{1}.zip'.format(_FAMAFRENCH_URL, name)
    with urlopen(zip_file_path) as url:
        raw = url.read()

    # Buffer the archive through a temp file so ZipFile can seek.
    with tempfile.TemporaryFile() as tmpf:
        tmpf.write(raw)

        with ZipFile(tmpf, 'r') as zf:
            data = zf.open(name + '.txt').readlines()

    # Lines of length 2 (bare line terminators) separate the sub-tables;
    # their positions mark the table boundaries.
    line_lengths = np.array(lmap(len, data))
    file_edges = np.where(line_lengths == 2)[0]

    datasets = {}
    # Pair each boundary with the next one to get (start, end) spans.
    edges = zip(file_edges + 1, file_edges[1:])
    for i, (left_edge, right_edge) in enumerate(edges):
        dataset = [d.split() for d in data[left_edge:right_edge]]
        # Tables with 10 or fewer rows are skipped (presumably headers
        # or notes rather than data -- TODO confirm).
        if len(dataset) > 10:
            ncol_raw = np.array(lmap(len, dataset))
            ncol = np.median(ncol_raw)
            # The header row has one fewer field than a data row (no
            # index label); take the last such row as the header.
            header_index = np.where(ncol_raw == ncol - 1)[0][-1]
            header = dataset[header_index]
            ds_header = dataset[header_index + 1:]
            # to ensure the header is unique
            header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
                                                                     start=1)]
            index = np.array([d[0] for d in ds_header], dtype=int)
            dataset = np.array([d[1:] for d in ds_header], dtype=float)
            datasets[i] = DataFrame(dataset, index, columns=header)

    return datasets
# Items needed for options class
示例11: get_elements_from_file
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_elements_from_file(url, element='table'):
    """Parse a local HTML file and return every tag named *element*.

    Skips (via the test helper) when bs4/html5lib are unavailable.
    """
    _skip_if_none_of(('bs4', 'html5lib'))
    from bs4 import BeautifulSoup
    file_url = file_path_to_url(url)
    with urlopen(file_url) as handle:
        soup = BeautifulSoup(handle, features='html5lib')
    return soup.find_all(element)
示例12: _build_doc
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _build_doc(self):
    """
    Raises
    ------
    ValueError
        * If a URL that lxml cannot parse is passed.

    Exception
        * Any other ``Exception`` thrown. For example, trying to parse a
          URL that is syntactically correct on a machine with no internet
          connection will fail.

    See Also
    --------
    pandas.io.html._HtmlFrameParser._build_doc
    """
    from lxml.html import parse, fromstring, HTMLParser
    from lxml.etree import XMLSyntaxError

    # recover=True lets lxml salvage as much as it can from bad markup.
    parser = HTMLParser(recover=True, encoding=self.encoding)

    try:
        if _is_url(self.io):
            with urlopen(self.io) as f:
                r = parse(f, parser=parser)
        else:
            # try to parse the input in the simplest way
            r = parse(self.io, parser=parser)
        try:
            # parse() may return a tree; normalize to its root element.
            r = r.getroot()
        except AttributeError:
            pass
    except (UnicodeDecodeError, IOError) as e:
        # if the input is a blob of html goop
        if not _is_url(self.io):
            # Fall back to treating self.io as a literal HTML string.
            r = fromstring(self.io, parser=parser)
            try:
                r = r.getroot()
            except AttributeError:
                pass
        else:
            # A genuine URL that failed; propagate the original error.
            raise e
    else:
        # Success path: make sure we really got an HTML element tree.
        if not hasattr(r, 'text_content'):
            raise XMLSyntaxError("no text parsed from document", 0, 0, 0)
    return r
示例13: get_components_yahoo
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_components_yahoo(idx_sym):
    """Fetch the component list of a Yahoo! stock index.

    Returns a DataFrame with one row per component: symbol (ticker,
    used as the index), exchange and name.

    Parameters
    ----------
    idx_sym : str
        Stock index symbol

        Examples:
        '^DJI' (Dow Jones Industrial Average)
        '^NYA' (NYSE Composite)
        '^IXIC' (NASDAQ Composite)

        See: http://finance.yahoo.com/indices for other index symbols

    Returns
    -------
    idx_df : DataFrame
    """
    stats = 'snx'
    # URL of form:
    # http://download.finance.yahoo.com/d/quotes.csv?s=@%5EIXIC&f=snxl1d1t1c1ohgv
    url = _YAHOO_COMPONENTS_URL + 's={0}&f={1}&e=.csv&h={2}'

    idx_mod = idx_sym.replace('^', '@%5E')
    url_str = url.format(idx_mod, stats, 1)

    idx_df = DataFrame()
    mask = [True]
    comp_idx = 1

    # Page through the component listing 50 tickers at a time; stop as
    # soon as a page contributes no symbols we have not already seen.
    while True in mask:
        url_str = url.format(idx_mod, stats, comp_idx)
        with urlopen(url_str) as resp:
            raw = resp.read()
        rows = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
        parsed = [row.strip().split('","') for row in rows]

        page = DataFrame(parsed, columns=['ticker', 'name', 'exchange'])
        page = page.drop_duplicates()
        page = page.set_index('ticker')
        mask = ~page.index.isin(idx_df.index)
        comp_idx = comp_idx + 50
        idx_df = idx_df.append(page[mask])

    return idx_df
示例14: get_latest_news
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_latest_news(top=None, show_content=False):
    """
    Fetch the latest financial news headlines from Sina.

    Parameters
    --------
    top : int, optional
        Number of latest headlines to return; defaults to 80.
    show_content : bool
        Whether to also download each article's body text
        (default False).

    Return
    --------
    DataFrame
        classify : news category
        title : headline
        time : publication time
        url : article link
        content : article body (only present when show_content is True)
    """
    top = ct.PAGE_NUM[2] if top is None else top
    try:
        request = Request(nv.LATEST_URL % (ct.P_TYPE['http'], ct.DOMAINS['sina'],
                                           ct.PAGES['lnews'], top,
                                           _random()))
        data_str = urlopen(request, timeout=10).read()
        data_str = data_str.decode('GBK')
        # Response is "var x = {...};"-style JS; keep only the object body.
        data_str = data_str.split('=')[1][:-1]
        # SECURITY NOTE(review): eval() on remote data. The Dummy dict
        # subclass maps bare identifiers to themselves so the JS-ish
        # object literal evaluates, but a malicious response could still
        # execute arbitrary code -- consider a real JSON/JS parser.
        data_str = eval(data_str, type('Dummy', (dict,),
                                       dict(__getitem__ = lambda s, n:n))())
        # Round-trip through json to normalize the eval'd structure.
        data_str = json.dumps(data_str)
        data_str = json.loads(data_str)
        data_str = data_str['list']
        data = []
        for r in data_str:
            rt = datetime.fromtimestamp(r['time'])
            rtstr = datetime.strftime(rt, "%m-%d %H:%M")
            arow = [r['channel']['title'], r['title'], rtstr, r['url']]
            if show_content:
                arow.append(latest_content(r['url']))
            data.append(arow)
        df = pd.DataFrame(data, columns=nv.LATEST_COLS_C if show_content else nv.LATEST_COLS)
        return df
    except Exception as er:
        # Best-effort scraper: print the error and implicitly return None.
        print(str(er))
示例15: guba_sina
# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def guba_sina(show_content=False):
    """Scrape the highlighted messages from the Sina Finance guba front page.

    Parameter
    --------
    show_content : bool
        Whether to keep each message's body text (default False).

    Return
    --------
    DataFrame
        title, message title
        content, message body (only when show_content=True)
        ptime, publication time
        rcounts, read count
        Returns None when scraping fails (the error is printed).
    """
    from pandas.io.common import urlopen
    try:
        with urlopen(nv.GUBA_SINA_URL%(ct.P_TYPE['http'],
                                       ct.DOMAINS['sina'])) as resp:
            page = resp.read()
        doc = lxml.html.document_fromstring(page)

        rows = []
        # Featured headlines at the top of the page.
        for node in doc.xpath('//div[@class="tit_04"]'):
            entry = [node.xpath('a/text()')[0]]
            entry.extend(_guba_content(node.xpath('a/@href')[0]))
            rows.append(entry)
        # Regular list items (styled separator rows are excluded).
        for node in doc.xpath('//ul[@class="list_05"]/li[not (@class)]'):
            entry = [node.xpath('a[2]/text()')[0]]
            entry.extend(_guba_content(node.xpath('a[2]/@href')[0]))
            rows.append(entry)

        frame = pd.DataFrame(rows, columns=nv.GUBA_SINA_COLS)
        frame['rcounts'] = frame['rcounts'].astype(float)
        return frame if show_content is True else frame.drop('content', axis=1)
    except Exception as er:
        # Best-effort scraper: print the error and implicitly return None.
        print(str(er))