当前位置: 首页>>代码示例>>Python>>正文


Python common.urlopen方法代码示例

本文整理汇总了Python中pandas.io.common.urlopen方法的典型用法代码示例。如果您正苦于以下问题:Python common.urlopen方法的具体用法?Python common.urlopen怎么用?Python common.urlopen使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas.io.common的用法示例。


在下文中一共展示了common.urlopen方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: can_connect

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def can_connect(url, error_classes=_network_error_classes):
    """Try to connect to the given url. True if succeeds, False if IOError
    raised

    Parameters
    ----------
    url : basestring
        The URL to try to connect to

    Returns
    -------
    connectable : bool
        Return True if no IOError (unable to connect) or URLError (bad url) was
        raised
    """
    try:
        with urlopen(url):
            pass
    except error_classes:
        return False
    else:
        return True 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:24,代码来源:testing.py

示例2: _get_data

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US',
              start=2002, end=2005):
    # Build URL for api call
    url = ("http://api.worldbank.org/countries/" + country + "/indicators/" +
           indicator + "?date=" + str(start) + ":" + str(end) +
           "&per_page=25000&format=json")
    # Download
    with urlopen(url) as response:
        data = response.read()
    # Parse JSON file
    data = json.loads(data)[1]
    country = [x['country']['value'] for x in data]
    iso2c = [x['country']['id'] for x in data]
    year = [x['date'] for x in data]
    value = [x['value'] for x in data]
    # Prepare output
    out = pandas.DataFrame([country, iso2c, year, value]).T
    return out 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:20,代码来源:wb.py

示例3: _read

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _read(io):
    """Try to read from a url, file or string.

    Parameters
    ----------
    io : str, unicode, or file-like

    Returns
    -------
    raw_text : str
    """
    if _is_url(io):
        with urlopen(io) as url:
            raw_text = url.read()
    elif hasattr(io, 'read'):
        raw_text = io.read()
    elif os.path.isfile(io):
        with open(io) as f:
            raw_text = f.read()
    elif isinstance(io, string_types):
        raw_text = io
    else:
        raise TypeError("Cannot read object of type %r" % type(io).__name__)
    return raw_text 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:26,代码来源:html.py

示例4: _retry_read_url

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _retry_read_url(url, retry_count, pause, name):
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]
            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url)) 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:23,代码来源:data.py

示例5: get_dividends_yahoo

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_dividends_yahoo(sid, start, end):
    # Taken from get_data_yahoo in Pandas library and adjust a single parameter to get dividends
    from pandas.compat import StringIO, bytes_to_str
    from pandas.io.common import urlopen

    start, end = pd.to_datetime(start), pd.to_datetime(end)
    url = ('http://ichart.finance.yahoo.com/table.csv?' + 's=%s' % sid +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v' +  # THE CHANGE
           '&ignore=.csv')

    with urlopen(url) as resp:
        lines = resp.read()
    rs = pd.read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                     parse_dates=True, na_values='-')[::-1]
    # Yahoo! Finance sometimes does this awesome thing where they
    # return 2 rows for the most recent business day
    if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
        rs = rs[:-1]
    return rs 
开发者ID:bpsmith,项目名称:tia,代码行数:27,代码来源:ins.py

示例6: _read

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _read(obj):
    """Try to read from a url, file or string.

    Parameters
    ----------
    obj : str, unicode, or file-like

    Returns
    -------
    raw_text : str
    """
    if _is_url(obj):
        with urlopen(obj) as url:
            text = url.read()
    elif hasattr(obj, 'read'):
        text = obj.read()
    elif isinstance(obj, char_types):
        text = obj
        try:
            if os.path.isfile(text):
                with open(text, 'rb') as f:
                    return f.read()
        except (TypeError, ValueError):
            pass
    else:
        raise TypeError("Cannot read object of type %r" % type(obj).__name__)
    return text 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:29,代码来源:html.py

示例7: get_countries

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_countries():
    '''Query information about countries
    '''
    url = 'http://api.worldbank.org/countries/?per_page=1000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    data.adminregion = [x['value'] for x in data.adminregion]
    data.incomeLevel = [x['value'] for x in data.incomeLevel]
    data.lendingType = [x['value'] for x in data.lendingType]
    data.region = [x['value'] for x in data.region]
    data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
    return data 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:16,代码来源:wb.py

示例8: get_indicators

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_indicators():
    '''Download information about all World Bank data series
    '''
    url = 'http://api.worldbank.org/indicators?per_page=50000&format=json'
    with urlopen(url) as response:
        data = response.read()
    data = json.loads(data)[1]
    data = pandas.DataFrame(data)
    # Clean fields
    data.source = [x['value'] for x in data.source]
    fun = lambda x: x.encode('ascii', 'ignore')
    data.sourceOrganization = data.sourceOrganization.apply(fun)
    # Clean topic field

    def get_value(x):
        try:
            return x['value']
        except:
            return ''
    fun = lambda x: [get_value(y) for y in x]
    data.topics = data.topics.apply(fun)
    data.topics = data.topics.apply(lambda x: ' ; '.join(x))
    # Clean outpu
    data = data.sort(columns='id')
    data.index = pandas.Index(lrange(data.shape[0]))
    return data 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:28,代码来源:wb.py

示例9: get_data_fred

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_data_fred(name, start=dt.datetime(2010, 1, 1),
                  end=dt.datetime.today()):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indicies of each series.
    """
    start, end = _sanitize_dates(start, end)

    if not is_list_like(name):
        names = [name]
    else:
        names = name

    urls = [_FRED_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for
            n in names]

    def fetch_data(url, name):
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1, names=["DATE", name],
                            na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError:
            if data.ix[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise
    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:38,代码来源:data.py

示例10: get_data_famafrench

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_data_famafrench(name):
    # path of zip files
    zip_file_path = '{0}/{1}.zip'.format(_FAMAFRENCH_URL, name)

    with urlopen(zip_file_path) as url:
        raw = url.read()

    with tempfile.TemporaryFile() as tmpf:
        tmpf.write(raw)

        with ZipFile(tmpf, 'r') as zf:
            data = zf.open(name + '.txt').readlines()

    line_lengths = np.array(lmap(len, data))
    file_edges = np.where(line_lengths == 2)[0]

    datasets = {}
    edges = zip(file_edges + 1, file_edges[1:])
    for i, (left_edge, right_edge) in enumerate(edges):
        dataset = [d.split() for d in data[left_edge:right_edge]]
        if len(dataset) > 10:
            ncol_raw = np.array(lmap(len, dataset))
            ncol = np.median(ncol_raw)
            header_index = np.where(ncol_raw == ncol - 1)[0][-1]
            header = dataset[header_index]
            ds_header = dataset[header_index + 1:]
            # to ensure the header is unique
            header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
                                                                     start=1)]
            index = np.array([d[0] for d in ds_header], dtype=int)
            dataset = np.array([d[1:] for d in ds_header], dtype=float)
            datasets[i] = DataFrame(dataset, index, columns=header)

    return datasets


# Items needed for options class 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:39,代码来源:data.py

示例11: get_elements_from_file

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_elements_from_file(url, element='table'):
    _skip_if_none_of(('bs4', 'html5lib'))
    url = file_path_to_url(url)
    from bs4 import BeautifulSoup
    with urlopen(url) as f:
        soup = BeautifulSoup(f, features='html5lib')
    return soup.find_all(element) 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:9,代码来源:test_html.py

示例12: _build_doc

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def _build_doc(self):
        """
        Raises
        ------
        ValueError
            * If a URL that lxml cannot parse is passed.

        Exception
            * Any other ``Exception`` thrown. For example, trying to parse a
              URL that is syntactically correct on a machine with no internet
              connection will fail.

        See Also
        --------
        pandas.io.html._HtmlFrameParser._build_doc
        """
        from lxml.html import parse, fromstring, HTMLParser
        from lxml.etree import XMLSyntaxError
        parser = HTMLParser(recover=True, encoding=self.encoding)

        try:
            if _is_url(self.io):
                with urlopen(self.io) as f:
                    r = parse(f, parser=parser)
            else:
                # try to parse the input in the simplest way
                r = parse(self.io, parser=parser)
            try:
                r = r.getroot()
            except AttributeError:
                pass
        except (UnicodeDecodeError, IOError) as e:
            # if the input is a blob of html goop
            if not _is_url(self.io):
                r = fromstring(self.io, parser=parser)

                try:
                    r = r.getroot()
                except AttributeError:
                    pass
            else:
                raise e
        else:
            if not hasattr(r, 'text_content'):
                raise XMLSyntaxError("no text parsed from document", 0, 0, 0)
        return r 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:48,代码来源:html.py

示例13: get_components_yahoo

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_components_yahoo(idx_sym):
    """
    Returns DataFrame containing list of component information for
    index represented in idx_sym from yahoo. Includes component symbol
    (ticker), exchange, and name.

    Parameters
    ----------
    idx_sym : str
        Stock index symbol
        Examples:
        '^DJI' (Dow Jones Industrial Average)
        '^NYA' (NYSE Composite)
        '^IXIC' (NASDAQ Composite)

        See: http://finance.yahoo.com/indices for other index symbols

    Returns
    -------
    idx_df : DataFrame
    """
    stats = 'snx'
    # URL of form:
    # http://download.finance.yahoo.com/d/quotes.csv?s=@%5EIXIC&f=snxl1d1t1c1ohgv
    url = _YAHOO_COMPONENTS_URL + 's={0}&f={1}&e=.csv&h={2}'

    idx_mod = idx_sym.replace('^', '@%5E')
    url_str = url.format(idx_mod, stats, 1)

    idx_df = DataFrame()
    mask = [True]
    comp_idx = 1

    # LOOP across component index structure,
    # break when no new components are found
    while True in mask:
        url_str = url.format(idx_mod, stats,  comp_idx)
        with urlopen(url_str) as resp:
            raw = resp.read()
        lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
        lines = [line.strip().split('","') for line in lines]

        temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
        temp_df = temp_df.drop_duplicates()
        temp_df = temp_df.set_index('ticker')
        mask = ~temp_df.index.isin(idx_df.index)

        comp_idx = comp_idx + 50
        idx_df = idx_df.append(temp_df[mask])

    return idx_df 
开发者ID:ktraunmueller,项目名称:Computable,代码行数:53,代码来源:data.py

示例14: get_latest_news

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def get_latest_news(top=None, show_content=False):
    """
        获取即时财经新闻
    
    Parameters
    --------
        top:数值,显示最新消息的条数,默认为80条
        show_content:是否显示新闻内容,默认False
    
    Return
    --------
        DataFrame
            classify :新闻类别
            title :新闻标题
            time :发布时间
            url :新闻链接
            content:新闻内容(在show_content为True的情况下出现)
    """
    top = ct.PAGE_NUM[2] if top is None else top
    try:
        request = Request(nv.LATEST_URL % (ct.P_TYPE['http'], ct.DOMAINS['sina'],
                                                   ct.PAGES['lnews'], top,
                                                   _random()))
        data_str = urlopen(request, timeout=10).read()
        data_str = data_str.decode('GBK')
        data_str = data_str.split('=')[1][:-1]
        data_str = eval(data_str, type('Dummy', (dict,), 
                                       dict(__getitem__ = lambda s, n:n))())
        data_str = json.dumps(data_str)
        data_str = json.loads(data_str)
        data_str = data_str['list']
        data = []
        for r in data_str:
            rt = datetime.fromtimestamp(r['time'])
            rtstr = datetime.strftime(rt, "%m-%d %H:%M")
            arow = [r['channel']['title'], r['title'], rtstr, r['url']]
            if show_content:
                arow.append(latest_content(r['url']))
            data.append(arow)
        df = pd.DataFrame(data, columns=nv.LATEST_COLS_C if show_content else nv.LATEST_COLS)
        return df
    except Exception as er:
        print(str(er)) 
开发者ID:waditu,项目名称:tushare,代码行数:45,代码来源:newsevent.py

示例15: guba_sina

# 需要导入模块: from pandas.io import common [as 别名]
# 或者: from pandas.io.common import urlopen [as 别名]
def guba_sina(show_content=False):
    """
       获取sina财经股吧首页的重点消息
    Parameter
    --------
        show_content:是否显示内容,默认False
    
    Return
    --------
    DataFrame
        title, 消息标题
        content, 消息内容(show_content=True的情况下)
        ptime, 发布时间
        rcounts,阅读次数
    """
    
    from pandas.io.common import urlopen
    try:
        with urlopen(nv.GUBA_SINA_URL%(ct.P_TYPE['http'],
                                       ct.DOMAINS['sina'])) as resp:
            lines = resp.read()
        html = lxml.html.document_fromstring(lines)
        res = html.xpath('//ul[@class=\"list_05\"]/li[not (@class)]')
        heads = html.xpath('//div[@class=\"tit_04\"]')
        data = []
        for head in heads:
            title = head.xpath('a/text()')[0]
            url = head.xpath('a/@href')[0]
            ds = [title]
            ds.extend(_guba_content(url))
            data.append(ds)
        for row in res:
            title = row.xpath('a[2]/text()')[0]
            url = row.xpath('a[2]/@href')[0]
            ds = [title]
            ds.extend(_guba_content(url))
            data.append(ds)
        df = pd.DataFrame(data, columns=nv.GUBA_SINA_COLS)
        df['rcounts'] = df['rcounts'].astype(float)
        return df if show_content is True else df.drop('content', axis=1)
    except Exception as er:
        print(str(er)) 
开发者ID:waditu,项目名称:tushare,代码行数:44,代码来源:newsevent.py


注:本文中的pandas.io.common.urlopen方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。