当前位置: 首页>>代码示例>>Python>>正文


Python pandas.read_html方法代码示例

本文整理汇总了Python中pandas.read_html方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_html方法的具体用法?Python pandas.read_html怎么用?Python pandas.read_html使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.read_html方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_portfolio_fromttjj

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None  # note start is always 1.1 4.1 7.1 10.1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["报告期"])
    df["stock_ratio"] = df["股票占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["债券占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["现金占净比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    #     df["dr_ratio"] = df["存托凭证占净比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["净资产(亿元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


# this is the most elegant approach to dispatch get_daily, the definition can be such simple
# you actually don't need to bother on start end blah, everything is taken care of by ``cahcedio`` 
开发者ID:refraction-ray,项目名称:xalpha,代码行数:25,代码来源:universal.py

示例2: addpositionstodict

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def addpositionstodict(gendict):
    print("Downloading position information from web...")
    for accidwithloc in tqdm(gendict):
        if 'Start' in gendict[accidwithloc]:
            continue
        accid = '_'.join(accidwithloc.split('_')[:-1])
        url = ('http://crispr.i2bc.paris-saclay.fr/crispr/crispr_db.php?'
               'checked%5B%5D={}'.format(accid))
        page = requests.get(url)
        htmltable = html.fromstring(page.content).xpath(
            "//table[normalize-space(@class)='primary_table']")[1]
        strtable = etree.tostring(htmltable)
        # converts to pandas df and then to numpy array then drop titles
        arrtable = pandas.read_html(strtable)[0].as_matrix()[2:]
        for row in arrtable:
            if row[0] in gendict:
                gendict[row[0]]['Start'] = row[2]
                gendict[row[0]]['Stop'] = row[3]
            else:
                if row[1] != 'questionable':
                    print("Can't find %s in local files" % row[0])
    return gendict 
开发者ID:phageParser,项目名称:phageParser,代码行数:24,代码来源:populate.py

示例3: __query_new_stocks

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def __query_new_stocks(self):
        DATA_URL = 'http://vip.stock.finance.sina.com.cn/corp/view/vRPD_NewStockIssue.php?page=1&cngem=0&orderBy=NetDate&orderType=desc'
        html = lxml.html.parse(DATA_URL)
        res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
        if six.PY2:
            sarr = [etree.tostring(node) for node in res]
        else:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        sarr = ''.join(sarr)
        sarr = sarr.replace('<font color="red">*</font>', '')
        sarr = '<table>%s</table>' % sarr
        df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
        df = df.select(lambda x: x in [0, 1, 2, 3, 7], axis=1)
        df.columns = ['code', 'xcode', 'name', 'ipo_date', 'price']
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        df['xcode'] = df['xcode'].map(lambda x: str(x).zfill(6))
        return df 
开发者ID:QUANTAXIS,项目名称:QUANTAXIS,代码行数:19,代码来源:shipaneclient.py

示例4: _fetch_documentation

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def _fetch_documentation(version, base_url="https://spark.apache.org/docs"):
    doc_urls = [
        "{base_url}/{version}/configuration.html",
        "{base_url}/{version}/sql-programming-guide.html",
        "{base_url}/{version}/monitoring.html",
        "{base_url}/{version}/spark-standalone.html",
        "{base_url}/{version}/running-on-mesos.html",
        "{base_url}/{version}/running-on-yarn.html",
    ]

    for url in doc_urls:
        doc_url = url.format(version=version, base_url=base_url)
        # print(url)
        print("Loading spark properties from %s", doc_url)
        dfs = pd.read_html(doc_url, header=0)
        desired_cols = ["Property Name", "Default", "Meaning"]
        for df in dfs:
            if ("Property Name" in df) and ('Default' in df):
                for pn, default, desc in df[desired_cols].itertuples(index=False):
                    if type(default) == numpy.bool_:
                        default = bool(default)
                    yield pn, default, desc 
开发者ID:Valassis-Digital-Media,项目名称:spylon,代码行数:24,代码来源:update_spark_params.py

示例5: get_forex_buy_quote

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def get_forex_buy_quote(currency_code: str = 'EUR', source: str = 'FNB', order_type: str = 'buy'):
    """Get latest forex from FNB website

    """
    if source == 'FNB':
        tables = pd.read_html(
            'https://www.fnb.co.za/Controller?nav=rates.forex.list.ForexRatesList',
            index_col=1, header=0, match=currency_code)

        df = tables[0]

        types = {
            'buy': 'Bank Selling Rate',
            'sell': 'Bank Buying Rate',
        }

        exhange_rate = df.loc[currency_code, types[order_type]]

        return Decimal("%.4f" % float(exhange_rate)) 
开发者ID:rsxm,项目名称:bitrader,代码行数:21,代码来源:arbitrage_tools.py

示例6: _profit_divis

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def _profit_divis(pageNo, dataArr, nextPage):
        ct._write_console()
        html = lxml.html.parse('%sdata.cfi.cn/%s'%(ct.P_TYPE['http'], nextPage))
        res = html.xpath("//table[@class=\"table_data\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = sarr.replace('--', '0')
        sarr = '<table>%s</table>'%sarr
        df = pd.read_html(sarr, skiprows=[0])[0]
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@id=\"content\"]/div[2]/a[last()]/@href')[0]
        np = nextPage.split('&')[2].split('=')[1]
        if pageNo < int(np):
            return _profit_divis(int(np), dataArr, nextPage)
        else:
            return dataArr 
开发者ID:waditu,项目名称:tushare,代码行数:21,代码来源:reference.py

示例7: _today_ticks

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            html = lxml.html.parse(ct.TODAY_TICKS_URL % (ct.P_TYPE['http'],
                                                         ct.DOMAINS['vsf'], ct.PAGES['t_ticks'],
                                                         symbol, tdate, pageNo
                                ))  
            res = html.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            sarr = sarr.replace('--', '0')
            df = pd.read_html(StringIO(sarr), parse_dates=False)[0]
            df.columns = ct.TODAY_TICK_COLUMNS
            df['pchange'] = df['pchange'].map(lambda x : x.replace('%', ''))
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG) 
开发者ID:waditu,项目名称:tushare,代码行数:27,代码来源:trading.py

示例8: fix_span_tables

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def fix_span_tables(soup):
    classes = OrderedDict([("ltx_tabular", "table"), ("ltx_tr", "tr"), ("ltx_th", "th"),
               ("ltx_tbody", "tbody"), ("ltx_thead", "thead"), ("ltx_td", "td"),
               ("ltx_tfoot", "tfoot")])

    query = ','.join(["span." + c for c in classes.keys()])
    for elem in soup.select(query):
        for k, v in classes.items():
            if k in elem.attrs["class"]:
                elem.name = v
                break

# pandas.read_html treats th differently
# by trying in a few places to get column names
# for now <th>s are changed to <td>s, but we still
# have classes (ltx_th) to distinguish them 
开发者ID:paperswithcode,项目名称:axcell,代码行数:18,代码来源:extract_tables.py

示例9: _sz_hz

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def _sz_hz(date='', retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            request = Request(rv.MAR_SZ_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'],
                                    ct.PAGES['szsefc'], date))
            lines = urlopen(request, timeout = 10).read()
            if len(lines) <= 200:
                return pd.DataFrame()
            df = pd.read_html(lines, skiprows=[0])[0]
            df.columns = rv.MAR_SZ_HZ_COLS
            df['opDate'] = date
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG) 
开发者ID:andyzsf,项目名称:TuShare,代码行数:20,代码来源:reference.py

示例10: getdatafromViewDNS

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def getdatafromViewDNS(searchQuery):
	searchQuery = searchQuery.replace(" ", "+")
	url = "https://viewdns.info/reversewhois/?q=" + searchQuery
	print ("[*] Extracting from: " + url)
	try:
		result = pd.read_html(requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text)
		response = result[3][0]
		iter_url = iter(response)
		return iter_url
       # next(iter_url)
        #for url in iter_url:
		#	print(url)			
	except Exception as e:
		print("[!] Couldn't send query, error: {e} exiting...\n")
		exit
	
# Will return the org name for any domain name. 
开发者ID:InitRoot,项目名称:fransRecon,代码行数:19,代码来源:fransRecon.py

示例11: stock_info_change_name

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def stock_info_change_name(stock="688588"):
    """
    新浪财经-股票曾用名
    http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/300378.phtml
    :param stock: 股票代码
    :type stock: str
    :return: 股票曾用名列表
    :rtype: list
    """
    url = f"http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/{stock}.phtml"
    r = requests.get(url)
    temp_df = pd.read_html(r.text)[3].iloc[:, :2]
    temp_df.dropna(inplace=True)
    temp_df.columns = ["item", "value"]
    temp_df["item"] = temp_df["item"].str.split(":", expand=True)[0]
    try:
        name_list = temp_df[temp_df["item"] == "证券简称更名历史"].value.tolist()[0].split(" ")
        return name_list
    except:
        return None 
开发者ID:jindaxiang,项目名称:akshare,代码行数:22,代码来源:stock_info.py

示例12: stock_sse_summary

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def stock_sse_summary():
    """
    上海证券交易所-总貌
    http://www.sse.com.cn/market/stockdata/statistic/
    :return: 上海证券交易所-总貌
    :rtype: pandas.DataFrame
    """
    url = "http://www.sse.com.cn/market/stockdata/statistic/"
    r = requests.get(url)
    r.encoding = "utf-8"
    big_df = pd.DataFrame()
    temp_list = ["总貌", "主板", "科创板"]
    for i in range(len(pd.read_html(r.text))):
        for j in range(0, 2):
            inner_df = pd.read_html(r.text)[i].iloc[:, j].str.split("  ", expand=True)
            inner_df["item"] = temp_list[i]
            big_df = big_df.append(inner_df)
    big_df.dropna(how="any", inplace=True)
    big_df.columns = ["item", "number", "type"]
    big_df = big_df[["type", "item", "number"]]
    return big_df 
开发者ID:jindaxiang,项目名称:akshare,代码行数:23,代码来源:stock_summary.py

示例13: sunrise_city_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def sunrise_city_list() -> list:
    """
    查询日出与日落数据的城市列表
    :return: 所有可以获取的数据的城市列表
    :rtype: list
    """
    url = "https://www.timeanddate.com/sun/china"
    res = requests.get(url)
    city_list = []
    china_city_one_df = pd.read_html(res.text)[0]
    china_city_two_df = pd.read_html(res.text)[1]
    city_list.extend([item.lower() for item in china_city_one_df.iloc[:, 0].tolist()])
    city_list.extend([item.lower() for item in china_city_one_df.iloc[:, 1].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 0].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 1].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 2].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 3].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 4][:-2].tolist()])
    return city_list 
开发者ID:jindaxiang,项目名称:akshare,代码行数:21,代码来源:time_and_date.py

示例14: sunrise_daily

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def sunrise_daily(date: str = "20200428", city: str = "北京") -> pd.DataFrame:
    """
    每日日出日落数据
    https://www.timeanddate.com/sun/china/shaoxing
    :param date: 需要查询的日期, e.g., “20200428”
    :type date: str
    :param city: 需要查询的城市; 注意输入的格式, e.g., "北京", "上海"
    :type city: str
    :return: 返回指定日期指定地区的日出日落数据
    :rtype: pandas.DataFrame
    """
    if pypinyin.slug(city, separator='') in sunrise_city_list():
        year = date[:4]
        month = date[4:6]
        url = f"https://www.timeanddate.com/sun/china/{pypinyin.slug(city, separator='')}?month={month}&year={year}"
        res = requests.get(url)
        table = pd.read_html(res.text, header=2)[0]
        month_df = table.iloc[:-1, ]
        day_df = month_df[month_df.iloc[:, 0].astype(str).str.zfill(2) == date[6:]]
        day_df.index = pd.to_datetime([date] * len(day_df), format="%Y%m%d")
        return day_df
    else:
        return "请输入正确的城市名称" 
开发者ID:jindaxiang,项目名称:akshare,代码行数:25,代码来源:time_and_date.py

示例15: scrap

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_html [as 别名]
def scrap(url, browser, retryCount=2):
        """ 抓取网页table

        :param url: 网址
        :param browser: 浏览器
        :return: dataframe
        """
        try:
            while retryCount > 0:
                try:
                    browser.get(url)
                    time.sleep(random.random() / 4)
                    if 'thead' in browser.page_source:
                        break
                except Exception as e:
                    print(retryCount, e.args)
                    retryCount -= 1
                    if retryCount == 1:
                        mProxy.deleteProxy(myProxy)
            for x in ['lxml', 'xml', 'html5lib']:
                # 可能会出现lxml版本大于4.1.1时,获取不到table
                try:
                    soup = BeautifulSoup(browser.page_source, x)
                    table = soup.find_all(id='tb_cgtj')[0]
                    if table:
                        break
                except:
                    time.sleep(0.1)
                    print('using BeautifulSoup {}'.format(x))
            df = pd.read_html(str(table), header=1)[0]
            df.columns = ['tradedate', 'related', 'close', 'zd', 'hvol', 'hamount', 'hpercent', 'oneday', 'fiveday',
                          'tenday']
        except Exception as e:
            print(e.args)
            return pd.DataFrame()

        return df 
开发者ID:pchaos,项目名称:wanggeService,代码行数:39,代码来源:hsgtcg.py


注:本文中的pandas.read_html方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。