

Python pandas.read_html Method Code Examples

This article collects and summarizes typical usage examples of the pandas.read_html method in Python. If you are unsure what pandas.read_html does, how to call it, or what it looks like in practice, the curated code examples below may help. You can also explore further usage examples from pandas, the package this method belongs to.


The following presents 15 code examples of pandas.read_html, sorted by popularity by default. You can upvote the examples you find helpful; your feedback helps the system recommend better Python code examples.
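Before the examples, a quick orientation: pandas.read_html parses every <table> element it finds in a URL, a file-like object, or an HTML string, and returns a list of DataFrames, one per table; it requires an HTML parser such as lxml or beautifulsoup4/html5lib to be installed. The minimal sketch below, using a made-up inline HTML fragment, shows the basic call pattern that all of the examples build on.

import pandas as pd
from io import StringIO

# a tiny HTML fragment containing a single table (hypothetical data)
html = """
<table>
  <tr><th>code</th><th>price</th></tr>
  <tr><td>600000</td><td>10.50</td></tr>
  <tr><td>600036</td><td>12.30</td></tr>
</table>
"""

# read_html returns a list of DataFrames, one per <table> found in the input
dfs = pd.read_html(StringIO(html))
df = dfs[0]  # column names are inferred from the <th> cells: 'code', 'price'
print(df)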

Example 1: get_portfolio_fromttjj

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def get_portfolio_fromttjj(code, start=None, end=None):
    startobj = dt.datetime.strptime(start, "%Y%m%d")
    endobj = dt.datetime.strptime(end, "%Y%m%d")
    if (endobj - startobj).days < 90:
        return None  # note start is always 1.1 4.1 7.1 10.1 in incremental updates
    if code.startswith("F"):
        code = code[1:]
    r = rget("http://fundf10.eastmoney.com/zcpz_{code}.html".format(code=code))
    s = BeautifulSoup(r.text, "lxml")
    table = s.find("table", class_="tzxq")
    df = pd.read_html(str(table))[0]
    df["date"] = pd.to_datetime(df["報告期"])
    df["stock_ratio"] = df["股票占淨比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["bond_ratio"] = df["債券占淨比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    df["cash_ratio"] = df["現金占淨比"].replace("---", "0%").apply(lambda s: _float(s[:-1]))
    #     df["dr_ratio"] = df["存托憑證占淨比"].replace("---", "0%").apply(lambda s: xa.cons._float(s[:-1]))
    df["assets"] = df["淨資產(億元)"]
    df = df[::-1]
    return df[["date", "stock_ratio", "bond_ratio", "cash_ratio", "assets"]]


# this is the most elegant approach to dispatch get_daily; the definition can be this simple
# you actually don't need to bother with start/end handling, everything is taken care of by ``cachedio``
Developer: refraction-ray, Project: xalpha, Lines of code: 25, Source file: universal.py
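Example 1 cleans the percentage columns after parsing. As a side note, read_html also accepts a converters mapping that applies a function to each raw cell of a given column while parsing, which can replace that post-processing step; a minimal, hedged sketch on made-up HTML (the column names are borrowed from the example above):

import pandas as pd
from io import StringIO

html = """
<table>
  <tr><th>報告期</th><th>股票占淨比</th></tr>
  <tr><td>2020-03-31</td><td>93.50%</td></tr>
  <tr><td>2019-12-31</td><td>---</td></tr>
</table>
"""

# convert '93.50%' -> 93.5 and the '---' placeholder -> 0.0 during parsing
to_ratio = lambda s: 0.0 if s == "---" else float(s.rstrip("%"))
df = pd.read_html(StringIO(html), converters={"股票占淨比": to_ratio})[0]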

Example 2: addpositionstodict

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def addpositionstodict(gendict):
    print("Downloading position information from web...")
    for accidwithloc in tqdm(gendict):
        if 'Start' in gendict[accidwithloc]:
            continue
        accid = '_'.join(accidwithloc.split('_')[:-1])
        url = ('http://crispr.i2bc.paris-saclay.fr/crispr/crispr_db.php?'
               'checked%5B%5D={}'.format(accid))
        page = requests.get(url)
        htmltable = html.fromstring(page.content).xpath(
            "//table[normalize-space(@class)='primary_table']")[1]
        strtable = etree.tostring(htmltable)
        # convert to a pandas DataFrame, then to a numpy array, then drop the title rows
        # (note: DataFrame.as_matrix() was removed in pandas 1.0; .to_numpy() is the modern equivalent)
        arrtable = pandas.read_html(strtable)[0].as_matrix()[2:]
        for row in arrtable:
            if row[0] in gendict:
                gendict[row[0]]['Start'] = row[2]
                gendict[row[0]]['Stop'] = row[3]
            else:
                if row[1] != 'questionable':
                    print("Can't find %s in local files" % row[0])
    return gendict 
Developer: phageParser, Project: phageParser, Lines of code: 24, Source file: populate.py

Example 3: __query_new_stocks

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def __query_new_stocks(self):
        DATA_URL = 'http://vip.stock.finance.sina.com.cn/corp/view/vRPD_NewStockIssue.php?page=1&cngem=0&orderBy=NetDate&orderType=desc'
        html = lxml.html.parse(DATA_URL)
        res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
        if six.PY2:
            sarr = [etree.tostring(node) for node in res]
        else:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        sarr = ''.join(sarr)
        sarr = sarr.replace('<font color="red">*</font>', '')
        sarr = '<table>%s</table>' % sarr
        df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
        # keep only columns 0, 1, 2, 3 and 7 (DataFrame.select was removed in pandas 1.0; .iloc or a column mask is the modern equivalent)
        df = df.select(lambda x: x in [0, 1, 2, 3, 7], axis=1)
        df.columns = ['code', 'xcode', 'name', 'ipo_date', 'price']
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        df['xcode'] = df['xcode'].map(lambda x: str(x).zfill(6))
        return df 
Developer: QUANTAXIS, Project: QUANTAXIS, Lines of code: 19, Source file: shipaneclient.py

Example 4: _fetch_documentation

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def _fetch_documentation(version, base_url="https://spark.apache.org/docs"):
    doc_urls = [
        "{base_url}/{version}/configuration.html",
        "{base_url}/{version}/sql-programming-guide.html",
        "{base_url}/{version}/monitoring.html",
        "{base_url}/{version}/spark-standalone.html",
        "{base_url}/{version}/running-on-mesos.html",
        "{base_url}/{version}/running-on-yarn.html",
    ]

    for url in doc_urls:
        doc_url = url.format(version=version, base_url=base_url)
        # print(url)
        print("Loading spark properties from %s" % doc_url)
        dfs = pd.read_html(doc_url, header=0)
        desired_cols = ["Property Name", "Default", "Meaning"]
        for df in dfs:
            if ("Property Name" in df) and ('Default' in df):
                for pn, default, desc in df[desired_cols].itertuples(index=False):
                    if type(default) == numpy.bool_:
                        default = bool(default)
                    yield pn, default, desc 
Developer: Valassis-Digital-Media, Project: spylon, Lines of code: 24, Source file: update_spark_params.py

Example 5: get_forex_buy_quote

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def get_forex_buy_quote(currency_code: str = 'EUR', source: str = 'FNB', order_type: str = 'buy'):
    """Get latest forex from FNB website

    """
    if source == 'FNB':
        tables = pd.read_html(
            'https://www.fnb.co.za/Controller?nav=rates.forex.list.ForexRatesList',
            index_col=1, header=0, match=currency_code)

        df = tables[0]

        types = {
            'buy': 'Bank Selling Rate',
            'sell': 'Bank Buying Rate',
        }

        exchange_rate = df.loc[currency_code, types[order_type]]

        return Decimal("%.4f" % float(exchange_rate))
Developer: rsxm, Project: bitrader, Lines of code: 21, Source file: arbitrage_tools.py
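Example 5 relies on read_html's match argument, which keeps only the tables whose text matches the given string or regex, together with header and index_col to shape the result so a rate can be looked up with .loc. A minimal sketch of the same pattern on made-up HTML (currency names and rates are invented):

import pandas as pd
from io import StringIO

html = """
<table>
  <tr><th>Currency</th><th>Code</th><th>Bank Selling Rate</th></tr>
  <tr><td>Euro</td><td>EUR</td><td>19.87</td></tr>
  <tr><td>US Dollar</td><td>USD</td><td>18.12</td></tr>
</table>
<table>
  <tr><th>Unrelated</th></tr>
  <tr><td>noise</td></tr>
</table>
"""

# match drops tables whose text does not contain 'EUR';
# index_col=1 makes the Code column the index so .loc['EUR', ...] works
tables = pd.read_html(StringIO(html), match="EUR", index_col=1, header=0)
rate = tables[0].loc["EUR", "Bank Selling Rate"]  # 19.87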

Example 6: _profit_divis

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def _profit_divis(pageNo, dataArr, nextPage):
        ct._write_console()
        html = lxml.html.parse('%sdata.cfi.cn/%s'%(ct.P_TYPE['http'], nextPage))
        res = html.xpath("//table[@class=\"table_data\"]/tr")
        if ct.PY3:
            sarr = [etree.tostring(node).decode('utf-8') for node in res]
        else:
            sarr = [etree.tostring(node) for node in res]
        sarr = ''.join(sarr)
        sarr = sarr.replace('--', '0')
        sarr = '<table>%s</table>'%sarr
        df = pd.read_html(sarr, skiprows=[0])[0]
        dataArr = dataArr.append(df, ignore_index=True)
        nextPage = html.xpath('//div[@id=\"content\"]/div[2]/a[last()]/@href')[0]
        np = nextPage.split('&')[2].split('=')[1]
        if pageNo < int(np):
            return _profit_divis(int(np), dataArr, nextPage)
        else:
            return dataArr 
Developer: waditu, Project: tushare, Lines of code: 21, Source file: reference.py

Example 7: _today_ticks

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def _today_ticks(symbol, tdate, pageNo, retry_count, pause):
    ct._write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            html = lxml.html.parse(ct.TODAY_TICKS_URL % (ct.P_TYPE['http'],
                                                         ct.DOMAINS['vsf'], ct.PAGES['t_ticks'],
                                                         symbol, tdate, pageNo
                                ))  
            res = html.xpath('//table[@id=\"datatbl\"]/tbody/tr')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            sarr = sarr.replace('--', '0')
            df = pd.read_html(StringIO(sarr), parse_dates=False)[0]
            df.columns = ct.TODAY_TICK_COLUMNS
            df['pchange'] = df['pchange'].map(lambda x : x.replace('%', ''))
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG) 
Developer: waditu, Project: tushare, Lines of code: 27, Source file: trading.py

Example 8: fix_span_tables

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def fix_span_tables(soup):
    classes = OrderedDict([("ltx_tabular", "table"), ("ltx_tr", "tr"), ("ltx_th", "th"),
               ("ltx_tbody", "tbody"), ("ltx_thead", "thead"), ("ltx_td", "td"),
               ("ltx_tfoot", "tfoot")])

    query = ','.join(["span." + c for c in classes.keys()])
    for elem in soup.select(query):
        for k, v in classes.items():
            if k in elem.attrs["class"]:
                elem.name = v
                break

# pandas.read_html treats th differently
# by trying in a few places to get column names
# for now <th>s are changed to <td>s, but we still
# have classes (ltx_th) to distinguish them 
Developer: paperswithcode, Project: axcell, Lines of code: 18, Source file: extract_tables.py
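A note related to Example 8's closing comment: read_html promotes rows made of <th> cells to column names automatically, while a table built only of <td> cells keeps integer column labels unless you pass header= explicitly. A minimal sketch with made-up fragments:

import pandas as pd
from io import StringIO

html_th = "<table><tr><th>a</th><th>b</th></tr><tr><td>1</td><td>2</td></tr></table>"
html_td = "<table><tr><td>a</td><td>b</td></tr><tr><td>1</td><td>2</td></tr></table>"

# <th> cells are picked up as column names without any extra arguments
print(pd.read_html(StringIO(html_th))[0].columns.tolist())            # ['a', 'b']

# an all-<td> table needs header=0 to promote its first row to column names
print(pd.read_html(StringIO(html_td), header=0)[0].columns.tolist())  # ['a', 'b']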

Example 9: _sz_hz

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def _sz_hz(date='', retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        ct._write_console()
        try:
            request = Request(rv.MAR_SZ_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'],
                                    ct.PAGES['szsefc'], date))
            lines = urlopen(request, timeout = 10).read()
            if len(lines) <= 200:
                return pd.DataFrame()
            df = pd.read_html(lines, skiprows=[0])[0]
            df.columns = rv.MAR_SZ_HZ_COLS
            df['opDate'] = date
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG) 
Developer: andyzsf, Project: TuShare, Lines of code: 20, Source file: reference.py

Example 10: getdatafromViewDNS

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def getdatafromViewDNS(searchQuery):
    searchQuery = searchQuery.replace(" ", "+")
    url = "https://viewdns.info/reversewhois/?q=" + searchQuery
    print("[*] Extracting from: " + url)
    try:
        # read_html parses every table in the response; the domain list is expected
        # in the fourth parsed table (result[3]), column 0
        result = pd.read_html(requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text)
        response = result[3][0]
        iter_url = iter(response)
        return iter_url
        # next(iter_url)
        # for url in iter_url:
        #     print(url)
    except Exception as e:
        print(f"[!] Couldn't send query, error: {e} exiting...\n")
        exit()

# Will return the org name for any domain name. 
Developer: InitRoot, Project: fransRecon, Lines of code: 19, Source file: fransRecon.py

Example 11: stock_info_change_name

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def stock_info_change_name(stock="688588"):
    """
    新浪財經-股票曾用名
    http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/300378.phtml
    :param stock: 股票代碼
    :type stock: str
    :return: 股票曾用名列表
    :rtype: list
    """
    url = f"http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/{stock}.phtml"
    r = requests.get(url)
    temp_df = pd.read_html(r.text)[3].iloc[:, :2]
    temp_df.dropna(inplace=True)
    temp_df.columns = ["item", "value"]
    temp_df["item"] = temp_df["item"].str.split(":", expand=True)[0]
    try:
        name_list = temp_df[temp_df["item"] == "證券簡稱更名曆史"].value.tolist()[0].split(" ")
        return name_list
    except:
        return None 
Developer: jindaxiang, Project: akshare, Lines of code: 22, Source file: stock_info.py

Example 12: stock_sse_summary

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def stock_sse_summary():
    """
    上海證券交易所-總貌
    http://www.sse.com.cn/market/stockdata/statistic/
    :return: 上海證券交易所-總貌
    :rtype: pandas.DataFrame
    """
    url = "http://www.sse.com.cn/market/stockdata/statistic/"
    r = requests.get(url)
    r.encoding = "utf-8"
    big_df = pd.DataFrame()
    temp_list = ["總貌", "主板", "科創板"]  # section labels: overview, main board, STAR Market
    for i in range(len(pd.read_html(r.text))):
        for j in range(0, 2):
            inner_df = pd.read_html(r.text)[i].iloc[:, j].str.split("  ", expand=True)
            inner_df["item"] = temp_list[i]
            big_df = big_df.append(inner_df)
    big_df.dropna(how="any", inplace=True)
    big_df.columns = ["item", "number", "type"]
    big_df = big_df[["type", "item", "number"]]
    return big_df 
Developer: jindaxiang, Project: akshare, Lines of code: 23, Source file: stock_summary.py

Example 13: sunrise_city_list

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def sunrise_city_list() -> list:
    """
    查詢日出與日落數據的城市列表
    :return: 所有可以獲取的數據的城市列表
    :rtype: list
    """
    url = "https://www.timeanddate.com/sun/china"
    res = requests.get(url)
    city_list = []
    china_city_one_df = pd.read_html(res.text)[0]
    china_city_two_df = pd.read_html(res.text)[1]
    city_list.extend([item.lower() for item in china_city_one_df.iloc[:, 0].tolist()])
    city_list.extend([item.lower() for item in china_city_one_df.iloc[:, 1].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 0].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 1].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 2].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 3].tolist()])
    city_list.extend([item.lower() for item in china_city_two_df.iloc[:, 4][:-2].tolist()])
    return city_list 
Developer: jindaxiang, Project: akshare, Lines of code: 21, Source file: time_and_date.py

Example 14: sunrise_daily

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def sunrise_daily(date: str = "20200428", city: str = "北京") -> pd.DataFrame:
    """
    每日日出日落數據
    https://www.timeanddate.com/sun/china/shaoxing
    :param date: 需要查詢的日期, e.g., “20200428”
    :type date: str
    :param city: 需要查詢的城市; 注意輸入的格式, e.g., "北京", "上海"
    :type city: str
    :return: 返回指定日期指定地區的日出日落數據
    :rtype: pandas.DataFrame
    """
    if pypinyin.slug(city, separator='') in sunrise_city_list():
        year = date[:4]
        month = date[4:6]
        url = f"https://www.timeanddate.com/sun/china/{pypinyin.slug(city, separator='')}?month={month}&year={year}"
        res = requests.get(url)
        table = pd.read_html(res.text, header=2)[0]
        month_df = table.iloc[:-1, ]
        day_df = month_df[month_df.iloc[:, 0].astype(str).str.zfill(2) == date[6:]]
        day_df.index = pd.to_datetime([date] * len(day_df), format="%Y%m%d")
        return day_df
    else:
        return "請輸入正確的城市名稱" 
Developer: jindaxiang, Project: akshare, Lines of code: 25, Source file: time_and_date.py

Example 15: scrap

# Required module import: import pandas [as alias]
# Or: from pandas import read_html [as alias]
def scrap(url, browser, retryCount=2):
        """ 抓取網頁table

        :param url: 網址
        :param browser: 瀏覽器
        :return: dataframe
        """
        try:
            while retryCount > 0:
                try:
                    browser.get(url)
                    time.sleep(random.random() / 4)
                    if 'thead' in browser.page_source:
                        break
                except Exception as e:
                    print(retryCount, e.args)
                    retryCount -= 1
                    if retryCount == 1:
                        mProxy.deleteProxy(myProxy)
            for x in ['lxml', 'xml', 'html5lib']:
                # with lxml versions above 4.1.1, the table may not be found
                try:
                    soup = BeautifulSoup(browser.page_source, x)
                    table = soup.find_all(id='tb_cgtj')[0]
                    if table:
                        break
                except:
                    time.sleep(0.1)
                    print('using BeautifulSoup {}'.format(x))
            df = pd.read_html(str(table), header=1)[0]
            df.columns = ['tradedate', 'related', 'close', 'zd', 'hvol', 'hamount', 'hpercent', 'oneday', 'fiveday',
                          'tenday']
        except Exception as e:
            print(e.args)
            return pd.DataFrame()

        return df 
Developer: pchaos, Project: wanggeService, Lines of code: 39, Source file: hsgtcg.py
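Example 15 locates the target table with BeautifulSoup before handing it to read_html, partly to work around the parser quirk noted in its comment. For completeness, read_html can often select a table directly through its attrs argument and switch the parser backend through flavor ('lxml', or 'bs4'/'html5lib', which requires beautifulsoup4 and html5lib). A minimal sketch, assuming the page still exposes a table with id="tb_cgtj":

import pandas as pd
from io import StringIO

# hypothetical fragment standing in for browser.page_source
page_source = """
<table id="tb_cgtj">
  <tr><th>tradedate</th><th>close</th></tr>
  <tr><td>2020-01-02</td><td>10.50</td></tr>
</table>
"""

# attrs filters tables by their HTML attributes; flavor picks the parser backend
df = pd.read_html(StringIO(page_source), attrs={"id": "tb_cgtj"}, flavor="bs4")[0]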


Note: The pandas.read_html examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and the copyright of the source code remains with the original authors. Please follow each project's license when distributing or using the code, and do not reproduce this article without permission.