當前位置: 首頁>>代碼示例>>Python>>正文


Python Table.from_csv方法代碼示例

本文整理匯總了Python中dazzle.core.table.Table.from_csv方法的典型用法代碼示例。如果您正苦於以下問題:Python Table.from_csv方法的具體用法?Python Table.from_csv怎麼用?Python Table.from_csv使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類dazzle.core.table.Table的用法示例。


在下文中一共展示了Table.from_csv方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_rebuild01

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
    def test_rebuild01(self):
        """Rebuild the Category table with int8 columns and verify the result.

        Loads three columns of ``Category.tsv`` (tab-delimited), rebuilds all
        of them as ``np.int8`` and then checks the row count, the dtype of
        ``CategoryID`` and that every field of row 0 equals the int8 minimum.
        """
        tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
        category = Table.from_csv(
            "Category", self.ds, tsv_path,
            delimiter='\t',
            usecols=['CategoryID', 'ParentCategoryID', 'Level'],
            verbose=False,
        )

        # Same mapping as spelling out each column -> np.int8 by hand.
        rebuilt_columns = ("CategoryID", "Level", "ParentCategoryID")
        category.rebuild(dict.fromkeys(rebuilt_columns, np.int8))

        self.assertEqual(len(category[:]), 69)
        self.assertEqual(category['CategoryID'].dtype, np.int8)

        # Row 0 is expected to hold int8.min (-128) in every rebuilt column.
        int8_min = np.iinfo(np.int8).min
        for column in rebuilt_columns:
            self.assertEqual(category[0][column], int8_min)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:13,代碼來源:test_table.py

示例2: test_rebuild02

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
 def test_rebuild02(self):
     """Load the Category table and rebuild its three columns as uint8.

     NOTE(review): no assertion is visible in this method; the expected
     outcome (possibly an exception declared by a decorator outside this
     excerpt) should be confirmed against the full test file.
     """
     tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
     category = Table.from_csv(
         "Category", self.ds, tsv_path,
         delimiter='\t',
         usecols=['CategoryID', 'ParentCategoryID', 'Level'],
         verbose=False,
     )
     unsigned_types = dict.fromkeys(("CategoryID", "Level", "ParentCategoryID"), np.uint8)
     category.rebuild(unsigned_types)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:6,代碼來源:test_table.py

示例3: test_from_csv04

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
 def test_from_csv04(self):
     """Load two columns of ``Category.tsv`` and check the table's size.

     Asserts that the resulting ``ctable`` has 68 entries and that exactly
     two columns were created.
     """
     wanted_columns = ['CategoryID', 'ParentCategoryID']
     tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
     category = Table.from_csv(
         "Category", self.ds, tsv_path,
         delimiter='\t', usecols=wanted_columns, verbose=False,
     )

     row_count = len(category.ctable)
     self.assertEqual(row_count, 68)
     column_count = len(category.columns)
     self.assertEqual(column_count, 2)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:7,代碼來源:test_table.py

示例4: test_from_csv03

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
 def test_from_csv03(self):
     """Call ``Table.from_csv`` on ``Category.tsv`` without a delimiter or column list.

     NOTE(review): no assertion is visible in this method; the expected
     outcome (possibly an exception declared by a decorator outside this
     excerpt) should be confirmed against the full test file.
     """
     tsv_path = os.path.join(AVITO_DATA_DIR, "Category.tsv")
     category = Table.from_csv("Category", self.ds, tsv_path, verbose=False)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:4,代碼來源:test_table.py

示例5: test_from_csv02

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
 def test_from_csv02(self):
     """Call ``Table.from_csv`` with a ``.json`` path as the CSV source.

     NOTE(review): no assertion is visible in this method; the expected
     outcome (possibly an exception declared by a decorator outside this
     excerpt) should be confirmed against the full test file.
     """
     source_path = "/temp/dazzle-test/dataset.json"
     wanted_columns = ['CategoryID', 'ParentCategoryID']
     Table.from_csv("Category", self.ds, source_path, usecols=wanted_columns, verbose=False)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:4,代碼來源:test_table.py

示例6: test_from_csv01

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
 def test_from_csv01(self):
     """Call ``Table.from_csv`` with the path ``/bim/bam/test.csv``.

     NOTE(review): no assertion is visible in this method; presumably the
     path does not exist and an error is expected (possibly declared by a
     decorator outside this excerpt) -- confirm against the full test file.
     """
     source_path = "/bim/bam/test.csv"
     wanted_columns = ['CategoryID', 'ParentCategoryID']
     Table.from_csv("Category", self.ds, source_path, usecols=wanted_columns, verbose=False)
開發者ID:mpage38,項目名稱:dazzle,代碼行數:4,代碼來源:test_table.py

示例7: load_dataset

# 需要導入模塊: from dazzle.core.table import Table [as 別名]
# 或者: from dazzle.core.table.Table import from_csv [as 別名]
def load_dataset():
    """Load the raw CSV sources into dazzle tables and return the dataset.

    The 'raw' dataset is the result of loading the CSV source data into dazzle
    tables, only filtering out data that we don't want to process further.

    The function is written in a non-destructive way: a table that is already
    present in the dataset is skipped, so the function can be launched several
    times before getting the whole job done.

    The names ``raw_dir``, ``csv_dir`` and ``Table`` are not defined here and
    must be provided by the enclosing module.

    Returns:
        The ``DataSet`` opened from ``raw_dir`` (created there if it did not
        exist yet).
    """
    import os
    from dazzle.core.dataset import DataSet

    if DataSet.exists(raw_dir):
        ds = DataSet.open(raw_dir)
    else:
        ds = DataSet(raw_dir, force_create=True)

    # One entry per table: (table name, source file name, extra keyword
    # arguments for Table.from_csv).
    #
    # Notes:
    # - many of the following attributes should be unsigned int instead of
    #   signed int, but numexpr works only on signed data.
    # - Similarly to pandas, we use the types required to contain the existing
    #   data, not the types we desire to use.
    # - NOTE(review): 'f1' is not a standard NumPy dtype string -- confirm that
    #   Table.from_csv accepts it.
    table_specs = (
        ("Category", "Category.tsv", {
            'usecols': ['CategoryID', 'ParentCategoryID', 'Level'],
            'dtype': {'CategoryID': 'i4', 'ParentCategoryID': 'i1', 'Level': 'i1'},
        }),
        # Notice the filter attribute that does not exist in pandas.read_csv().
        # It makes it possible to skip some rows based on a numexpr expression.
        # IsClick == IsClick is true iff IsClick is not na.
        ("TrainSearchStream", "trainSearchStream.tsv", {
            'usecols': ['SearchID', 'AdID', 'Position', 'ObjectType', 'HistCTR', 'IsClick'],
            'dtype': {'SearchID': 'i4', 'AdID': 'i4', 'Position': 'i1', 'ObjectType': 'i1',
                      'HistCTR': 'f4', 'IsClick': 'f1'},
            'filter': '(ObjectType == 3) & (IsClick == IsClick)',
        }),
        # We avoid loading the string fields. We will see this problem later with Don.
        ("AdsInfo", "AdsInfo.tsv", {
            'usecols': ['AdID', 'LocationID', 'CategoryID', 'Price', 'IsContext'],
            'dtype': {'AdID': 'i4', 'LocationID': 'f4', 'CategoryID': 'f4', 'Price': 'f4',
                      'IsContext': 'f1'},
        }),
        # We avoid loading the string fields. We will see this problem later with Don.
        ("SearchInfo", "SearchInfo.tsv", {
            'usecols': ['SearchID', 'IPID', 'UserID', 'IsUserLoggedOn', 'LocationID', 'CategoryID'],
            'dtype': {'SearchID': 'i4', 'IPID': 'i4', 'UserID': 'f4', 'IsUserLoggedOn': 'f1',
                      'LocationID': 'f4', 'CategoryID': 'f4'},
        }),
        ("userInfo", "userInfo.tsv", {
            'usecols': ['UserID', 'UserAgentID', 'UserAgentOSID', 'UserDeviceID', 'UserAgentFamilyID'],
            'dtype': {'UserID': 'i4', 'UserAgentID': 'i4', 'UserAgentOSID': 'i4',
                      'UserDeviceID': 'i4', 'UserAgentFamilyID': 'i4'},
        }),
        ("Location", "Location.tsv", {
            'usecols': ['LocationID', 'CityID', 'RegionID'],
            'dtype': {'LocationID': 'i4', 'CityID': 'f4', 'RegionID': 'f4'},
        }),
        ("PhoneRequestsStream", "PhoneRequestsStream.tsv", {
            'usecols': ['UserID', 'IPID', 'AdID', 'PhoneRequestDate'],
            'dtype': {'UserID': 'i4', 'IPID': 'i4', 'AdID': 'i4', 'PhoneRequestDate': 'object'},
        }),
        ("VisitsStream", "VisitsStream.tsv", {
            'usecols': ['UserID', 'IPID', 'AdID', 'ViewDate'],
            'dtype': {'UserID': 'i4', 'IPID': 'i4', 'AdID': 'i4', 'ViewDate': 'object'},
        }),
    )

    for table_name, file_name, options in table_specs:
        if ds.get_table(table_name) is not None:
            # Table already present in the dataset: keep it untouched.
            continue
        # The returned Table is deliberately not kept; only the dataset is
        # handed back to the caller.
        Table.from_csv(table_name, ds, os.path.join(csv_dir, file_name),
                       delimiter='\t', chunksize=10**7, **options)

    return ds
開發者ID:mpage38,項目名稱:dazzle,代碼行數:79,代碼來源:avito_dp.py


注:本文中的dazzle.core.table.Table.from_csv方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。