

Python Bunch.target_name Method Code Examples

This article collects typical usage examples of the Python method sklearn.datasets.base.Bunch.target_name. If you are wondering what Bunch.target_name does, how to use it, or what real-world code that uses it looks like, the curated method examples below may help. You can also explore further usage examples of the containing class, sklearn.datasets.base.Bunch.


Below are 6 code examples of the Bunch.target_name method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
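
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the category names "spam" and "ham" are purely illustrative) showing what target_name is: a Bunch exposes its keys as attributes, so target_name can be passed at construction time or assigned afterwards, and label values are usually indices into it. Note that in newer scikit-learn releases the same class lives at sklearn.utils.Bunch.

from sklearn.datasets.base import Bunch

# A Bunch is a dict whose keys are also readable as attributes.
data_set = Bunch(target_name=[], label=[], contents=[])

# target_name usually holds the list of category names;
# label then stores, per document, the index of its category in that list.
data_set.target_name = ["spam", "ham"]
data_set.label.append(data_set.target_name.index("spam"))

print(data_set.target_name)                              # ['spam', 'ham']
print(data_set["target_name"] is data_set.target_name)   # True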

Example 1: calc_tfidf

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
def calc_tfidf(trainsetfile, stopwordfile, dstdir):
    # load the persisted training corpus (a Bunch) produced earlier
    data_set = joblib.load(trainsetfile)
    wordbag = Bunch(target_name=[], label=[], filenames=[], tdm=[], vocabulary={})
    wordbag.target_name = data_set.target_name
    wordbag.label = data_set.label

    # build the TF-IDF weighted term-document matrix of the training corpus
    corpus = data_set.contents
    stopwordlist = read_stopword(stopwordfile)
    vectorize = TfidfVectorizer(sublinear_tf=True, max_df=0.8, min_df=3,
                                max_features=50000, stop_words=stopwordlist)
    feature_train = vectorize.fit_transform(corpus)
    wordbag.tdm = feature_train
    wordbag.vocabulary = vectorize.vocabulary_
    # persist the word bag for later training/prediction
    joblib.dump(wordbag, dstdir + "/" + "word_bag.data", compress=3)
Developer ID: wadeallstar, Project: python-fraud-detect, Lines: 15, Source file: process_tool.py
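
A hypothetical invocation of calc_tfidf might look as follows; the file and directory names are placeholders, not taken from the original project.

# Hypothetical paths; the original project stores them elsewhere.
calc_tfidf("wordbag/train_set.data", "stopwords/stopwords.txt", "wordbag")
# Afterwards, wordbag/word_bag.data holds a Bunch whose target_name, label,
# tdm and vocabulary describe the TF-IDF weighted training set.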

Example 2: testset_tfidf

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
def testset_tfidf(testsetfile, stopwordfile, myvocabulary):
    # load the persisted test corpus (a Bunch)
    data_set = joblib.load(testsetfile)
    wordbag = Bunch(target_name=[], label=[], filenames=[], tdm=[], vocabulary={})
    wordbag.target_name = data_set.target_name
    wordbag.label = data_set.label

    # vectorize the test corpus with the vocabulary learned on the training set,
    # so the test term-document matrix shares its columns with the training one
    corpus = data_set.contents
    stopwordlist = read_stopword(stopwordfile)
    vectorize = TfidfVectorizer(sublinear_tf=True, stop_words=stopwordlist,
                                vocabulary=myvocabulary)
    feature_train = vectorize.fit_transform(corpus)
    wordbag.tdm = feature_train
    joblib.dump(wordbag, "test_wordbag/test_word_bag.data", compress=3)
    return wordbag
    
Developer ID: wadeallstar, Project: python-fraud-detect, Lines: 15, Source file: process_tool.py
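
Because the vocabulary is fixed to the training vocabulary, the test matrix lines up column-for-column with the training matrix. A hedged usage sketch (paths are placeholders, not from the project):

# Hypothetical usage; the vocabulary comes from the training word bag.
train_bag = joblib.load("wordbag/word_bag.data")
test_bag = testset_tfidf("wordbag/test_set.data", "stopwords/stopwords.txt",
                         train_bag.vocabulary)
print(test_bag.tdm.shape)  # same number of columns as the training tdm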

Example 3: train_bags

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
def train_bags(token_path, filename, wordbag_path):
    data_set = Bunch(target_name=[], label=[], filenames=[], contents=[])

    # each file under token_path is one category; the file name is the category name
    dir_list = os.listdir(token_path)
    data_set.target_name = dir_list

    for file in dir_list:
        file_name = token_path + "/" + file
        file_read = open(file_name, "r")
        for line in file_read:
            # one document per line; the label is the index of its category name
            data_set.label.append(data_set.target_name.index(file))
            data_set.contents.append(line.strip())
        file_read.close()
    # persist the training set object
    joblib.dump(data_set, wordbag_path + "/" + filename, compress=3)
Developer ID: wadeallstar, Project: python-fraud-detect, Lines: 17, Source file: process_tool.py
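
train_bags expects token_path to contain one tokenized file per category, with one document per line; the file name doubles as the category name stored in target_name. A hypothetical invocation (directory and file names are placeholders):

# Hypothetical layout: token/<category> files, one document per line.
train_bags("token", "train_set.data", "wordbag")
# wordbag/train_set.data then holds the Bunch consumed by calc_tfidf above.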

Example 4: reload

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
import sys
import os

from sklearn.datasets.base import Bunch
from sklearn.externals import joblib
import jieba
from sklearn.feature_extraction.text import HashingVectorizer

reload(sys)
# sys.setdefaultencoding('utf-8')

# tokenized corpus path
token_path = "token" + "/"
# word-bag corpus path
wordbag_path = "wordbag" + "/"
# store the data set in a Bunch object
data_set = Bunch(target_name=[], label=[], filenames=[], contents=[])

# each file under token_path is one category; the file name is the category name
dir_list = os.listdir(token_path)
data_set.target_name = dir_list

for file in dir_list:
    file_name = token_path + file
    file_read = open(file_name, "r")
    for line in file_read:
        # one document per line; the label indexes into target_name
        data_set.label.append(data_set.target_name.index(file))
        data_set.contents.append(line.strip())
    file_read.close()
# persist the data set
joblib.dump(data_set, wordbag_path + "train_set1124.data", compress=3)

# verify the persisted object
data_set = joblib.load(wordbag_path + "train_set1124.data")
print(data_set.target_name)
Developer ID: wadeallstar, Project: python-fraud-detect, Lines: 31, Source file: train_bags.py
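
The jieba and HashingVectorizer imports are not used within the lines shown; they presumably serve later steps in the project. Purely as an illustration (parameters are not from the original code), the persisted contents could be vectorized with a hashing trick, which avoids storing an explicit vocabulary:

# Illustrative only: hash-based vectorization of the loaded contents.
vectorizer = HashingVectorizer(n_features=2 ** 18)
tdm = vectorizer.fit_transform(data_set.contents)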

Example 5: reload

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
import sys
import pickle

from sklearn.datasets.base import Bunch

reload(sys)

# load the training corpus
data_set = {}
# path of the persisted training set
train_path = 'text_corpus1_wordbag/train_set.data'
file_obj = open(train_path, 'rb')

# read back the persisted object
data_set = pickle.load(file_obj)
file_obj.close()

# define the word-bag data structure
wordbag = Bunch(target_name=[], label=[], filenames=[], tdm=[], vocabulary={})
wordbag.target_name = data_set.target_name
wordbag.label = data_set.label
wordbag.filenames = data_set.filenames

# build the corpus
corpus = data_set.contents

# load the stop-word list from file
stpwrdpath = 'extra_dict/hlt_stop_words.txt'
stpwrd_dic = open(stpwrdpath, 'rb')
stpwrd_content = stpwrd_dic.read()

# convert the stop words to a list
stpwrdlst = stpwrd_content.splitlines()
stpwrd_dic.close()
Developer ID: Pengfei-Zhu, Project: DataMining, Lines: 31, Source file: tf-idffinal.py
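
The excerpt stops after preparing the stop-word list; the TF-IDF step falls outside the lines shown. Modelled on Examples 1 and 2 above, the continuation presumably looks roughly like this (a sketch only; the vectorizer parameters are illustrative, not the project's actual values):

# Sketch of the likely continuation, not the original code.
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(stop_words=stpwrdlst, sublinear_tf=True, max_df=0.5)
wordbag.tdm = vectorizer.fit_transform(corpus)
wordbag.vocabulary = vectorizer.vocabulary_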

Example 6: Bunch

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import target_name [as alias]
import os

from sklearn.datasets.base import Bunch

# word-bag corpus path
wordbag_path = "text_corpus_wordbag/"
if not os.path.exists(wordbag_path):
    os.makedirs(wordbag_path)

# A Bunch provides a key/value object interface:
# target_name: list of all category names
# label: list with the category label of each file
# filenames: file names
# contents: file contents
data_set = Bunch(target_name=[], label=[], filenames=[], contents=[])

# seg_path (the directory of tokenized files) is defined earlier in the source file
# get all sub-categories under seg_path
class_list = os.listdir(seg_path)
data_set.target_name = class_list

# walk every file in every sub-directory
for mydir in class_list:
    class_path = seg_path + mydir + "/"
    file_list = os.listdir(class_path)  # all files under class_path
    for file_name in file_list:
        file_path = class_path + file_name
        data_set.filenames.append(file_path)  # record the file path
        data_set.label.append(data_set.target_name.index(mydir))  # record the category label
        with open(file_path, 'r', encoding='gb18030') as file:
            seg_corpus = file.read()  # read the tokenized corpus
            data_set.contents.append(seg_corpus)  # collect the tokenized text


# persist the training set object
Developer ID: longcd, Project: Text-Classification-System, Lines: 32, Source file: train_bags.py
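
The persistence step announced by the final comment lies outside the excerpted lines. Mirroring the other examples on this page, it would presumably be something like the following (a sketch only; the output file name is hypothetical and the joblib import path assumes an older scikit-learn):

# Sketch only; the actual file name in the project may differ.
from sklearn.externals import joblib
joblib.dump(data_set, wordbag_path + "train_set.data", compress=3)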


Note: The sklearn.datasets.base.Bunch.target_name method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.