当前位置: 首页>>代码示例>>Python>>正文


Python Bunch.fixk方法代码示例

本文整理汇总了Python中sklearn.datasets.base.Bunch.fixk的典型用法代码示例。需要说明的是:fixk并不是Bunch类自带的方法,而是示例代码在Bunch实例上动态赋值的属性(例如 pool.fixk = data.train.bowk.tocsr()),或者是命令行参数 args.fixk。如果您正苦于以下问题:Python Bunch.fixk的具体用法?Python Bunch.fixk怎么用?Python Bunch.fixk使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解fixk所属的sklearn.datasets.base.Bunch的用法示例。


在下文中一共展示了与Bunch.fixk相关的3个代码示例,这些例子默认根据受欢迎程度排序。请注意:示例均为Python 2代码(使用print语句),并且每个示例的后半部分代码已被省略。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import fixk [as alias]
# NOTE(review): the second form is not importable -- Bunch is a class, not a module. In the
# visible part of this function "fixk" only occurs as the option `args.fixk`.
def main():
    # Experiment driver (excerpt: the tail of the function is elided at the end of this block).
    # The visible part picks a dataset from `args.train`, then builds a cost model, a
    # MultinomialNB classifier and an expert selected by `args.expert`.
    # `args` is not defined here -- presumably a module-level argparse namespace; TODO confirm.
    # Raises a bare Exception for an unsupported dataset or expert name.
    # NOTE(review): print(...) calls are mixed with Python 2 print statements, so this code
    # only parses under Python 2.

    # Result accumulators; a missing key starts out as an empty list.
    accuracies = defaultdict(lambda: [])

    aucs = defaultdict(lambda: [])

    x_axis = defaultdict(lambda: [])

    # Binary unigram bag-of-words; min_df=5 drops terms seen in fewer than 5 documents.
    # NOTE(review): per the scikit-learn docs a custom `tokenizer` overrides `token_pattern`,
    # so the pattern below is presumably unused -- confirm.
    vct = CountVectorizer(encoding='ISO-8859-1', min_df=5, max_df=1.0, binary=True, ngram_range=(1, 1),
                          token_pattern='\\b\\w+\\b', tokenizer=StemTokenizer())
    vct_analizer = vct.build_tokenizer()  # not referenced again in the visible part
    print("Start loading ...")
    # data fields: data, bow, file_names, target_names, target

    ########## NEWS GROUPS ###############
    # easy to hard. see "Less is More" paper: http://axon.cs.byu.edu/~martinez/classes/678/Presentations/Clawson.pdf
    # Only categories[0] is used in the visible part (20news branch below).
    categories = [['alt.atheism', 'talk.religion.misc'],
                  ['comp.graphics', 'comp.windows.x'],
                  ['comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware'],
                  ['rec.sport.baseball', 'sci.crypt']]

    # Passed to the loaders as min_size; never below 50.
    min_size = max(50, args.fixk)

    # The dataset is chosen by which keyword occurs in args.train; the first match wins.
    if "imdb" in args.train:
        ########## IMDB MOVIE REVIEWS ###########
        data = load_imdb(args.train, shuffle=True, rnd=2356, vct=vct, min_size=min_size,
                               fix_k=args.fixk)  # should bring the data as is
    elif "aviation" in args.train:
        raise Exception("We are not ready for that data yet")
    elif "20news" in args.train:
        ########## 20 news groups ######
        data = load_20newsgroups(categories=categories[0], vectorizer=vct, min_size=min_size,
                                       fix_k=args.fixk)  # for testing purposes
    elif "dummy" in args.train:
        ########## DUMMY DATA###########
        # NOTE(review): hard-coded absolute Windows path and min_size=0 (ignores min_size above).
        data = load_dummy("C:/Users/mramire8/Documents/code/python/data/dummy", shuffle=True,
                                rnd=2356, vct=vct, min_size=0, fix_k=args.fixk)
    else:
        raise Exception("We do not know that dataset")

    print("Data %s" % args.train)
    print("Data size %s" % len(data.train.data))
    #print(data.train.data[0])
    #### COST MODEL
    # parse_parameters / set_cost_model are project helpers not shown in this excerpt.
    parameters = parse_parameters(args.cost_model)

    print "Cost Parameters %s" % parameters

    cost_model = set_cost_model(parameters)

    print "\nCost Model: %s" % cost_model.__class__.__name__

    #### ACCURACY MODEL
    # Note the accuracy parameters use parse_parameters_mat, unlike the cost parameters above.
    # try:
    # #     accu_parameters = parse_parameters(args.accu_model)
    # except ValueError:
    accu_parameters = parse_parameters_mat(args.accu_model)
    # else
    #     print("Error: Accuracy parameters didn't work")

    print "Accuracy Parameters %s" % accu_parameters
    #if "fixed" in args.accu_function:
    #    accuracy_model = base_models.FixedAccuracyModel(accuracy_value=.7)
    #elif "log" in args.accu_function:
    #    accuracy_model = base_models.LogAccuracyModel(model=parameters)
    #elif "linear" in args.accu_function:
    #    accuracy_model = base_models.LRAccuracyModel(model=parameters)
    #else:
    #    raise Exception("We need a defined cost function options [fixed|log|linear]")
    #
    #print "\nAccuracy Model: %s " % accuracy_model

    #### CLASSIFIER
    #### Informed priors
    #feature_counts = np.ones(x_train.shape[0]) * x_train
    #feature_frequencies = feature_counts / np.sum(feature_counts)
    #alpha = feature_frequencies
    alpha = 1  # smoothing parameter handed to MultinomialNB
    clf = MultinomialNB(alpha=alpha)
    print "\nClassifier: %s" % clf

    #### EXPERT MODEL
    # The expert is chosen by which keyword occurs in args.expert; the first match wins.
    #expert = baseexpert.BaseExpert()
    if "fixed" in args.expert:
        # Only the first accuracy parameter is used for the fixed-accuracy expert.
        expert = baseexpert.FixedAccuracyExpert(accuracy_value=accu_parameters[0],
                                                cost_function=cost_model.cost_function)  #average value of accuracy of the experts
    elif "true" in args.expert:
        expert = baseexpert.TrueOracleExpert(cost_function=cost_model.cost_function)
    elif "linear" in args.expert:
        #expert = baseexpert.LRFunctionExpert(model=[0.0019, 0.6363],cost_function=cost_model.cost_function)
        raise Exception("We do not know linear yet!!")
    elif "log" in args.expert:
        expert = baseexpert.LogFunctionExpert(model=accu_parameters, cost_function=cost_model.cost_function)
    elif "direct" in args.expert:
        expert = baseexpert.LookUpExpert(accuracy_value=accu_parameters, cost_function=cost_model.cost_function)
    else:
        # NOTE(review): the message talks about cost-function options, but this branch is
        # reached for an unknown *expert*; the accepted keywords are fixed|true|log|direct.
        raise Exception("We need a defined cost function options [fixed|log|linear]")
        #expert = baseexpert.TrueOracleExpert(cost_function=cost_model.cost_function)
    print "\nExpert: %s " % expert

    #### ACTIVE LEARNING SETTINGS
# ......... part of the code is omitted here .........
开发者ID:mramire8,项目名称:active,代码行数:103,代码来源:traintest.py

示例2: main

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import fixk [as alias]
# NOTE(review): the second form is not importable -- Bunch is a class, not a module. Below,
# `fixk` is an attribute assigned on a Bunch instance (pool.fixk), not a method of Bunch.
def main():
    # Experiment driver (excerpt: the function is cut off inside the query loop at the end of
    # this block). The visible part loads (or builds and pickles) the dataset, creates a cost
    # model, a student classifier and a NeutralityExpert, then starts `args.trials` trials, each
    # with a fresh pool stored in a Bunch.
    # `args` is not defined here -- presumably a module-level argparse namespace; TODO confirm.
    # NOTE(review): print(...) calls are mixed with Python 2 print statements, so this code
    # only parses under Python 2.

    # Result accumulators; a missing key starts out as an empty list.
    accuracies = defaultdict(lambda: [])

    aucs = defaultdict(lambda: [])

    x_axis = defaultdict(lambda: [])

    # Binary bag-of-words over 1- to 3-grams; min_df=5 drops terms seen in fewer than 5 documents.
    # NOTE(review): per the scikit-learn docs a custom `tokenizer` overrides `token_pattern`,
    # so the pattern below is presumably unused -- confirm.
    vct = CountVectorizer(encoding='ISO-8859-1', min_df=5, max_df=1.0, binary=True, ngram_range=(1, 3),
                          token_pattern='\\b\\w+\\b', tokenizer=StemTokenizer())
    vct_analizer = vct.build_tokenizer()  # not referenced again in the visible part
    print("Start loading ...")
    # data fields: data, bow, file_names, target_names, target

    ########## NEWS GROUPS ###############
    # easy to hard. see "Less is More" paper: http://axon.cs.byu.edu/~martinez/classes/678/Presentations/Clawson.pdf
    # Only categories[0] is used in the visible part.
    categories = [['alt.atheism', 'talk.religion.misc'],
                  ['comp.graphics', 'comp.windows.x'],
                  ['comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware'],
                  ['rec.sport.baseball', 'sci.crypt']]

    # Passed to load_dataset as min_size; never below 100.
    min_size = max(100, args.fixk)

    # Pickle cache file name, built from args.train and args.fixk only.
    # NOTE(review): min_size, percent and the vectorizer settings are not part of the name, so
    # a cache written with other settings would be reused silently.
    fixk_saved = "{0}{1}.p".format(args.train, args.fixk)

    # Load the cached dataset if the file can be opened; otherwise build it and write the cache.
    # NOTE(review): neither file handle is closed in the visible part, and pickle.load must
    # only be used on trusted files.
    try:
        fixk_file = open(fixk_saved, "rb")
        data = pickle.load(fixk_file)
    except IOError:
        data = load_dataset(args.train, args.fixk, categories[0], vct, min_size, percent=.5)
        fixk_file = open(fixk_saved, "wb")
        pickle.dump(data, fixk_file)

    # data = load_dataset(args.train, args.fixk, categories[0], vct, min_size)

    print("Data %s" % args.train)
    print("Data size %s" % len(data.train.data))

    # parse_parameters_mat / set_cost_model are project helpers not shown in this excerpt.
    parameters = parse_parameters_mat(args.cost_model)

    print "Cost Parameters %s" % parameters

    cost_model = set_cost_model(args.cost_function, parameters=parameters)
    print "\nCost Model: %s" % cost_model.__class__.__name__


    #### STUDENT CLASSIFIER
    clf = linear_model.LogisticRegression(penalty="l1", C=1)
    print "\nStudent Classifier: %s" % clf

    #### EXPERT CLASSIFIER

    # The expert's classifier is fitted on data.test (bow / target), not on data.train.
    exp_clf = linear_model.LogisticRegression(penalty='l1', C=.3)
    exp_clf.fit(data.test.bow, data.test.target)
    expert = baseexpert.NeutralityExpert(exp_clf, threshold=args.neutral_threshold,
                                         cost_function=cost_model.cost_function)
    print "\nExpert: %s " % expert

    #### ACTIVE LEARNING SETTINGS
    step_size = args.step_size
    bootstrap_size = args.bootstrap
    evaluation_points = 200  # only printed in the visible part

    print("\nExperiment: step={0}, BT={1}, plot points={2}, fixk:{3}, minsize:{4}".format(step_size, bootstrap_size,
                                                                                          evaluation_points, args.fixk,
                                                                                          min_size))
    print ("Cheating experiment - use full uncertainty query k words")
    t0 = time.time()
    ### experiment starts
    # Cross-trial lists; not appended to in the visible part.
    tx =[]
    tac = []
    tau = []
    for t in range(args.trials):
        # Per-trial lists; not appended to in the visible part.
        trial_accu =[]

        trial_aucs = []

        trial_x_axis = []
        print "*" * 60
        print "Trial: %s" % t

        # The trial index doubles as the learner's seed.
        student = randomsampling.UncertaintyLearner(model=clf, accuracy_model=None, budget=args.budget, seed=t)
        print "\nStudent: %s " % student
        train_indices = []
        train_x = []
        train_y = []
        # The pool is rebuilt from data.train for every trial.
        pool = Bunch()
        pool.data = data.train.bow.tocsr()   # full words, for training
        pool.fixk = data.train.bowk.tocsr()  # k words BOW for querying
        pool.target = data.train.target
        pool.predicted = []
        pool.kwords = np.array(data.train.kwords)  # k words
        pool.remaining = set(range(pool.data.shape[0]))  # indices of the pool

        bootstrapped = False

        current_cost = 0
        iteration = 0
        # Continue while budget is left, more than step_size pool items remain and the
        # iteration cap (inclusive) has not been passed.
        while 0 < student.budget and len(pool.remaining) > step_size and iteration <= args.maxiter:

            if not bootstrapped:
# ......... part of the code is omitted here .........
开发者ID:mramire8,项目名称:active,代码行数:103,代码来源:unckcheatv2.py

示例3: main

# Required import: from sklearn.datasets.base import Bunch [as alias]
# Or: from sklearn.datasets.base.Bunch import fixk [as alias]
# NOTE(review): the second form is not importable -- Bunch is a class, not a module. In the
# visible part of this function "fixk" only occurs as the option `args.fixk`.
def main():
    # Experiment driver (excerpt: the function is cut off inside the trial loop at the end of
    # this block). The visible part loads the dataset via load_from_file, builds a cost model,
    # a classifier and an expert selected by `args.expert`, then starts `args.trials` trials
    # with either an uncertainty or a random-sampling student.
    # `args` is not defined here -- presumably a module-level argparse namespace; TODO confirm.
    # Raises a bare Exception for an unsupported expert name.
    # NOTE(review): print(...) calls are mixed with Python 2 print statements, so this code
    # only parses under Python 2.

    # Result accumulators; a missing key starts out as an empty list.
    accuracies = defaultdict(lambda: [])

    aucs = defaultdict(lambda: [])

    x_axis = defaultdict(lambda: [])

    # Binary bag-of-words over 1- to 3-grams; min_df=5 drops terms seen in fewer than 5 documents.
    # NOTE(review): per the scikit-learn docs a custom `tokenizer` overrides `token_pattern`,
    # so the pattern below is presumably unused -- confirm.
    vct = CountVectorizer(encoding='latin-1', min_df=5, max_df=1.0, binary=True, ngram_range=(1, 3),
                          token_pattern='\\b\\w+\\b', tokenizer=StemTokenizer())
    vct_analizer = vct.build_tokenizer()  # not referenced again in the visible part
    print("Start loading ...")
    # data fields: data, bow, file_names, target_names, target

    ########## NEWS GROUPS ###############
    # easy to hard. see "Less is More" paper: http://axon.cs.byu.edu/~martinez/classes/678/Presentations/Clawson.pdf
    # The whole list (not just categories[0]) is handed to load_from_file below.
    categories = [['alt.atheism', 'talk.religion.misc'],
                  ['comp.graphics', 'comp.windows.x'],
                  ['comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware'],
                  ['rec.sport.baseball', 'sci.crypt']]

    # Computed from the raw args.fixk, i.e. before a negative value is mapped to None below.
    min_size = max(10, args.fixk)

    # A negative fixk is replaced by None before loading.
    if args.fixk < 0:
        args.fixk = None

    # data = load_dataset(args.train, args.fixk, categories[0], vct, min_size, percent=.5)
    # fixk_saved = "{0}{1}.p".format(args.train, args.fixk)

    # load_from_file returns a vectorizer as well, which replaces the `vct` built above.
    data, vct = load_from_file(args.train, categories, args.fixk, min_size, vct)

    print("Data %s" % args.train)
    print("Data size %s" % len(data.train.data))

    #### COST MODEL
    # parse_parameters_mat / set_cost_model are project helpers not shown in this excerpt.
    parameters = parse_parameters_mat(args.cost_model)
    print "Cost Parameters %s" % parameters
    cost_model = set_cost_model(args.cost_function, parameters=parameters)
    print "\nCost Model: %s" % cost_model.__class__.__name__

    #### ACCURACY MODEL
    accu_parameters = parse_parameters_mat(args.accu_model)

    #### CLASSIFIER
    clf = set_classifier(args.classifier)
    print "\nClassifier: %s" % clf

    #### EXPERT MODEL
    # The expert is chosen by which keyword occurs in args.expert; the first match wins.

    if "fixed" in args.expert:
        # Only the first accuracy parameter is used for the fixed-accuracy expert.
        expert = baseexpert.FixedAccuracyExpert(accuracy_value=accu_parameters[0],
                                                cost_function=cost_model.cost_function)  #average value of accuracy of the experts
    elif "true" in args.expert:
        expert = baseexpert.TrueOracleExpert(cost_function=cost_model.cost_function)
    elif "linear" in args.expert:
        #expert = baseexpert.LRFunctionExpert(model=[0.0019, 0.6363],cost_function=cost_model.cost_function)
        raise Exception("We do not know linear yet!!")
    elif "log" in args.expert:
        expert = baseexpert.LogFunctionExpert(model=accu_parameters, cost_function=cost_model.cost_function)
    elif "direct" in args.expert:
        expert = baseexpert.LookUpExpert(accuracy_value=accu_parameters, cost_function=cost_model.cost_function)
    elif "neutral" in args.expert:
        # This expert wraps a classifier fitted on data.test with a fixed C=1.
        exp_clf = LogisticRegression(penalty='l1', C=1)
        exp_clf.fit(data.test.bow, data.test.target)
        expert = baseexpert.NeutralityExpert(exp_clf, threshold=args.neutral_threshold,
                                         cost_function=cost_model.cost_function)
    else:
        # NOTE(review): the message talks about cost-function options, but this branch is
        # reached for an unknown *expert*; accepted keywords are fixed|true|log|direct|neutral.
        raise Exception("We need a defined cost function options [fixed|log|linear]")

    # A second classifier is always fitted on data.test, with C taken from args.expert_penalty.
    # This rebinds exp_clf; a NeutralityExpert built above was handed the earlier C=1 instance.
    exp_clf = LogisticRegression(penalty='l1', C=args.expert_penalty)
    exp_clf.fit(data.test.bow, data.test.target)
    print "\nExpert: %s " % expert
    coef = exp_clf.coef_[0]  # only consumed by the commented-out call below in the visible part
    # print_features(coef, vct.get_feature_names())
    #### ACTIVE LEARNING SETTINGS
    step_size = args.step_size
    bootstrap_size = args.bootstrap
    evaluation_points = 200  # only printed in the visible part

    # NOTE(review): "minsize" is printed as the literal 50, not the min_size computed above.
    print("\nExperiment: step={0}, BT={1}, plot points={2}, fixk:{3}, minsize:{4}".format(step_size, bootstrap_size,
                                                                                          evaluation_points, args.fixk,
                                                                                          50))

    t0 = time.time()
    # Cross-trial lists; not appended to in the visible part.
    tac = []
    tau = []
    ### experiment starts
    for t in range(args.trials):
        # Per-trial lists; not appended to in the visible part.
        trial_accu = []

        trial_aucs = []

        print "*" * 60
        print "Trial: %s" % t
        # NOTE(review): this tests whether args.student is a substring of "unc" (true for
        # "unc", "un", "u", "" ...), the reverse of the `"keyword" in args.x` tests used
        # above; a value such as "uncertainty" falls through to random sampling -- confirm intent.
        if  args.student in "unc":
            # The trial index doubles as the learner's seed.
            student = randomsampling.UncertaintyLearner(model=clf, accuracy_model=None, budget=args.budget, seed=t,
                                                        subpool=250)
        else:
            student = randomsampling.RandomSamplingLearner(model=clf, accuracy_model=None, budget=args.budget, seed=t)

        print "\nStudent: %s " % student
# ......... part of the code is omitted here .........
开发者ID:mramire8,项目名称:active,代码行数:103,代码来源:traintestLR.py


注:本文中的sklearn.datasets.base.Bunch.fixk方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。