This page collects typical usage examples of the Python function wekaexamples.helper.get_data_dir. If you have been wondering what get_data_dir does, how to use it, or what real calls look like, the curated code examples below should help.
The 15 code examples of get_data_dir shown below are sorted by popularity by default.
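Note that each example is a single function lifted out of the python-weka-wrapper examples repository; the imports and the JVM lifecycle around it are not shown on this page. Below is a minimal sketch of how such a snippet is typically wrapped. The preamble is modeled on that repository rather than copied from any one example, and the exact import list depends on which classes a given snippet uses:

import os
import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.core.converters import Loader

def main():
    # get_data_dir() is expected to return the directory containing the
    # example datasets (iris.arff, vote.arff, ...) shipped with the repo
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    print(data)

if __name__ == "__main__":
    try:
        jvm.start()  # python-weka-wrapper code needs a running JVM
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

Any of the datasets referenced in the examples (iris.arff, vote.arff, bolts.arff, diabetes.arff, anneal.arff) can be swapped into this skeleton.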
Example 1: main
def main():
    """
    Just runs some example code.
    """
    # load ARFF file
    helper.print_title("Loading ARFF file")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.arff")
    print(str(data))
    # load CSV file
    helper.print_title("Loading CSV file")
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.csv")
    print(str(data))
    # load directory
    # change this to something sensible
    text_dir = "/some/where"
    if os.path.exists(text_dir) and os.path.isdir(text_dir):
        helper.print_title("Loading directory: " + text_dir)
        loader = TextDirectoryLoader(options=["-dir", text_dir, "-F", "-charset", "UTF-8"])
        data = loader.load()
        print(data)
Example 2: main
def main():
    """
    Just runs some example code.
    """
    classifier = Classifier("weka.classifiers.trees.J48")
    helper.print_title("Capabilities")
    capabilities = classifier.capabilities
    print(capabilities)
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    data_capabilities = Capabilities.for_instances(iris_data)
    print(data_capabilities)
    print("classifier handles dataset: " + str(capabilities.supports(data_capabilities)))
    # disable/enable
    helper.print_title("Disable/Enable")
    capability = Capability(member="UNARY_ATTRIBUTES")
    capabilities.disable(capability)
    capabilities.min_instances = 10
    print("Removing: " + str(capability))
    print(capabilities)
Example 3: gridsearch
def gridsearch():
    """
    Applies GridSearch to a dataset. The GridSearch package does not need to be
    installed, as the monolithic weka.jar already contains it.
    """
    helper.print_title("GridSearch")
    # load a dataset
    fname = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading train: " + fname)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(fname)
    train.class_is_last()
    # classifier
    grid = GridSearch(options=["-sample-size", "100.0", "-traversal", "ROW-WISE", "-num-slots", "1", "-S", "1"])
    grid.evaluation = "CC"
    grid.y = {"property": "kernel.gamma", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    grid.x = {"property": "C", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    cls = Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    grid.classifier = cls
    grid.build_classifier(train)
    print("Model:\n" + str(grid))
    print("\nBest setup:\n" + grid.best.to_commandline())
Example 4: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    full = loader.load_file(iris_file)
    full.class_is_last()
    # remove class attribute
    data = Instances.copy_instances(full)
    data.no_class()
    data.delete_last_attribute()
    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print("done")
    # classes to clusters
    evl = ClusterEvaluation()
    evl.set_model(clusterer)
    evl.test_model(full)
    helper.print_title("Cluster results")
    print(evl.cluster_results)
    helper.print_title("Classes to clusters")
    print(evl.classes_to_clusters)
Example 5: main
def main(args):
    """
    Loads a dataset, shuffles it and splits it into train and test sets. Trains J48 on the
    training set and evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """
    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()
    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))
    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)
    # evaluate
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
Example 6: main
def main():
    """
    Plots a dataset.
    """
    # setup the flow
    helper.print_title("Plot dataset")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    flow = Flow(name="plot dataset")
    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)
    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)
    branch = Branch()
    flow.actors.append(branch)
    seq = Sequence(name="matrix plot")
    branch.actors.append(seq)
    mplot = MatrixPlot()
    mplot.config["percent"] = 50.0
    mplot.config["wait"] = False
    seq.actors.append(mplot)
    seq = Sequence(name="line plot")
    branch.actors.append(seq)
    copy = Copy()
    seq.actors.append(copy)
    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flter.config["keep_relationname"] = True
    seq.actors.append(flter)
    lplot = LinePlot()
    lplot.config["percent"] = 50.0
    lplot.config["wait"] = True
    seq.actors.append(lplot)
    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Example 7: main
def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """
    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()
    # classifier
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    base = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    classifier.classifier = base
    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))
    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(evaluation.summary("=== " + str(folds) + "-fold Cross-Validation ==="))
Example 8: load_incremental
def load_incremental():
    """
    Loads a dataset incrementally.
    """
    # setup the flow
    helper.print_title("Load dataset (incremental)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    flow = Flow(name="load dataset")
    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)
    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)
    console = Console()
    flow.actors.append(console)
    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Example 9: load_custom_loader
def load_custom_loader():
    """
    Loads a dataset using a custom loader.
    """
    # setup the flow
    helper.print_title("Load dataset (custom loader)")
    iris = helper.get_data_dir() + os.sep + "iris.csv"
    flow = Flow(name="load dataset")
    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)
    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = False
    loaddataset.config["use_custom_loader"] = True
    loaddataset.config["custom_loader"] = Loader(classname="weka.core.converters.CSVLoader")
    flow.actors.append(loaddataset)
    console = Console()
    flow.actors.append(console)
    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Example 10: main
def main(args):
    """
    Trains a NaiveBayesUpdateable classifier incrementally on a dataset. The dataset can be supplied as a parameter.
    :param args: the commandline arguments
    :type args: list
    """
    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file, incremental=True)
    data.class_is_last()
    # classifier
    nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    nb.build_classifier(data)
    # train incrementally
    for inst in loader:
        nb.update_classifier(inst)
    print(nb)
Example 11: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    # remove class attribute
    data.delete_last_attribute()
    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    # cluster data
    helper.print_info("Clustering data")
    for index, inst in enumerate(data):
        cl = clusterer.cluster_instance(inst)
        dist = clusterer.distribution_for_instance(inst)
        print(str(index+1) + ": cluster=" + str(cl) + ", distribution=" + str(dist))
Example 12: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)
    # save and read object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)
    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: multiple objects")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i+1) + ":")
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
Example 13: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    # remove class attribute
    data.delete_last_attribute()
    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)
    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)
    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
Example 14: main
def main(args):
    """
    Trains Apriori on the specified dataset (uses vote UCI dataset if no dataset specified).
    :param args: the commandline arguments
    :type args: list
    """
    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()
    # build Apriori, using last attribute as class attribute
    apriori = Associator(classname="weka.associations.Apriori", options=["-c", "-1"])
    apriori.build_associations(data)
    print(str(apriori))
    # iterate association rules (low-level)
    helper.print_info("Rules (low-level)")
    # make the underlying rules list object iterable in Python
    rules = javabridge.iterate_collection(apriori.jwrapper.getAssociationRules().getRules().o)
    for i, r in enumerate(rules):
        # wrap the Java object to make its methods accessible
        rule = JWrapper(r)
        print(str(i+1) + ". " + str(rule))
        # output some details on rule
        print(" - consequence support: " + str(rule.getConsequenceSupport()))
        print(" - premise support: " + str(rule.getPremiseSupport()))
        print(" - total support: " + str(rule.getTotalSupport()))
        print(" - total transactions: " + str(rule.getTotalTransactions()))
    # iterate association rules (high-level)
    helper.print_info("Rules (high-level)")
    print("can produce rules? " + str(apriori.can_produce_rules()))
    print("rule metric names: " + str(apriori.rule_metric_names))
    rules = apriori.association_rules()
    if rules is not None:
        print("producer: " + rules.producer)
        print("# rules: " + str(len(rules)))
        for i, rule in enumerate(rules):
            print(str(i+1) + ". " + str(rule))
            # output some details on rule
            print(" - consequence support: " + str(rule.consequence_support))
            print(" - consequence: " + str(rule.consequence))
            print(" - premise support: " + str(rule.premise_support))
            print(" - premise: " + str(rule.premise))
            print(" - total support: " + str(rule.total_support))
            print(" - total transactions: " + str(rule.total_transactions))
            print(" - metric names: " + str(rule.metric_names))
            print(" - metric values: " + str(rule.metric_values))
            print(" - metric value 'Confidence': " + str(rule.metric_value('Confidence')))
            print(" - primary metric name: " + str(rule.primary_metric_name))
            print(" - primary metric value: " + str(rule.primary_metric_value))
Example 15: incremental
def incremental():
    """
    Loads/filters a dataset incrementally.
    """
    # setup the flow
    helper.print_title("Filter datasets (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    anneal = helper.get_data_dir() + os.sep + "anneal.arff"
    flow = Flow(name="filter datasets (incrementally)")
    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris, anneal]
    flow.actors.append(filesupplier)
    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)
    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1"])
    flter.config["keep_relationname"] = True
    flow.actors.append(flter)
    console = Console()
    flow.actors.append(console)
    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()