当前位置: 首页>>代码示例>>Java>>正文


Java FileIterator类代码示例

本文整理汇总了Java中cc.mallet.pipe.iterator.FileIterator的典型用法代码示例。如果您正苦于以下问题：Java FileIterator类的具体用法？Java FileIterator怎么用？Java FileIterator使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


FileIterator类属于cc.mallet.pipe.iterator包,在下文中一共展示了FileIterator类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Runs every file produced by a FileIterator over the given directory through
 * an Input2CharSequence / CharSequenceRemoveHTML pipe and prints the data of
 * each resulting instance to standard error.
 *
 * @param args {@code args[0]} is the directory handed to the FileIterator
 */
public static void main(String[] args) {
	// Report a usage hint instead of failing with a bare
	// ArrayIndexOutOfBoundsException when the directory argument is missing.
	if (args.length < 1) {
		System.err.println("Usage: CharSequenceRemoveHTML <html-directory>");
		System.exit(1);
	}

	String htmldir = args[0];
	Pipe pipe = new SerialPipes(new Pipe[] { new Input2CharSequence(),
			new CharSequenceRemoveHTML() });
	InstanceList list = new InstanceList(pipe);
	list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

	// Dump the processed data of every instance, in list order.
	for (int index = 0; index < list.size(); index++) {
		Instance inst = list.get(index);
		System.err.println(inst.getData());
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:CharSequenceRemoveHTML.java

示例2: testThree

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Builds an InstanceList backed by a six-stage SerialPipes and feeds it every
 * instance produced by a FileIterator rooted at "foo/bar".
 */
public void testThree ()
{
	// Pipeline stages, applied in array order.
	Pipe[] stages = new Pipe[] {
		new Target2Label(),
		new CharSequence2TokenSequence(),
		new TokenSequenceLowercase(),
		new TokenSequenceRemoveStopwords(),
		new TokenSequence2FeatureSequence(),
		new FeatureSequence2FeatureVector()
	};
	InstanceList instances = new InstanceList (new SerialPipes(stages));

	// The second constructor argument is deliberately left as the null literal;
	// the regex is passed as the third argument.
	File root = new File("foo/bar");
	Pattern pathPattern = Pattern.compile("^([^/]*)/");
	Iterator<Instance> source = new FileIterator(root, null, pathPattern);
	instances.addThruPipe (source);
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:15,代码来源:TestRainbowStyle.java

示例3: testIncrementallyTrainedGrowingAlphabets

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Trains a NaiveBayes classifier incrementally in two rounds that share one
 * pipe: first on the "learn/a" and "learn/b" directories, then on "learn/b"
 * alone. The data and target alphabet sizes are printed before and after the
 * second instance list is loaded.
 */
public void testIncrementallyTrainedGrowingAlphabets()
{
	System.out.println("testIncrementallyTrainedGrowingAlphabets");

	// Round-one input: wrap each path in a File.
	String[] roundOnePaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};
	File[] roundOneDirs = new File[roundOnePaths.length];
	int next = 0;
	for (String path : roundOnePaths)
		roundOneDirs[next++] = new File (path);

	// MALLET pipeline for converting instances to feature vectors.
	// No CharSubsequence(SKIP_HEADER) stage is included: per the original
	// note, SKIP_HEADER only works for Unix.
	Pipe[] stages = {
			new Target2Label(),
			new Input2CharSequence(),
			new CharSequence2TokenSequence(),
			new TokenSequenceLowercase(),
			new TokenSequenceRemoveStopwords(),
			new TokenSequence2FeatureSequence(),
			new FeatureSequence2FeatureVector()
	};
	SerialPipes instPipe = new SerialPipes(stages);

	InstanceList roundOne = new InstanceList(instPipe);
	FileIterator roundOneFiles =
			new FileIterator(roundOneDirs, FileIterator.STARTING_DIRECTORIES);
	roundOne.addThruPipe(roundOneFiles);

	System.out.println("Training 1");
	NaiveBayesTrainer trainer = new NaiveBayesTrainer();
	NaiveBayes firstModel = trainer.trainIncremental(roundOne);

	// stopGrowth() is not called on the data alphabet between the two rounds.

	// Round two: incrementally train on the "b" directory only.
	String[] roundTwoPaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};

	System.out.println("data alphabet size " + roundOne.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundOne.getTargetAlphabet().size());

	// The second list reuses the same pipe instance as the first.
	InstanceList roundTwo = new InstanceList(instPipe);
	FileIterator roundTwoFiles =
			new FileIterator(roundTwoPaths, FileIterator.STARTING_DIRECTORIES);
	roundTwo.addThruPipe(roundTwoFiles);

	System.out.println("Training 2");

	System.out.println("data alphabet size " + roundTwo.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundTwo.getTargetAlphabet().size());

	NaiveBayes secondModel = (NaiveBayes) trainer.trainIncremental(roundTwo);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:54,代码来源:TestNaiveBayes.java

示例4: testIncrementallyTrained

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Trains a NaiveBayes classifier on the "learn/a" and "learn/b" directories,
 * prints its classification of two sample strings together with its
 * alphabets, then performs a second incremental training round on "learn/b"
 * alone using the same trainer and pipe.
 */
public void testIncrementallyTrained()
{
	System.out.println("testIncrementallyTrained");

	// Round-one input: wrap each path in a File.
	String[] roundOnePaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};
	File[] roundOneDirs = new File[roundOnePaths.length];
	int next = 0;
	for (String path : roundOnePaths)
		roundOneDirs[next++] = new File (path);

	// MALLET pipeline for converting instances to feature vectors.
	// No CharSubsequence(SKIP_HEADER) stage is included: per the original
	// note, SKIP_HEADER only works for Unix.
	Pipe[] stages = {
			new Target2Label(),
			new Input2CharSequence(),
			new CharSequence2TokenSequence(),
			new TokenSequenceLowercase(),
			new TokenSequenceRemoveStopwords(),
			new TokenSequence2FeatureSequence(),
			new FeatureSequence2FeatureVector()
	};
	SerialPipes instPipe = new SerialPipes(stages);

	InstanceList roundOne = new InstanceList(instPipe);
	FileIterator roundOneFiles =
			new FileIterator(roundOneDirs, FileIterator.STARTING_DIRECTORIES);
	roundOne.addThruPipe(roundOneFiles);

	System.out.println("Training 1");
	NaiveBayesTrainer trainer = new NaiveBayesTrainer();
	NaiveBayes firstModel = (NaiveBayes) trainer.trainIncremental(roundOne);

	// Classify both sample strings before printing anything about them.
	Classification helloResult = firstModel.classify("Hello Everybody");
	Classification goodbyeResult = firstModel.classify("Goodbye now");
	System.out.println("Initial Classification = ");
	helloResult.print();
	goodbyeResult.print();
	System.out.println("data alphabet " + firstModel.getAlphabet());
	System.out.println("label alphabet " + firstModel.getLabelAlphabet());

	// Round two: incrementally train on the "b" directory only.
	String[] roundTwoPaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};

	System.out.println("data alphabet size " + roundOne.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundOne.getTargetAlphabet().size());

	// The second list reuses the same pipe instance as the first.
	InstanceList roundTwo = new InstanceList(instPipe);
	FileIterator roundTwoFiles =
			new FileIterator(roundTwoPaths, FileIterator.STARTING_DIRECTORIES);
	roundTwo.addThruPipe(roundTwoFiles);

	System.out.println("Training 2");

	System.out.println("data alphabet size " + roundTwo.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundTwo.getTargetAlphabet().size());

	NaiveBayes secondModel = (NaiveBayes) trainer.trainIncremental(roundTwo);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:63,代码来源:TestNaiveBayes.java

示例5: testEmptyStringBug

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Trains a NaiveBayes classifier on the "learn/a" and "learn/b" directories,
 * prints its classification of two sample strings, runs a second incremental
 * training round on "learn/b" (loaded with the three-argument FileIterator
 * constructor, passing {@code true}), and finally classifies "Goodbye now"
 * once more.
 */
public void testEmptyStringBug()
{
	System.out.println("testEmptyStringBug");

	// Round-one input: wrap each path in a File.
	String[] roundOnePaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/a",
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};
	File[] roundOneDirs = new File[roundOnePaths.length];
	int next = 0;
	for (String path : roundOnePaths)
		roundOneDirs[next++] = new File (path);

	// MALLET pipeline for converting instances to feature vectors.
	// No CharSubsequence(SKIP_HEADER) stage is included: per the original
	// note, SKIP_HEADER only works for Unix.
	Pipe[] stages = {
			new Target2Label(),
			new Input2CharSequence(),
			new CharSequence2TokenSequence(),
			new TokenSequenceLowercase(),
			new TokenSequenceRemoveStopwords(),
			new TokenSequence2FeatureSequence(),
			new FeatureSequence2FeatureVector()
	};
	SerialPipes instPipe = new SerialPipes(stages);

	InstanceList roundOne = new InstanceList(instPipe);
	FileIterator roundOneFiles =
			new FileIterator(roundOneDirs, FileIterator.STARTING_DIRECTORIES);
	roundOne.addThruPipe(roundOneFiles);

	System.out.println("Training 1");
	NaiveBayesTrainer trainer = new NaiveBayesTrainer();
	NaiveBayes firstModel = (NaiveBayes) trainer.trainIncremental(roundOne);

	// Classify both sample strings before printing anything about them.
	Classification helloResult = firstModel.classify("Hello Everybody");
	Classification goodbyeResult = firstModel.classify("Goodbye now");
	System.out.println("Initial Classification = ");
	helloResult.print();
	goodbyeResult.print();
	System.out.println("data alphabet " + firstModel.getAlphabet());
	System.out.println("label alphabet " + firstModel.getLabelAlphabet());

	// Second data set: the "b" directory only.
	String[] roundTwoPaths = {
			"src/cc/mallet/classify/tests/NaiveBayesData/learn/b"
	};

	System.out.println("data alphabet size " + roundOne.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundOne.getTargetAlphabet().size());

	// The second list reuses the same pipe instance as the first; unlike the
	// first load, this FileIterator is built with a trailing `true` argument.
	InstanceList roundTwo = new InstanceList(instPipe);
	FileIterator roundTwoFiles =
			new FileIterator(roundTwoPaths, FileIterator.STARTING_DIRECTORIES, true);
	roundTwo.addThruPipe(roundTwoFiles);

	System.out.println("Training 2");

	System.out.println("data alphabet size " + roundTwo.getDataAlphabet().size());
	System.out.println("target alphabet size " + roundTwo.getTargetAlphabet().size());

	NaiveBayes secondModel = (NaiveBayes) trainer.trainIncremental(roundTwo);

	// NOTE(review): this classifies with the first model, not the one returned
	// by the second trainIncremental call — confirm that is intended.
	Classification finalResult = firstModel.classify("Goodbye now");
	finalResult.print();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:64,代码来源:TestNaiveBayes.java

示例6: main

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Converts directories of text files into a serialized Clusterings object.
 *
 * <p>For every directory in {@code classDirs.value} one Clustering is built:
 * each sub-directory returned by {@code getSubDirs} is assigned a label index,
 * and every file a FileIterator finds under it is parsed with
 * {@code parseFile} and added as an Instance carrying that label. The array
 * of clusterings is then written to {@code outputFile.value}.
 *
 * @param args command-line arguments, handed to {@code CommandOption.process}
 * @throws IOException declared by the signature; failures while writing the
 *         output file are caught, logged, and do not propagate
 */
public static void main (String[] args) throws IOException {
	CommandOption.setSummary(Text2Clusterings.class,
			"A tool to convert a list of text files to a Clusterings.");
	CommandOption.process(Text2Clusterings.class, args);

	if (classDirs.value.length == 0) {
		// Message fixed: balanced quotes around the option and a space
		// between "a" and "list" (the two literals used to join as "alist").
		logger.warning("You must include '--input DIR1 DIR2 ...' in order to specify a "
				+ "list of directories containing the documents for each class.");
		System.exit(-1);
	}

	Clustering[] clusterings = new Clustering[classDirs.value.length];
	int fi = 0; // running count of records read across all clusterings
	for (int i = 0; i < classDirs.value.length; i++) {
		// Alphabets are per input directory, shared by all its records.
		Alphabet fieldAlph = new Alphabet();
		Alphabet valueAlph = new Alphabet();
		File directory = new File(classDirs.value[i]);
		File[] subdirs = getSubDirs(directory);
		Alphabet clusterAlph = new Alphabet();
		InstanceList instances = new InstanceList(new Noop());
		TIntArrayList labels = new TIntArrayList();
		for (int j = 0; j < subdirs.length; j++) {
			ArrayList<File> records = new FileIterator(subdirs[j]).getFileArray();
			int label = clusterAlph.lookupIndex(subdirs[j].toString());
			for (File record : records) {
				// Progress display: the count every 100 records, a dot every
				// 10, and a line break every 1000.
				if (fi % 100 == 0) System.out.print(fi);
				else if (fi % 10 == 0) System.out.print(".");
				if (fi % 1000 == 0 && fi > 0) System.out.println();
				System.out.flush();
				fi++;

				labels.add(label);
				// Integer.valueOf replaces the deprecated Integer constructor.
				instances.add(new Instance(new Record(fieldAlph, valueAlph, parseFile(record)),
						Integer.valueOf(label), record.toString(),
						record.toString()));
			}
		}
		clusterings[i] =
				new Clustering(instances, subdirs.length, labels.toNativeArray());
	}

	logger.info("\nread " + fi + " objects in " + clusterings.length + " clusterings.");

	// Track the underlying file stream so it is released even when creating
	// the ObjectOutputStream or writing the object fails part-way.
	FileOutputStream fileOut = null;
	try {
		fileOut = new FileOutputStream(outputFile.value);
		ObjectOutputStream oos = new ObjectOutputStream(fileOut);
		oos.writeObject(new Clusterings(clusterings));
		oos.close();
		fileOut = null; // closed through oos; nothing left for the finally block
	} catch (Exception e) {
		logger.warning("Exception writing clustering to file " + outputFile.value
				+ " " + e);
		e.printStackTrace();
	} finally {
		if (fileOut != null) {
			try {
				fileOut.close();
			} catch (IOException ignored) {
				// The primary failure has already been logged above.
			}
		}
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:59,代码来源:Text2Clusterings.java

示例7: main

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Converts directories of text files into a serialized Clusterings object.
 *
 * <p>For every directory in {@code classDirs.value} one Clustering is built:
 * each sub-directory returned by {@code getSubDirs} is assigned a label index,
 * and every file a FileIterator finds under it is parsed with
 * {@code parseFile} and added as an Instance carrying that label. The array
 * of clusterings is then written to {@code outputFile.value}.
 *
 * @param args command-line arguments, handed to {@code CommandOption.process}
 * @throws IOException declared by the signature; failures while writing the
 *         output file are caught, logged, and do not propagate
 */
public static void main (String[] args) throws IOException {
	CommandOption.setSummary(Text2Clusterings.class,
			"A tool to convert a list of text files to a Clusterings.");
	CommandOption.process(Text2Clusterings.class, args);

	if (classDirs.value.length == 0) {
		// Message fixed: balanced quotes around the option and a space
		// between "a" and "list" (the two literals used to join as "alist").
		logger.warning("You must include '--input DIR1 DIR2 ...' in order to specify a "
				+ "list of directories containing the documents for each class.");
		System.exit(-1);
	}

	Clustering[] clusterings = new Clustering[classDirs.value.length];
	int fi = 0; // running count of records read across all clusterings
	for (int i = 0; i < classDirs.value.length; i++) {
		// Alphabets are per input directory, shared by all its records.
		Alphabet fieldAlph = new Alphabet();
		Alphabet valueAlph = new Alphabet();
		File directory = new File(classDirs.value[i]);
		File[] subdirs = getSubDirs(directory);
		Alphabet clusterAlph = new Alphabet();
		InstanceList instances = new InstanceList(new Noop());
		TIntArrayList labels = new TIntArrayList();
		for (int j = 0; j < subdirs.length; j++) {
			ArrayList<File> records = new FileIterator(subdirs[j]).getFileArray();
			int label = clusterAlph.lookupIndex(subdirs[j].toString());
			for (File record : records) {
				// Progress display: the count every 100 records, a dot every
				// 10, and a line break every 1000.
				if (fi % 100 == 0) System.out.print(fi);
				else if (fi % 10 == 0) System.out.print(".");
				if (fi % 1000 == 0 && fi > 0) System.out.println();
				System.out.flush();
				fi++;

				labels.add(label);
				// Integer.valueOf replaces the deprecated Integer constructor.
				instances.add(new Instance(new Record(fieldAlph, valueAlph, parseFile(record)),
						Integer.valueOf(label), record.toString(),
						record.toString()));
			}
		}
		clusterings[i] =
				new Clustering(instances, subdirs.length, labels.toArray());
	}

	logger.info("\nread " + fi + " objects in " + clusterings.length + " clusterings.");

	// Track the underlying file stream so it is released even when creating
	// the ObjectOutputStream or writing the object fails part-way.
	FileOutputStream fileOut = null;
	try {
		fileOut = new FileOutputStream(outputFile.value);
		ObjectOutputStream oos = new ObjectOutputStream(fileOut);
		oos.writeObject(new Clusterings(clusterings));
		oos.close();
		fileOut = null; // closed through oos; nothing left for the finally block
	} catch (Exception e) {
		logger.warning("Exception writing clustering to file " + outputFile.value
				+ " " + e);
		e.printStackTrace();
	} finally {
		if (fileOut != null) {
			try {
				fileOut.close();
			} catch (IOException ignored) {
				// The primary failure has already been logged above.
			}
		}
	}
}
 
开发者ID:iamxiatian,项目名称:wikit,代码行数:59,代码来源:Text2Clusterings.java

示例8: getInstanceList

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Builds an InstanceList from the files under a single directory.
 *
 * <p>The list is created with the pipe returned by {@code getPipe()} and is
 * filled by a {@code FileIterator} over {@code data_dir}, constructed with
 * {@code FileIterator.STARTING_DIRECTORIES} and {@code true} as its remaining
 * arguments.
 *
 * @param data_dir path of the directory whose files are loaded
 * @return the instance list after all files have been added through the pipe
 */
public InstanceList getInstanceList(String data_dir) {
	InstanceList loaded = new InstanceList(getPipe());

	// The iterator takes an array of roots; here there is exactly one.
	File[] roots = { new File(data_dir) };
	FileIterator files = new FileIterator(roots, FileIterator.STARTING_DIRECTORIES, true);
	loaded.addThruPipe(files);

	return loaded;
}
 
开发者ID:hakchul77,项目名称:irnlp_toolkit,代码行数:11,代码来源:MalletWrapper.java

示例9: main

import cc.mallet.pipe.iterator.FileIterator; //导入依赖的package包/类
/**
 * Converts directories of text files into a serialized Clusterings object.
 *
 * <p>For every directory in {@code classDirs.value} one Clustering is built:
 * each sub-directory returned by {@code getSubDirs} is assigned a label index,
 * and every file a FileIterator finds under it is parsed with
 * {@code parseFile} and added as an Instance carrying that label. The array
 * of clusterings is then written to {@code outputFile.value}.
 *
 * @param args command-line arguments, handed to {@code CommandOption.process}
 * @throws IOException declared by the signature; failures while writing the
 *         output file are caught, logged, and do not propagate
 */
public static void main (String[] args) throws IOException {
	CommandOption.setSummary(Text2Clusterings.class,
			"A tool to convert a list of text files to a Clusterings.");
	CommandOption.process(Text2Clusterings.class, args);

	if (classDirs.value.length == 0) {
		// Message fixed: balanced quotes around the option and a space
		// between "a" and "list" (the two literals used to join as "alist").
		logger.warning("You must include '--input DIR1 DIR2 ...' in order to specify a "
				+ "list of directories containing the documents for each class.");
		System.exit(-1);
	}

	Clustering[] clusterings = new Clustering[classDirs.value.length];
	int fi = 0; // running count of records read across all clusterings
	for (int i = 0; i < classDirs.value.length; i++) {
		// Alphabets are per input directory, shared by all its records.
		Alphabet fieldAlph = new Alphabet();
		Alphabet valueAlph = new Alphabet();
		File directory = new File(classDirs.value[i]);
		File[] subdirs = getSubDirs(directory);
		Alphabet clusterAlph = new Alphabet();
		InstanceList instances = new InstanceList(new Noop());
		IntArrayList labels = new IntArrayList();
		for (int j = 0; j < subdirs.length; j++) {
			ArrayList<File> records = new FileIterator(subdirs[j]).getFileArray();
			int label = clusterAlph.lookupIndex(subdirs[j].toString());
			for (File record : records) {
				// Progress display: the count every 100 records, a dot every
				// 10, and a line break every 1000.
				if (fi % 100 == 0) System.out.print(fi);
				else if (fi % 10 == 0) System.out.print(".");
				if (fi % 1000 == 0 && fi > 0) System.out.println();
				System.out.flush();
				fi++;

				labels.add(label);
				// Integer.valueOf replaces the deprecated Integer constructor.
				instances.add(new Instance(new Record(fieldAlph, valueAlph, parseFile(record)),
						Integer.valueOf(label), record.toString(),
						record.toString()));
			}
		}
		clusterings[i] =
				new Clustering(instances, subdirs.length, labels.toArray());
	}

	logger.info("\nread " + fi + " objects in " + clusterings.length + " clusterings.");

	// Track the underlying file stream so it is released even when creating
	// the ObjectOutputStream or writing the object fails part-way.
	FileOutputStream fileOut = null;
	try {
		fileOut = new FileOutputStream(outputFile.value);
		ObjectOutputStream oos = new ObjectOutputStream(fileOut);
		oos.writeObject(new Clusterings(clusterings));
		oos.close();
		fileOut = null; // closed through oos; nothing left for the finally block
	} catch (Exception e) {
		logger.warning("Exception writing clustering to file " + outputFile.value
				+ " " + e);
		e.printStackTrace();
	} finally {
		if (fileOut != null) {
			try {
				fileOut.close();
			} catch (IOException ignored) {
				// The primary failure has already been logged above.
			}
		}
	}
}
 
开发者ID:cmoen,项目名称:mallet,代码行数:59,代码来源:Text2Clusterings.java


注:本文中的cc.mallet.pipe.iterator.FileIterator类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。