当前位置: 首页>>代码示例>>Java>>正文


Java Instance.setSource方法代码示例

本文整理汇总了Java中edu.umass.cs.mallet.base.types.Instance.setSource方法的典型用法代码示例。如果您正苦于以下问题:Java Instance.setSource方法的具体用法?Java Instance.setSource怎么用?Java Instance.setSource使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类edu.umass.cs.mallet.base.types.Instance的用法示例。


在下文中一共展示了Instance.setSource方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Stores the carrier's current data object as its source.
 *
 * @param carrier the instance being piped; it is modified in place
 * @return the same {@code carrier} that was passed in
 */
public Instance pipe (Instance carrier)
{
	final Object currentData = carrier.getData();
	carrier.setSource (currentData);
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:6,代码来源:SaveDataInSource.java

示例2: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Splits SGML-tagged character data into two parallel token sequences: one
 * holding the text tokens and one holding, for each text token, the tag
 * that was in effect where the token occurred.
 *
 * <p>The text between consecutive matches of {@code sgmlPattern} is tokenized
 * with {@code lexer}. A match whose second character is {@code '/'} is
 * treated as a closing tag and switches back to {@code backgroundTag}; any
 * other match switches to the matched text with its first and last
 * characters stripped. Which matches count as "valid" is decided by
 * {@code findNextValidMatch}, which is defined outside this method.
 *
 * <p>On return the carrier's data and source are both the text-token
 * sequence and its target is the tag sequence.
 *
 * @param carrier the instance being piped; its data must be a {@link CharSequence}
 * @return the same {@code carrier}, modified in place
 * @throws ClassCastException if the carrier's data is not a {@link CharSequence}
 */
public Instance pipe (Instance carrier)
{
	if (!(carrier.getData() instanceof CharSequence))
		throw new ClassCastException ("carrier.data is a " + carrier.getData().getClass().getName() +
																 " not a CharSequence");
	TokenSequence dataTokens = new TokenSequence ();
	TokenSequence targetTokens = new TokenSequence ();
	CharSequence string = (CharSequence) carrier.getData();
	String tag = backgroundTag;
	String nextTag = backgroundTag;
	Matcher m = sgmlPattern.matcher (string);
	int textStart = 0;
	int textEnd = 0;
	int nextStart = 0;
	boolean done = false;

	while (!done) {
		done = !findNextValidMatch (m);
		if (done) {
			// No more tags: the remaining text runs to the end of the input.
			// subSequence's end index is exclusive, so length() (not length()-1)
			// is needed to keep the final character.
			textEnd = string.length();
		} else {
			String sgml = m.group();
			if (sgml.charAt(1) == '/')
				nextTag = backgroundTag;
			else
				nextTag = sgml.substring(1, sgml.length()-1);
			nextStart = m.end();   // first index after the tag
			textEnd = m.start();   // text segment stops where the tag begins
		}
		if (textEnd - textStart > 0) {
			lexer.setCharSequence (string.subSequence (textStart, textEnd));
			while (lexer.hasNext()) {
				dataTokens.add (new Token ((String) lexer.next()));
				// Text before the tag just found still belongs to the previous tag.
				targetTokens.add (new Token (tag));
			}
		}
		textStart = nextStart;
		tag = nextTag;
	}
	carrier.setData(dataTokens);
	carrier.setTarget(targetTokens);

	carrier.setSource(dataTokens);

	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:50,代码来源:SelectiveSGML2TokenSequence.java

示例3: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Turns SGML-tagged character data into a {@code StringTokenization} of
 * text spans plus a parallel {@code TokenSequence} of tags.
 *
 * <p>Each stretch of text between matches of {@code sgmlPattern} is run
 * through {@code lexer}; every token found becomes a {@code StringSpan}
 * over the original character data and is paired with the tag in effect
 * for that stretch. A match whose second character is {@code '/'} resets
 * the tag to {@code backgroundTag}; any other match sets it to the matched
 * text without its first and last characters.
 *
 * <p>The carrier's data is replaced by the span tokenization and its target
 * by the tag sequence. The tokenization is also stored as the source when
 * {@code saveSource} is set.
 *
 * @param carrier the instance being piped; its data is cast to {@link CharSequence}
 * @return the same {@code carrier}, modified in place
 */
public Instance pipe (Instance carrier)
{
	final CharSequence text = (CharSequence) carrier.getData();
	final StringTokenization spans = new StringTokenization (text);
	final TokenSequence labels = new TokenSequence ();
	final Matcher tagMatcher = sgmlPattern.matcher (text);

	logger.fine (sgmlPattern.pattern());
	logger.finer (text.toString());

	String activeTag = backgroundTag;
	int segmentStart = 0;
	boolean matched;
	do {
		matched = tagMatcher.find();

		// Where the current text segment stops, and the state that takes
		// effect once that segment has been consumed.
		final int segmentEnd;
		String upcomingTag = activeTag;
		int upcomingStart = segmentStart;

		if (matched) {
			final String markup = tagMatcher.group();
			logger.finer ("SGML = "+markup);
			logger.finer (Integer.toString (tagMatcher.groupCount()));

			upcomingTag = (markup.charAt(1) == '/')
				? backgroundTag
				: markup.substring (1, markup.length()-1);
			logger.finer ("nextTag: " + upcomingTag);

			upcomingStart = tagMatcher.end();   // one past the last character of the tag
			segmentEnd = tagMatcher.start();    // exclusive end of the text before the tag
			logger.finer ("Text start/end "+segmentStart+" "+segmentEnd);
		} else {
			// No further tags: the last segment runs to the end of the input.
			segmentEnd = text.length();
		}

		if (segmentEnd > segmentStart) {
			final CharSequence segment = text.subSequence (segmentStart, segmentEnd);
			logger.finer ("Tag = "+activeTag);
			logger.finer ("Target = "+segment);
			lexer.setCharSequence (segment);
			while (lexer.hasNext()) {
				lexer.next ();
				// Lexer offsets are relative to the segment; shift them back
				// into coordinates of the full character data.
				spans.add (new StringSpan (text,
						segmentStart + lexer.getStartOffset (),
						segmentStart + lexer.getEndOffset ()));
				labels.add (new Token (activeTag));
			}
		}

		segmentStart = upcomingStart;
		activeTag = upcomingTag;
	} while (matched);

	carrier.setData (spans);
	carrier.setTarget (labels);
	if (saveSource) {
		carrier.setSource (spans);
	}
	return carrier;
}
 
开发者ID:clulab,项目名称:reach-banner,代码行数:63,代码来源:SGML2TokenSequence.java

示例4: pipe

import edu.umass.cs.mallet.base.types.Instance; //导入方法依赖的package包/类
/**
 * Converts a sentence given as newline-separated {@code word<TAB>label}
 * lines into a token sequence (data) and a label sequence (target).
 *
 * <p>Processing per line:
 * <ul>
 *   <li>An empty line becomes the word {@code "-<S>-"} with label {@code "O"}.</li>
 *   <li>Labels starting with {@code "I-malignancy-type"} or
 *       {@code "B-malignancy-type"} are truncated to exactly that prefix;
 *       every other label becomes {@code "O"}.</li>
 *   <li>If {@code doDigitCollapses} is set, words matching one of the
 *       year/number/date patterns below are replaced by a placeholder.</li>
 *   <li>If {@code doDowncasing} is set, the word is lower-cased.</li>
 * </ul>
 *
 * <p>The carrier's name is set to the array of words as read from the input
 * (before collapsing and down-casing). When {@code saveSource} is set, the
 * source is a {@link StringBuffer} holding one {@code "word label\n"} entry
 * per line, using the transformed word and the collapsed label.
 *
 * @param carrier the instance being piped; its data is cast to {@link String}
 * @return the same {@code carrier}, modified in place
 * @throws IllegalStateException if a non-empty line does not split into
 *         exactly two tab-separated fields
 */
public Instance pipe (Instance carrier)
{
	String sentenceLines = (String) carrier.getData();
	String[] tokens = sentenceLines.split ("\n");
	TokenSequence data = new TokenSequence (tokens.length);
	LabelSequence target = new LabelSequence ((LabelAlphabet)getTargetAlphabet(), tokens.length);
	StringBuffer source = saveSource ? new StringBuffer() : null;

	String word, label;
	String[] words = new String[tokens.length];
	for (int i = 0; i < tokens.length; i++) {
		if (tokens[i].length() != 0) {
			String[] features = tokens[i].split ("\t");
			if (features.length != 2)
				throw new IllegalStateException ("Line \""+sentenceLines + " " + tokens[i] +"\" doesn't have 2 elements");
			word = features[0];
			label = features[1];
		} else {
			// Blank line inside the sentence block: emit a placeholder token.
			word = "-<S>-";
			label = "O";
		}

		// Remember the word exactly as read, before any transformation.
		words[i] = word;

		// Collapse the label set to the malignancy-type labels plus "O".
		if(label.startsWith("I-malignancy-type"))
			label = "I-malignancy-type";
		else if(label.startsWith("B-malignancy-type"))
			label = "B-malignancy-type";
		else
			label = "O";

		// Transformations: the first matching pattern wins, so order matters.
		if (doDigitCollapses) {
			if (word.matches ("19\\d\\d"))
				word = "<YEAR>";
			else if (word.matches ("19\\d\\ds"))
				word = "<YEARDECADE>";
			else if (word.matches ("19\\d\\d-\\d+"))
				word = "<YEARSPAN>";
			else if (word.matches ("\\d+\\\\/\\d"))
				// digits, a literal backslash, a slash, then one digit (e.g. 1\/2)
				word = "<FRACTION>";
			else if (word.matches ("\\d[\\d,\\.]*"))
				word = "<DIGITS>";
			else if (word.matches ("19\\d\\d-\\d\\d-\\d\\d"))
				word = "<DATELINEDATE>";
			else if (word.matches (".*-led"))
				word = "<LED>";
			else if (word.matches (".*-sponsored"))
				word = "<LED>";
		}

		if (doDowncasing)
			word = word.toLowerCase();
		Token token = new Token (word);

		// Append
		data.add (token);
		target.add (label);
		if (saveSource) {
			source.append (word); source.append (" ");
			source.append (label); source.append ("\n");
		}

	}
	carrier.setData(data);
	carrier.setTarget(target);
	carrier.setName(words);
	if (saveSource)
		carrier.setSource(source);
	return carrier;
}
 
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:79,代码来源:MaligSentence2TokenSequence.java


注:本文中的edu.umass.cs.mallet.base.types.Instance.setSource方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。