本文整理汇总了Java中uk.ac.man.cs.choif.extend.structure.ContextVector类的典型用法代码示例。如果您正苦于以下问题:Java ContextVector类的具体用法?Java ContextVector怎么用?Java ContextVector使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
ContextVector类属于uk.ac.man.cs.choif.extend.structure包,在下文中一共展示了ContextVector类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: normalize
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Converts a tokenised document into per-sentence stem frequency
 * tables (context vectors). Each token is lower-cased; punctuation
 * tokens and stopwords are discarded, and the survivors are stemmed
 * before being counted.
 * Creation date: (11/05/99 03:43:34)
 * @param S the document, as an array of tokenised sentences
 * @return one {@link ContextVector} of stem frequencies per sentence
 */
// Modified by Christine Jacquin on 28/09/10:
// was "private final static", now "protected" so subclasses can override it.
protected ContextVector[] normalize(final String[][] S) {
    final WordList stopwords = WordList.stopwordList();
    final ContextVector[] vectors = new ContextVector[S.length];
    for (int sentence = 0; sentence < S.length; sentence++) {
        vectors[sentence] = new ContextVector();
        for (int pos = 0; pos < S[sentence].length; pos++) {
            final String word = S[sentence][pos].toLowerCase();
            // Only genuine words that are not stopwords contribute to the vector.
            if (Punctuation.isWord(word) && !stopwords.has(word)) {
                ContextVector.inc(Stemmer.stemOf(word), 1, vectors[sentence]);
            }
        }
    }
    return vectors;
}
示例2: segment
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Segments a document, given as a list of elementary text blocks
 * (usually tokenised sentences), into n coherent topic segments.
 * If n is -1 the algorithm chooses the number of segments itself by
 * monitoring the rate of increase in segment density.
 * Creation date: (11/05/99 05:55:46)
 * @param document a list of elementary text blocks (usually sentences);
 *                 each block is a string of space separated tokens
 * @param n number of segments to make; -1 lets the algorithm decide
 * @param s size of the ranking mask; must be &gt;= 3 and an odd number
 * @return a list of coherent topic segments
 */
// Modified by Christine Jacquin on 28/09/10:
// was "final static", now a plain instance method.
public String[][][] segment(final String[][] document, final int n, final int s) {
    Debugx.msg("C99", "Context vectors...");
    ContextVector[] vectors = normalize(document);

    Debugx.msg("C99", "Similarity matrix...");
    float[][] similarities = similarity(vectors);
    vectors = null; // release large intermediates as soon as possible

    Debugx.msg("C99", "Rank matrix (" + s + "x" + s + " rank mask)...");
    float[][] ranks = rank(similarities, s);
    similarities = null;

    Debugx.msg("C99", "Sum of rank matrix...");
    float[][] sums = sum(ranks);
    ranks = null;

    Debugx.msg("C99", "Divisive clustering (" + (n==-1 ? "automatic" : "user") + " termination)...");
    int[] cuts = Arrayx.sortAsc(boundaries(sums, n));
    sums = null;

    Debugx.msg("C99", "Found " + (cuts.length+1) + " segments...");
    return split(document, cuts);
}
示例3: normalize
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Redefines {@code normalize} from the superclass C99: same contract,
 * but the tokens and their stems come from the WST and Snowball (UIMA)
 * analysis results held in {@code rawText} rather than from the
 * parameter. {@code S} is unused; it exists only to match the
 * inherited signature.
 * @param S ignored; kept for signature compatibility with the superclass
 * @return one ContextVector of stem frequencies per sentence
 */
public ContextVector[] normalize(final String[][] S) {
    final WordList stopwords = WordList.stopwordList();
    final ContextVector[] vectors =
        new ContextVector[rawText.getSentenceArrayOfTokenFeatureArray().length];
    for (int i = 0; i < rawText.getSentenceArrayOfTokenFeatureArray().length; i++) {
        vectors[i] = new ContextVector();
        for (int j = 0; j < rawText.getSentenceArrayOfTokenFeatureArray()[i].length; j++) {
            final String token =
                rawText.getSentenceArrayOfTokenFeatureArray()[i][j].getToken().toLowerCase();
            // Punctuation.isWord() treats a lone "-" as a word,
            // so it must be excluded explicitly before the word/stopword test.
            if (!token.equals("-") && Punctuation.isWord(token) && !stopwords.has(token)) {
                final String stem =
                    rawText.getSentenceArrayOfTokenFeatureArray()[i][j].getTokenFeature().toLowerCase();
                ContextVector.inc(stem, 1, vectors[i]);
            }
        }
    }
    return vectors;
}
示例4: normalize
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Builds one stem frequency table (context vector) per sentence of a
 * tokenised document. Tokens are lower-cased; punctuation and
 * stopwords are skipped; remaining tokens are stemmed and counted.
 * Creation date: (11/05/99 03:43:34)
 * @param S the document, as an array of tokenised sentences
 * @return one {@link ContextVector} per sentence of {@code S}
 */
private final static ContextVector[] normalize(final String[][] S) {
    final WordList stopwords = WordList.stopwordList();
    final ContextVector[] vectors = new ContextVector[S.length];
    for (int sentence = 0; sentence < S.length; sentence++) {
        vectors[sentence] = new ContextVector();
        for (int pos = 0; pos < S[sentence].length; pos++) {
            final String word = S[sentence][pos].toLowerCase();
            // Count only real words that are not stopwords.
            if (Punctuation.isWord(word) && !stopwords.has(word)) {
                ContextVector.inc(Stemmer.stemOf(word), 1, vectors[sentence]);
            }
        }
    }
    return vectors;
}
示例5: segment
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Segments a document, given as a list of elementary text blocks
 * (usually tokenised sentences), into n coherent topic segments.
 * If n is -1 the algorithm decides the appropriate number of segments
 * by monitoring the rate of increase in segment density.
 * Creation date: (11/05/99 05:55:46)
 * @param document a list of elementary text blocks (usually sentences);
 *                 each block is a string of space separated tokens
 * @param n number of segments to make; -1 lets the algorithm decide
 * @param s size of the ranking mask; must be &gt;= 3 and an odd number
 * @return a list of coherent topic segments
 */
public final static String[][][] segment(final String[][] document, final int n, final int s) {
    Debugx.msg("C99", "Context vectors...");
    ContextVector[] vectors = normalize(document);

    Debugx.msg("C99", "Similarity matrix...");
    float[][] similarities = similarity(vectors);
    vectors = null; // release large intermediates as soon as possible

    Debugx.msg("C99", "Rank matrix (" + s + "x" + s + " rank mask)...");
    float[][] ranks = rank(similarities, s);
    similarities = null;

    Debugx.msg("C99", "Sum of rank matrix...");
    float[][] sums = sum(ranks);
    ranks = null;

    Debugx.msg("C99", "Divisive clustering (" + (n==-1 ? "automatic" : "user") + " termination)...");
    int[] cuts = Arrayx.sortAsc(boundaries(sums, n));
    sums = null;

    Debugx.msg("C99", "Found " + (cuts.length+1) + " segments...");
    return split(document, cuts);
}
示例6: segmentW
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Segments a document into n coherent topic segments, like
 * {@code segment}, but weights the similarity computation with an
 * entropy vector derived from the corpus-wide term frequencies.
 * If n is -1 the algorithm decides the appropriate number of segments
 * by monitoring the rate of increase in segment density.
 * Creation date: (11/05/99 05:55:46)
 * @param document a list of elementary text blocks (usually sentences);
 *                 each block is a string of space separated tokens
 * @param n number of segments to make; -1 lets the algorithm decide
 * @param s size of the ranking mask; must be &gt;= 3 and an odd number
 * @return a list of coherent topic segments
 */
public final static String[][][] segmentW(final String[][] document, final int n, final int s) {
    Debugx.msg("C99", "Context vectors...");
    // Overall term-frequency vector, filled in by normalize() as a side product.
    ContextVector termFrequencies = new ContextVector();
    ContextVector[] vectors = normalize(document, termFrequencies);

    Debugx.msg("C99", "Similarity matrix...");
    EntropyVector entropy = new EntropyVector(termFrequencies);
    float[][] similarities = similarity(vectors, entropy);
    vectors = null; // release large intermediates as soon as possible

    Debugx.msg("C99", "Rank matrix (" + s + "x" + s + " rank mask)...");
    float[][] ranks = rank(similarities, s);
    similarities = null;

    Debugx.msg("C99", "Sum of rank matrix...");
    float[][] sums = sum(ranks);
    ranks = null;

    Debugx.msg("C99", "Divisive clustering (" + (n==-1 ? "automatic" : "user") + " termination)...");
    int[] cuts = Arrayx.sortAsc(boundaries(sums, n));
    sums = null;

    Debugx.msg("C99", "Found " + (cuts.length+1) + " segments...");
    return split(document, cuts);
}
示例7: segmentW
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Segments a document into n coherent topic segments, like
 * {@code segment}, but weights the similarity computation with an
 * entropy vector derived from the corpus-wide term frequencies.
 * If n is -1 the algorithm decides the appropriate number of segments
 * by monitoring the rate of increase in segment density.
 * Creation date: (11/05/99 05:55:46)
 * @param document a list of elementary text blocks (usually sentences);
 *                 each block is a string of space separated tokens
 * @param n number of segments to make; -1 lets the algorithm decide
 * @param s size of the ranking mask; must be &gt;= 3 and an odd number
 * @return a list of coherent topic segments
 */
// Modified by Christine Jacquin on 28/09/10:
// was "final static", now a plain instance method.
public String[][][] segmentW(final String[][] document, final int n, final int s) {
    Debugx.msg("C99", "Context vectors...");
    // Overall term-frequency vector, filled in by normalize() as a side product.
    ContextVector termFrequencies = new ContextVector();
    ContextVector[] vectors = normalize(document, termFrequencies);

    Debugx.msg("C99", "Similarity matrix...");
    EntropyVector entropy = new EntropyVector(termFrequencies);
    float[][] similarities = similarity(vectors, entropy);
    vectors = null; // release large intermediates as soon as possible

    Debugx.msg("C99", "Rank matrix (" + s + "x" + s + " rank mask)...");
    float[][] ranks = rank(similarities, s);
    similarities = null;

    Debugx.msg("C99", "Sum of rank matrix...");
    float[][] sums = sum(ranks);
    ranks = null;

    Debugx.msg("C99", "Divisive clustering (" + (n==-1 ? "automatic" : "user") + " termination)...");
    int[] cuts = Arrayx.sortAsc(boundaries(sums, n));
    sums = null;

    Debugx.msg("C99", "Found " + (cuts.length+1) + " segments...");
    return split(document, cuts);
}
示例8: similarity
import uk.ac.man.cs.choif.extend.structure.ContextVector; //导入依赖的package包/类
/**
 * Computes the symmetric sentence-similarity matrix for a list of
 * context vectors, using the cosine measure.
 * Creation date: (11/05/99 04:45:51)
 * @param v the per-sentence context vectors
 * @return a {@code v.length x v.length} symmetric matrix of cosine similarities
 */
private final static float[][] similarity(final ContextVector[] v) {
    final int n = v.length;
    final float[][] matrix = new float[n][n];
    for (int row = 0; row < n; row++) {
        // Compute the lower triangle (diagonal included) and mirror it.
        for (int col = 0; col <= row; col++) {
            final float cosine = ContextVector.cos(v[row], v[col]);
            matrix[row][col] = cosine;
            matrix[col][row] = cosine;
        }
    }
    return matrix;
}