本文整理汇总了C#中Tokenizer.GetTokens方法的典型用法代码示例。如果您正苦于以下问题:C# Tokenizer.GetTokens方法的具体用法?C# Tokenizer.GetTokens怎么用?C# Tokenizer.GetTokens使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Tokenizer
的用法示例。
在下文中一共展示了Tokenizer.GetTokens方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: Test
// Verifies that GetTokens keeps only word tokens, discarding
// punctuation and numeric noise from the input text.
public void Test()
{
var inputText = "first second,--214 third";
var expected = new[] { "first", "second", "third" };
var actual = new Tokenizer().GetTokens(inputText);
Assert.That(actual, Is.EquivalentTo(expected));
}
示例2: Tokenizer_SimpleExpr_Parsed
// Verifies that a simple prefix expression is split into its
// operator and operand tokens in the original order.
public void Tokenizer_SimpleExpr_Parsed()
{
var expected = new[] { "+", "2", "3" };
var tokens = new Tokenizer().GetTokens("+ 2 3");
var index = 0;
foreach (var expectedToken in expected)
{
Assert.AreEqual(expectedToken, tokens[index]);
index++;
}
}
示例3: Tokenizer_Complex_Parsed
// Verifies that a nested parenthesised expression is tokenized into
// individual parentheses, operators and operands, preserving order.
public void Tokenizer_Complex_Parsed()
{
var expected = new[] { "(", "+", "(", "*", "4", "8", "9", "(", "/", "3", "6", ")", ")", "3", ")" };
var tokens = new Tokenizer().GetTokens("(+ (* 4 8 9 (/ 3 6)) 3)");
for (var position = 0; position < expected.Length; position++)
{
Assert.AreEqual(expected[position], tokens[position]);
}
}
示例4: GetConfusions
// Runs a language-identification confusion experiment: for every language in
// mostCommonLanguagesArray it loads a text sample, tokenizes a slice from the
// middle of it, classifies sliding token windows of many different lengths
// with the supplied identifier, and writes one confusion-matrix CSV per
// window length to _outputFolder.
//
// identify: maps a text window to candidate (LanguageInfo, score) pairs; only
//   the first (best) candidate is used.
// method: label embedded in the output CSV file names.
// mostCommonLanguagesArray: language codes under test; a sample file is
//   expected at {_languageSamplesDir}/{code}.txt for each one.
private void GetConfusions(Func<string, IEnumerable<Tuple<LanguageInfo, double>>> identify, string method, HashSet<string> mostCommonLanguagesArray)
{
// Assign each language a stable integer index (its enumeration position)
// for use as a class label in the confusion matrix.
var mostCommonLanguages = mostCommonLanguagesArray.Select((item, i) => new { item, i }).ToDictionary(_ => _.item, _ => _.i);
// Window sizes to evaluate: every length 1..10, then a sparser grid up to 200.
var windowLengthList =
Enumerable.Range(1, 10).Concat(new[] { 13, 16, 20, 23, 26, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200 }).ToArray();
mostCommonLanguagesArray
.Select(
lang =>
{
// Load the raw sample and cut a window of up to ~1M characters
// centred on the middle of the text.
var text = File.ReadAllText(Path.Combine(_languageSamplesDir, lang + ".txt"));
var middle = text.Length/2;
var window = 1*1000*1000;
// take the middle of 1M characters length
return Tuple.Create(lang, text.Substring(Math.Max(middle - window/2, 0), Math.Min(window - 1, text.Length)));
})
// Parallelize per-language tokenization/identification but keep the
// input ordering of results.
.AsParallel()
.AsOrdered()
.SelectMany(
_ =>
{
var lang = _.Item1;
var sample = _.Item2;
var tokenizer = new Tokenizer();
//printfn "tokenizing"
// Skip the first 5 tokens (likely to be truncated by the substring
// cut — NOTE(review): assumption, confirm) and keep the next 1000.
var tokenNumber = 1000;
var tokens = tokenizer.GetTokens(sample).Skip(5).Take(tokenNumber).ToArray();
//printfn "tokenized"
return
windowLengthList
.Select(
windowLength =>
{
// Stride the windows so roughly 100 are sampled per
// (language, windowLength) pair regardless of length.
var windowCount = tokenNumber - windowLength + 1;
var samplePeriod = (int) Math.Ceiling(windowCount/100.0); //100 samples on average
// Buffer(count, skip) presumably yields token windows of
// windowLength items advancing by samplePeriod — an
// Ix-style extension; TODO confirm its overlap semantics.
var actuals =
tokens.Buffer(windowLength, samplePeriod)
.Select(tokenWindow => System.String.Join(" ", tokenWindow))
// Keep only the top-ranked language's ISO 639-2/T code.
.Select(windowText => identify(windowText).First().Item1.Iso639_2T)
.ToArray();
return Tuple.Create(lang, windowLength, actuals);
});
})
// Regroup the (lang, windowLength, predictions) triples by window length
// so each group becomes one confusion matrix / one CSV file.
.GroupBy(_ => _.Item2)
.ForEach(g =>
{
var windowLength = g.Key;
// Flatten to (expectedIndex, predictedIndex) pairs. NOTE(review):
// mostCommonLanguages[a] throws if identify returns a language
// outside mostCommonLanguagesArray — verify that cannot happen.
var experiment =
g.SelectMany(
_ =>
{
var lang = _.Item1;
var actuals = _.Item3;
return actuals
.Select(a => Tuple.Create(mostCommonLanguages[lang], mostCommonLanguages[a]));
})
.ToArray();
var matrix = new GeneralConfusionMatrix(
mostCommonLanguagesArray.Count, experiment.Select(_ => _.Item1).ToArray(), experiment.Select(_ => _.Item2).ToArray());
// One CSV per window length, e.g. "25.myMethod.csv".
using (var writer = new StreamWriter(Path.Combine(_outputFolder, windowLength + "." + method + ".csv")))
{
PrintMatrix(writer, matrix, mostCommonLanguagesArray.ToArray());
}
});
}