本文整理汇总了C#中Tokenizer.Tokenize方法的典型用法代码示例。如果您正苦于以下问题:C# Tokenizer.Tokenize方法的具体用法?C# Tokenizer.Tokenize怎么用?C# Tokenizer.Tokenize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Tokenizer的用法示例。
在下文中一共展示了Tokenizer.Tokenize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: BadTokenPosition
public void BadTokenPosition()
{
    // Verifies that UnknownTokenException reports the 1-based line/column of the
    // offending token and the token text itself.
    //
    // FIX: the original test passed vacuously if Tokenize() never threw — each
    // try block now ends with Assert.Fail() so a missing exception fails the test.
    Tokenizer tokenizer = new Tokenizer();
    tokenizer.AddTokenMatcher(new IntegerLiteralMatcher());
    tokenizer.AddTokenMatcher(new WhiteSpaceMatcher());

    try
    {
        // "A" is matched by neither registered matcher.
        tokenizer.Tokenize("5 A");
        Assert.Fail("Expected UnknownTokenException for unmatched token 'A'.");
    }
    catch (UnknownTokenException ex)
    {
        Assert.AreEqual(3, ex.Position.Column);
        Assert.AreEqual(1, ex.Position.Line);
        Assert.AreEqual("A", ex.Token);
    }

    try
    {
        // Multi-line input: the bad token "X" sits on line 3.
        tokenizer.Tokenize("5 4\r\n2\r\n X\r\n5");
        Assert.Fail("Expected UnknownTokenException for unmatched token 'X'.");
    }
    catch (UnknownTokenException ex)
    {
        Assert.AreEqual(4, ex.Position.Column);
        Assert.AreEqual(3, ex.Position.Line);
        Assert.AreEqual("X", ex.Token);
    }
}
示例2: NumericLiterals
public void NumericLiterals()
{
    // Each input contains two numeric literals separated by whitespace; the
    // tokenizer must produce exactly three tokens (literal, whitespace, literal)
    // with the literal text preserved, including type suffixes (m, ul, f, l).
    var tokenizer = new Tokenizer();
    tokenizer.AddTokenMatcher(new IntegerLiteralMatcher());
    tokenizer.AddTokenMatcher(new DecimalLiteralMatcher());
    tokenizer.AddTokenMatcher(new WhiteSpaceMatcher());

    // { input, expected first token, expected third token }
    string[][] cases =
    {
        new[] { "10 10.0", "10", "10.0" },
        new[] { "10m 10ul", "10m", "10ul" },
        new[] { "10f 10l", "10f", "10l" },
    };

    foreach (string[] c in cases)
    {
        Token[] tokens = tokenizer.Tokenize(c[0]);
        Assert.AreEqual(3, tokens.Length);
        Assert.AreEqual(c[1], tokens[0].Text);
        Assert.AreEqual(c[2], tokens[2].Text);
    }
}
示例3: TestTokenizerBasicWhitespaceSeparatedStringsWithWhitespaceIncluded
public void TestTokenizerBasicWhitespaceSeparatedStringsWithWhitespaceIncluded()
{
    // With WhitespaceBehavior.DelimitAndInclude, whitespace runs come back as
    // tokens in their own right, so:
    //   1. concatenating every token's value must reproduce the input exactly;
    //   2. two non-whitespace tokens must never be adjacent.
    //
    // FIXES: removed the unused 'lastToken' local; replaced repeated string
    // concatenation (O(n^2)) with a StringBuilder; simplified the nullable-bool
    // check ('x == false' is the lifted equality, identical to the original).
    var input = "one two three \n\n \t four\t\t\tfive\t\t\tsix";
    Tokenizer<Token> tokenizer = new Tokenizer<Token>();
    tokenizer.WhitespaceBehavior = WhitespaceBehavior.DelimitAndInclude;
    var tokens = tokenizer.Tokenize(input);

    var reconstructed = new System.Text.StringBuilder();
    bool? lastTokenWasWhitespace = null; // null until the first token is seen
    foreach (var token in tokens)
    {
        if (token.Value == null)
        {
            Assert.Fail("Unexpected null valued token");
        }
        else if (string.IsNullOrWhiteSpace(token.Value))
        {
            lastTokenWasWhitespace = true;
        }
        else
        {
            if (lastTokenWasWhitespace == false)
            {
                Assert.Fail("2 consecutive non-whitespace tokens encountered.");
            }
            lastTokenWasWhitespace = false;
        }
        reconstructed.Append(token.Value);
    }
    Assert.AreEqual(input, reconstructed.ToString());
}
示例4: Lexer
public Lexer(string text)
{
    // Tokenize the entire input eagerly, then terminate the token list with an
    // explicit end-of-input sentinel so consumers never run off the end.
    var tokenizer = new Tokenizer(text);
    foreach (var token in tokenizer.Tokenize())
    {
        _tokenList.Add(token);
    }
    _tokenList.Add(new Token { Kind = TokenKind.EndOfInput, RawInput = "<end>" });
}
示例5: Parse
public ICmdArgument[] Parse(string line)
{
    // Split the raw command line into tokens, then hand the token stream
    // straight to the parser.
    var tokens = new Tokenizer().Tokenize(line);
    return new Parser().Parse(tokens);
}
示例6: BadToken
// Tokenizing "5 A" must throw UnknownTokenException: "A" is matched by neither
// the integer-literal nor the whitespace matcher.
// NOTE(review): no try/catch and no visible [ExpectedException]-style attribute
// here — presumably the attribute sits on the method in the original file and
// was stripped by the example extraction. Confirm against the original test,
// otherwise this test only passes because nothing asserts.
public void BadToken()
{
Tokenizer tokenizer = new Tokenizer();
tokenizer.AddTokenMatcher(new IntegerLiteralMatcher());
tokenizer.AddTokenMatcher(new WhiteSpaceMatcher());
// Expected to throw UnknownTokenException on "A".
tokenizer.Tokenize("5 A");
}
示例7: TestStringLiteral
public void TestStringLiteral()
{
    // A quoted string — including one containing an escaped quote — must come
    // out as a single token, with whitespace and '+' tokens in between
    // (5 tokens total: literal, ws, '+', ws, literal).
    var lexer = new Tokenizer();
    lexer.AddTokenMatcher(new StringLiteralMatcher());
    lexer.AddTokenMatcher(new WhiteSpaceMatcher());
    lexer.AddTokenMatcher(new CharMatcher('+'));

    var result = lexer.Tokenize("\"test1\" + \"test2\"");
    Assert.AreEqual(5, result.Length);
    Assert.AreEqual("\"test1\"", result[0].Text);
    Assert.AreEqual("\"test2\"", result[4].Text);

    // The escaped quote (\") must not terminate the second literal.
    result = lexer.Tokenize("\"test1\" + \"test\\\"2\"");
    Assert.AreEqual(5, result.Length);
    Assert.AreEqual("\"test1\"", result[0].Text);
    Assert.AreEqual("\"test\\\"2\"", result[4].Text);
}
示例8: Index
public void Index(string text)
{
    // Tokenize the text and bump the index count for every word that is long
    // enough and not a stopword.
    if (text == "") return;

    var tokenizer = new Tokenizer(new CharFilter(new StringFlow(text)));
    foreach (string token in tokenizer.Tokenize())
    {
        if (token.Length < minLength)
        {
            continue; // too short to index
        }
        if (stopwords != null && stopwords.Contains(token))
        {
            continue; // filtered out as a stopword
        }
        index.Inc(token);
    }
}
示例9: TestTokenizerQuoteHandling
public void TestTokenizerQuoteHandling()
{
    var input = "Hi, my name is \"John Smith\"";

    // Pass 1: quoted text collapses into a single string-literal token,
    // whitespace is discarded.
    var tokenizer = new Tokenizer<Token>();
    tokenizer.WhitespaceBehavior = WhitespaceBehavior.DelimitAndExclude;
    tokenizer.DoubleQuoteBehavior = DoubleQuoteBehavior.IncludeQuotedTokensAsStringLiterals;
    AssertEqual(tokenizer.Tokenize(input), "Hi,", "my", "name", "is", "\"John Smith\"");

    // Pass 2: quotes get no special treatment and become ordinary delimiter
    // tokens; whitespace tokens are kept.
    tokenizer.WhitespaceBehavior = WhitespaceBehavior.DelimitAndInclude;
    tokenizer.DoubleQuoteBehavior = DoubleQuoteBehavior.NoSpecialHandling;
    tokenizer.Delimiters.Add("\"");
    AssertEqual(tokenizer.Tokenize(input), "Hi,", " ", "my", " ", "name", " ", "is", " ", "\"", "John", " ", "Smith", "\"");
}
示例10: BadToken
public void BadToken()
{
    // Tokenizing input containing a character no matcher accepts ("A") must
    // raise UnknownTokenException; reaching Assert.Fail() means it did not.
    //
    // FIX: dropped the unused 'ex' catch variable (compiler warning CS0168) —
    // the exception's presence, not its contents, is what this test checks.
    try
    {
        Tokenizer tokenizer = new Tokenizer();
        tokenizer.AddTokenMatcher(new IntegerLiteralMatcher());
        tokenizer.AddTokenMatcher(new WhiteSpaceMatcher());
        tokenizer.Tokenize("5 A");
        Assert.Fail();
    }
    catch (UnknownTokenException)
    {
        // Expected: "A" is matched by neither registered matcher.
    }
}
示例11: TestTokenizerBasicWhitespaceSeparatedStrings
public void TestTokenizerBasicWhitespaceSeparatedStrings()
{
    // Mixed spaces, tabs and newlines all delimit; with DelimitAndExclude the
    // whitespace itself is dropped, leaving exactly the six words.
    var tokenizer = new Tokenizer<Token>();
    tokenizer.WhitespaceBehavior = WhitespaceBehavior.DelimitAndExclude;

    var tokens = tokenizer.Tokenize("one two three \n\n \t four\t\t\tfive\t\t\tsix");

    string[] expected = { "one", "two", "three", "four", "five", "six" };
    Assert.AreEqual(expected.Length, tokens.Count);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], tokens[i].Value);
    }

    // Spot-check the recorded source positions (1-based line and column).
    Assert.AreEqual(1, tokens[0].Line);
    Assert.AreEqual(1, tokens[0].Column);
    Assert.AreEqual(1, tokens[1].Line);
    Assert.AreEqual(5, tokens[1].Column);
    Assert.AreEqual(3, tokens[3].Line);
    Assert.AreEqual(9, tokens[3].Column);
}
示例12: CreateIntegerStream
// Lazily converts a token stream into signed integers: minus tokens toggle the
// sign of the NEXT number; any other non-number token resets the sign to positive.
// e.g. "- 5" yields -5, "- - 5" yields 5.
// NOTE(review): the sign is NOT reset after a number is yielded — only a
// non-minus, non-number token resets it, so in "- 5 6" the 6 would also come
// out negative. Confirm whether that is the intended semantics.
private IEnumerable<int> CreateIntegerStream(Stream stream)
{
Tokenizer tokenizer = new Tokenizer();
var sequence = tokenizer.Tokenize(stream);
bool isNegative = false;
foreach (var token in sequence)
{
if (token.IsMinus())
{
// Consecutive minus signs toggle (double negation -> positive).
isNegative = !isNegative;
}
else if (token.IsNumber())
{
yield return (isNegative ? -1 : 1) * token.AsInt();
}
else
{
// Any other token breaks the minus run and resets the sign.
isNegative = false;
}
}
}
示例13: TestWordExtract
// Builds three sample tweets (a mention, a retweet with a hashtag, and one with
// a URL), tokenizes each tweet's text, attaches every token classified as an
// "entity" back onto the tweet, and dumps results to the console for inspection.
// NOTE(review): prints instead of asserting — this is a smoke test, not a
// verification of the extracted entities.
public void TestWordExtract()
{
//setup
IUrlExpander expander = new UrlExpander();
Tokenizer tokenizer = new Tokenizer(expander);
List<Tweet> tweets = new List<Tweet>();
tweets.Add( new Tweet() {
text = "@steelers_munoz why can't you sleep? i'm starving, I haven't had dinner and it's 10.25 haha",
date_scanned = DateTime.Now.ToLongTimeString(), date_tweeted = DateTime.Now.ToLongTimeString(), english_similarity = 0.5, sample_reason = SampleReason.user_data.ToString(), screen_name = "utunga", twitter_id = 9128123123});
tweets.Add( new Tweet() {
text = "RT @OMGTeenQuotez: the bad experiences i been through made me stronger.. #OMGTeenQuotez",
date_scanned = DateTime.Now.ToLongTimeString(), date_tweeted = DateTime.Now.ToLongTimeString(), english_similarity = 0.5, sample_reason = SampleReason.user_data.ToString(), screen_name = "utunga", twitter_id = 9128123123});
tweets.Add( new Tweet() {
text = "the dog did it http://bit.ly/bkBS0o",
date_scanned = DateTime.Now.ToLongTimeString(), date_tweeted = DateTime.Now.ToLongTimeString(), english_similarity = 0.5, sample_reason = SampleReason.user_data.ToString(), screen_name = "utunga", twitter_id = 9128123123});
foreach (Tweet tw in tweets)
{
string screenName = tw.screen_name;
long? twitter_id = tw.twitter_id;
// Tokenize the tweet body; each token becomes a candidate Word.
foreach (string text in tokenizer.Tokenize(tw.text))
{
Word word = new Word { screen_name = screenName, text = text, twitter_id = twitter_id };
// Only tokens the Word type classifies as entities are kept on the tweet.
if (word.IsEntity())
{
tw.AddEntity(word.text);
Console.Out.WriteLine("entity: " + word.text);
}
}
Console.Out.WriteLine("JSON:" + JSON.Serialize(tw));
}
}
示例14: TestTokenizerQuoteHandlingWithEscapeSequenceBlockingQuotes
public void TestTokenizerQuoteHandlingWithEscapeSequenceBlockingQuotes()
{
    // An escaped quote (\") must NOT open a string literal, even with
    // string-literal handling enabled and '"' registered as a delimiter —
    // so "John Smith" stays split into two ordinary tokens.
    var input = "Hi, my name is \\\"John Smith\\\"";

    var tokenizer = new Tokenizer<Token>();
    tokenizer.WhitespaceBehavior = WhitespaceBehavior.DelimitAndExclude;
    tokenizer.DoubleQuoteBehavior = DoubleQuoteBehavior.IncludeQuotedTokensAsStringLiterals;
    tokenizer.Delimiters.Add("\"");

    AssertEqual(tokenizer.Tokenize(input), "Hi,", "my", "name", "is", "\"John", "Smith\"");
}
示例15: TokenizeFile
private IList<Token> TokenizeFile(CarbonFile file)
{
    // Tokenizes the file's contents as UTF-8 against this.grammar, timing the
    // whole operation under a "Tokenize" profiling region.
    var tokenizer = new Tokenizer();

    // Stacked usings: profiling region wraps the stream, which wraps the reader.
    // leaveOpen: true on the reader — the stream is disposed by its own using.
    using (new ProfileRegion("Tokenize"))
    using (var stream = file.OpenRead())
    using (var reader = new StreamReader(stream, Encoding.UTF8, false, 4096, true))
    {
        return tokenizer.Tokenize(this.grammar, reader);
    }
}