本文整理汇总了C++中Codec::valid方法的典型用法代码示例。如果您正苦于以下问题:C++ Codec::valid方法的具体用法?C++ Codec::valid怎么用?C++ Codec::valid使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Codec的用法示例。
在下文中一共展示了Codec::valid方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: guessTextCodec
// Picks a codec for a text body by trying candidates from the most
// restrictive to the most permissive. Returns the first candidate
// that fits, or 0 when nothing is plausible.
static Codec * guessTextCodec( const EString & body )
{
    // ISO-2022-JP is tested first: the check is very strict, and text
    // in that encoding would otherwise also pass the ASCII and UTF-8
    // tests further down. It is only attempted when the body opens
    // with ESC, then '(' or '$', then 'B', 'J' or '@'.
    const bool opensWithEscape =
        body[0] == 0x1B &&
        ( body[1] == '(' || body[1] == '$' ) &&
        ( body[2] == 'B' || body[2] == 'J' || body[2] == '@' );
    if ( opensWithEscape ) {
        Codec * iso2022jp = new Iso2022JpCodec;
        iso2022jp->toUnicode( body );
        if ( iso2022jp->wellformed() )
            return iso2022jp;
    }

    // Next candidate: plain ASCII.
    Codec * ascii = new AsciiCodec;
    (void)ascii->toUnicode( body );
    if ( ascii->wellformed() )
        return ascii;

    // Multibyte encodings that only use octets 0x01-0x7f have to be
    // checked before UTF-8, since UTF-8 would accept them too. That
    // is known to matter for iso-2022-jp and may matter for others.

    // Next candidate: UTF-8. If the ASCII attempt was at least valid,
    // ASCII is still preferred over UTF-8.
    Codec * utf8 = new Utf8Codec;
    (void)utf8->toUnicode( body );
    if ( utf8->wellformed() )
        return ascii->valid() ? ascii : utf8;

    // Next candidate: whatever the codec registry infers from the
    // content. The extra decode is probably redundant, but it makes
    // sure wellformed() reflects this body.
    Codec * byContent = Codec::byString( body );
    if ( byContent ) {
        (void)byContent->toUnicode( body );
        if ( byContent->wellformed() )
            return byContent;
    }

    // Last resort: accept UTF-8 if it was valid, even though it was
    // not wellformed.
    if ( utf8->valid() )
        return utf8;

    // Open question kept from the original: should byContent be
    // returned here when it is valid()?
    return 0;
}
示例2: Bodypart::parseBodypart
// Creates a Bodypart from part of the message text in rfc2822,
// attaching it to parent and to the header h. The body is the slice
// taken with rfc2822.mid( start, end-start ); it is decoded according
// to the Content-Transfer-Encoding in h, and text parts are converted
// to unicode with a codec chosen from the Content-Type charset.
//
// NOTE: this excerpt is truncated; the remainder of the function
// (including its return statement) is not shown.
Bodypart * Bodypart::parseBodypart( uint start, uint end,
const EString & rfc2822,
Header * h, Multipart * parent )
{
// skip one leading CR (13) and/or one leading LF (10).
if ( rfc2822[start] == 13 )
start++;
if ( rfc2822[start] == 10 )
start++;
Bodypart * bp = new Bodypart;
bp->setParent( parent );
bp->setHeader( h );
// body stays empty unless the range is non-empty.
EString body;
if ( end > start )
body = rfc2822.mid( start, end-start );
if ( !body.contains( '=' ) ) {
// sometimes people send c-t-e: q-p _and_ c-t-e: 7bit or 8bit.
// if they are equivalent we can accept it.
// (a body without any '=' is what makes them equivalent here.)
uint i = 0;
bool any = false;
HeaderField * f = 0;
// count the c-t-e fields and note whether any of them says q-p.
while ( (f=h->field(HeaderField::ContentTransferEncoding,i)) != 0 ) {
if ( ((ContentTransferEncoding*)f)->encoding() == EString::QP )
any = true;
i++;
}
// more than one c-t-e field, at least one of them q-p: remove.
if ( any && i > 1 )
h->removeField( HeaderField::ContentTransferEncoding );
}
// the transfer encoding is Binary unless the header says otherwise.
EString::Encoding e = EString::Binary;
ContentTransferEncoding * cte = h->contentTransferEncoding();
if ( cte )
e = cte->encoding();
if ( !body.isEmpty() ) {
// base64 and uuencode are decoded as-is; every other encoding is
// passed through crlf() before decoding.
if ( e == EString::Base64 || e == EString::Uuencode )
body = body.decoded( e );
else
body = body.crlf().decoded( e );
}
// if there is no Content-Type, add one based on the header's default
// type and look it up again.
ContentType * ct = h->contentType();
if ( !ct ) {
switch ( h->defaultType() ) {
case Header::TextPlain:
h->add( "Content-Type", "text/plain" );
break;
case Header::MessageRfc822:
h->add( "Content-Type", "message/rfc822" );
break;
}
ct = h->contentType();
}
// NOTE(review): ct is dereferenced here, but checked against null a
// few lines below. If defaultType() can return a value not handled
// by the switch above, ct may still be null at this point - confirm.
if ( ct->type() == "text" ) {
// specified and unknown are only written in the visible part of
// this function; presumably the omitted part reads them.
bool specified = false;
bool unknown = false;
Codec * c = 0;
if ( ct ) {
// a charset of "default" (in any case) is treated as no charset.
EString csn = ct->parameter( "charset" );
if ( csn.lower() == "default" )
csn = "";
if ( !csn.isEmpty() )
specified = true;
c = Codec::byName( csn );
if ( !c )
unknown = true;
if ( c && c->name().lower() == "us-ascii" ) {
// Some MTAs appear to say this in case there is no
// Content-Type field - without checking whether the
// body actually is ASCII. If it isn't, we'd better
// call our charset guesser.
(void)c->toUnicode( body );
if ( !c->valid() )
specified = false;
// Not pretty.
}
}
// no codec found by name: decode as ASCII.
if ( !c )
c = new AsciiCodec;
bp->d->hasText = true;
bp->d->text = c->toUnicode( body.crlf() );
if ( c->name() == "GB2312" || c->name() == "ISO-2022-JP" ||
c->name() == "KS_C_5601-1987" ) {
// undefined code point usage in GB2312 spam is much too
// common. (GB2312 spam is much too common, but that's
// another matter.) Gb2312Codec turns all undefined code
// points into U+FFFD, so here, we can take the unicode
// form and say it's the canonical form. when a client
// later reads the message, it gets the text in unicode,
// including U+FFFD.
// bad records whether the conversion just above was invalid.
bool bad = !c->valid();
// the header may contain some unencoded gb2312. we bang
// it by hand, ignoring errors.
//......... part of the code is omitted here .........
示例3: guessHtmlCodec
// Picks a codec for an HTML body. Starts from guessTextCodec(), falls
// back to ISO-8859-1 and then Windows codepage 1252, and finally lets
// a charset named in a <meta http-equiv="content-type"> tag replace
// the guess when that charset looks at least as credible.
// Returns 0 if no codec could be found.
static Codec * guessHtmlCodec( const EString & body )
{
    // Begin with whatever the general text guesser suggests.
    Codec * guess = guessTextCodec( body );

    // HTML says 8859-1 is the default, so try that if we have
    // nothing yet. It is only kept if the body is valid in it.
    if ( !guess ) {
        Codec * latin1 = new Iso88591Codec;
        (void)latin1->toUnicode( body );
        if ( latin1->valid() )
            guess = latin1;
    }

    // Some people believe that Windows codepage 1252 is ISO-8859-1.
    // Try 1252 when there is no guess at all, or when the guess is a
    // not-wellformed ISO-8859-1 or ISO-8859-15.
    bool tryCp1252 = !guess;
    if ( !tryCp1252 && !guess->wellformed() )
        tryCp1252 = guess->name() == "ISO-8859-1" ||
                    guess->name() == "ISO-8859-15";
    if ( tryCp1252 ) {
        Codec * cp1252 = new Cp1252Codec;
        (void)cp1252->toUnicode( body );
        if ( cp1252->wellformed() )
            guess = cp1252;
    }

    // Some user-agents put the charset in a <meta http-equiv> tag
    // rather than in the Content-Type field. Look at every such tag
    // in a lowercased, simplified copy of the body and decide whether
    // its charset is more likely to be right than the guess so far.
    EString tag( "<meta http-equiv=\"content-type\" content=\"" );
    EString lowered = body.lower().simplified();
    int cursor = 0;
    for ( ;; ) {
        cursor = lowered.find( tag, cursor );
        if ( cursor < 0 )
            break;

        // The attribute value runs from just after the tag prefix to
        // the next double quote (or to the end of the text).
        cursor += tag.length();
        int close = cursor;
        while ( close < static_cast<int>( lowered.length() ) &&
                lowered[close] != '"' )
            ++close;

        // Parse the value as a Content-Type field to get its charset.
        HeaderField * field
            = HeaderField::create( "Content-Type",
                                   lowered.mid( cursor, close - cursor ) );
        EString charset = ((MimeField*)field)->parameter( "charset" );
        Codec * meta = 0;
        if ( !charset.isEmpty() )
            meta = Codec::byName( charset );

        // Decode the body with both candidates (meta first), so their
        // results and their wellformed()/valid() state can be compared.
        UString metaText;
        if ( meta )
            metaText = meta->toUnicode( body );
        UString guessText;
        if ( guess )
            guessText = guess->toUnicode( body );

        if ( !meta )
            continue;

        // The meta charset is credible if it gives the same text as
        // the guess, if it is wellformed and the guess is not, or if
        // it is valid and the guess is missing, ISO-8859-1 or invalid.
        bool credible = false;
        if ( !metaText.isEmpty() && metaText == guessText )
            credible = true;
        else if ( meta->wellformed() &&
                  ( !guess || !guess->wellformed() ) )
            credible = true;
        else if ( meta->valid() )
            credible = !guess ||
                       guess->name() == "ISO-8859-1" ||
                       !guess->valid();

        // Finally, the tag itself must still be found after decoding
        // the lowercased text with the meta codec.
        if ( credible &&
             meta->toUnicode( lowered ).ascii().contains( tag ) )
            guess = meta;
    }

    return guess;
}