当前位置: 首页>>代码示例>>C++>>正文


C++ Codec::valid方法代码示例

本文整理汇总了C++中Codec::valid方法的典型用法代码示例。如果您正苦于以下问题:C++ Codec::valid方法的具体用法?C++ Codec::valid怎么用?C++ Codec::valid使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Codec的用法示例。


在下文中一共展示了Codec::valid方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: guessTextCodec

// Tries a fixed sequence of codecs on \a body and returns the first
// one that is accepted: ISO-2022-JP, ASCII, UTF-8, the codec proposed
// by Codec::byString(), and finally UTF-8 again under the weaker
// valid() test. Returns 0 if none of them is accepted.
static Codec * guessTextCodec( const EString & body )
{
    // ISO-2022-JP goes first. Its test is very restrictive, and text
    // in that encoding would also pass the ASCII and UTF-8 tests
    // below. It is attempted only when the body opens with ESC
    // followed by '(' or '$' and then 'B', 'J' or '@'.
    const bool hasIso2022JpEscape =
        body[0] == 0x1B &&
        ( body[1] == '(' || body[1] == '$' ) &&
        ( body[2] == 'B' || body[2] == 'J' || body[2] == '@' );
    if ( hasIso2022JpEscape ) {
        Codec * jp = new Iso2022JpCodec;
        (void)jp->toUnicode( body );
        if ( jp->wellformed() )
            return jp;
    }

    // Next candidate: plain ASCII.
    Codec * ascii = new AsciiCodec;
    (void)ascii->toUnicode( body );
    if ( ascii->wellformed() )
        return ascii;

    // Any multibyte encoding that restricts itself to octets in the
    // range 0x01-0x7f (iso-2022-jp at least, maybe others) has to be
    // tested before this point, because UTF-8 would match it too.

    // Then UTF-8. If the ASCII attempt above was at least valid, the
    // ASCII codec is preferred over the UTF-8 one.
    Codec * utf8 = new Utf8Codec;
    (void)utf8->toUnicode( body );
    if ( utf8->wellformed() )
        return ascii->valid() ? ascii : utf8;

    // Ask Codec::byString() for a suggestion based on the content,
    // and run the text through it again just to be sure.
    Codec * suggested = Codec::byString( body );
    if ( suggested ) {
        (void)suggested->toUnicode( body );
        if ( suggested->wellformed() )
            return suggested;
    }

    // Last resort: accept UTF-8 if it is at least valid. (Open
    // question carried over from the original author: should a
    // valid() suggestion from byString() be used here instead?)
    if ( utf8->valid() )
        return utf8;

    return 0;
}
开发者ID:,项目名称:,代码行数:51,代码来源:

示例2: Bodypart::parseBodypart

Bodypart * Bodypart::parseBodypart( uint start, uint end,
                                    const EString & rfc2822,
                                    Header * h, Multipart * parent )
{
    if ( rfc2822[start] == 13 )
        start++;
    if ( rfc2822[start] == 10 )
        start++;

    Bodypart * bp = new Bodypart;
    bp->setParent( parent );
    bp->setHeader( h );

    EString body;
    if ( end > start )
        body = rfc2822.mid( start, end-start );
    if ( !body.contains( '=' ) ) {
        // sometimes people send c-t-e: q-p _and_ c-t-e: 7bit or 8bit.
        // if they are equivalent we can accept it.
        uint i = 0;
        bool any = false;
        HeaderField * f = 0;
        while ( (f=h->field(HeaderField::ContentTransferEncoding,i)) != 0 ) {
            if ( ((ContentTransferEncoding*)f)->encoding() == EString::QP )
                any = true;
            i++;
        }
        if ( any && i > 1 )
            h->removeField( HeaderField::ContentTransferEncoding );
    }

    EString::Encoding e = EString::Binary;
    ContentTransferEncoding * cte = h->contentTransferEncoding();
    if ( cte )
        e = cte->encoding();
    if ( !body.isEmpty() ) {
        if ( e == EString::Base64 || e == EString::Uuencode )
            body = body.decoded( e );
        else
            body = body.crlf().decoded( e );
    }

    ContentType * ct = h->contentType();
    if ( !ct ) {
        switch ( h->defaultType() ) {
        case Header::TextPlain:
            h->add( "Content-Type", "text/plain" );
            break;
        case Header::MessageRfc822:
            h->add( "Content-Type", "message/rfc822" );
            break;
        }
        ct = h->contentType();
    }
    if ( ct->type() == "text" ) {
        bool specified = false;
        bool unknown = false;
        Codec * c = 0;

        if ( ct ) {
            EString csn = ct->parameter( "charset" );
            if ( csn.lower() == "default" )
                csn = "";
            if ( !csn.isEmpty() )
                specified = true;
            c = Codec::byName( csn );
            if ( !c )
                unknown = true;
            if ( c && c->name().lower() == "us-ascii" ) {
                // Some MTAs appear to say this in case there is no
                // Content-Type field - without checking whether the
                // body actually is ASCII. If it isn't, we'd better
                // call our charset guesser.
                (void)c->toUnicode( body );
                if ( !c->valid() )
                    specified = false;
                // Not pretty.
            }
        }

        if ( !c )
            c = new AsciiCodec;

        bp->d->hasText = true;
        bp->d->text = c->toUnicode( body.crlf() );

        if ( c->name() == "GB2312" || c->name() == "ISO-2022-JP" ||
             c->name() == "KS_C_5601-1987" ) {
            // undefined code point usage in GB2312 spam is much too
            // common. (GB2312 spam is much too common, but that's
            // another matter.) Gb2312Codec turns all undefined code
            // points into U+FFFD, so here, we can take the unicode
            // form and say it's the canonical form. when a client
            // later reads the message, it gets the text in unicode,
            // including U+FFFD.

            bool bad = !c->valid();

            // the header may contain some unencoded gb2312. we bang
            // it by hand, ignoring errors.
//.........这里部分代码省略.........
开发者ID:,项目名称:,代码行数:101,代码来源:

示例3: guessHtmlCodec

// Guesses a codec for the HTML text in \a body. Starts from the
// result of guessTextCodec(), falls back to ISO-8859-1 and then
// Windows-1252, and finally lets a
// <meta http-equiv="content-type"> declaration inside the document
// override the guess when that looks more trustworthy. May return 0.
static Codec * guessHtmlCodec( const EString & body )
{
    // Start with whatever the general-purpose guesser says.
    Codec * guess = guessTextCodec( body );

    // HTML's default is 8859-1, so try that if nothing else matched.
    if ( !guess ) {
        Codec * latin1 = new Iso88591Codec;
        (void)latin1->toUnicode( body );
        if ( latin1->valid() )
            guess = latin1;
    }

    // Windows codepage 1252 is sometimes labelled as ISO-8859-1. Try
    // it when there is no guess at all, or when the guess is an
    // 8859-1/8859-15 codec that did not decode cleanly.
    bool tryCp1252 = !guess;
    if ( !tryCp1252 && !guess->wellformed() )
        tryCp1252 = guess->name() == "ISO-8859-1" ||
                    guess->name() == "ISO-8859-15";
    if ( tryCp1252 ) {
        Codec * cp1252 = new Cp1252Codec;
        (void)cp1252->toUnicode( body );
        if ( cp1252->wellformed() )
            guess = cp1252;
    }

    // Some user-agents put the charset in a
    // <meta http-equiv="content-type"> element rather than in the
    // Content-Type field. Look at each such element and decide
    // whether it is more believable than the guess made above.

    EString haystack = body.lower().simplified();
    EString tag( "<meta http-equiv=\"content-type\" content=\"" );
    int pos = 0;
    for ( ;; ) {
        pos = haystack.find( tag, pos );
        if ( pos < 0 )
            break;

        // Step past the tag prefix and find the end of the content
        // attribute (the next double quote, or the end of the text).
        pos = pos + tag.length();
        int close = pos;
        while ( close < (int)haystack.length() && haystack[close] != '"' )
            close++;

        // Parse the attribute value as if it were a Content-Type
        // header field and extract its charset parameter.
        HeaderField * hf
            = HeaderField::create( "Content-Type",
                                   haystack.mid( pos, close-pos ) );
        EString charset = ((MimeField*)hf)->parameter( "charset" );
        Codec * meta = 0;
        if ( !charset.isEmpty() )
            meta = Codec::byName( charset );

        // Decode the body with both candidates. Both calls are made
        // whenever the respective codec exists, even if the result
        // ends up unused; wellformed()/valid() are queried on the
        // codecs right after these calls.
        UString viaMeta;
        if ( meta )
            viaMeta = meta->toUnicode( body );
        UString viaGuess;
        if ( guess )
            viaGuess = guess->toUnicode( body );

        if ( !meta )
            continue;

        // The meta codec is preferred when any of these holds:
        //  - both codecs produce the same non-empty text;
        //  - meta decodes cleanly and the guess is missing or doesn't;
        //  - meta is valid and the guess is missing, is ISO-8859-1,
        //    or is not valid.
        bool preferMeta = false;
        if ( !viaMeta.isEmpty() && viaMeta == viaGuess )
            preferMeta = true;
        else if ( meta->wellformed() &&
                  ( !guess || !guess->wellformed() ) )
            preferMeta = true;
        else if ( meta->valid() )
            preferMeta = !guess ||
                         guess->name() == "ISO-8859-1" ||
                         !guess->valid();

        // Final sanity check: the tag must still be findable after
        // the lowercased text has gone through the meta codec.
        if ( preferMeta &&
             meta->toUnicode( haystack ).ascii().contains( tag ) )
            guess = meta;
    }

    return guess;
}
开发者ID:,项目名称:,代码行数:70,代码来源:


注:本文中的Codec::valid方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。