当前位置: 首页>>代码示例>>C#>>正文


C# IFilter.GetChunk方法代码示例

本文整理汇总了C#中IFilter.GetChunk方法的典型用法代码示例。如果您正苦于以下问题：C# IFilter.GetChunk方法的具体用法？C# IFilter.GetChunk怎么用？C# IFilter.GetChunk使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在接口IFilter的用法示例。


在下文中一共展示了IFilter.GetChunk方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetJson

        private static RavenJObject GetJson(IFilter filter)
        {
            // initialize a buffer for text results
            const int defaultBufferSize = 4096;
            var buffer = new StringBuilder(defaultBufferSize);

            // Initialize the json writers
            using (var textWriter = new RavenJTokenWriter())
            using (var propWriter = new RavenJTokenWriter())
            {
                // Write the beginning of the json arrays
                textWriter.WriteStartArray();
                propWriter.WriteStartArray();

                string last = null;

                // Outer loop will read chunks from the document.
                // For those chunks that have text, the contents will be written to json.
                while (true)
                {
                    // Try to get a chunk of data
                    STAT_CHUNK statChunk;
                    var chunkStatus = filter.GetChunk(out statChunk);
                    switch (chunkStatus)
                    {
                        case IFilterReturnCodes.S_OK:
                            // We have a good chunk of data
                            break;

                        case IFilterReturnCodes.FILTER_E_END_OF_CHUNKS:
                            // No more data.
                            if (buffer.Length > 0)
                            {
                                // Make sure we have no unwritten data first.
                                textWriter.WriteLines(buffer.ToString());
                                buffer.Clear();
                            }

                            // close the json array and flush the writers
                            textWriter.WriteEndArray();
                            propWriter.WriteEndArray();
                            textWriter.Flush();
                            propWriter.Flush();

                            // assemble and return the document
                            return new RavenJObject
                                   {
                                       //{ "Properties", propWriter.Token }, // TODO: restore this when properties can be retrieved
                                       { "Text", textWriter.Token }
                                   };

                        case IFilterReturnCodes.FILTER_E_EMBEDDING_UNAVAILABLE:
                        case IFilterReturnCodes.FILTER_E_LINK_UNAVAILABLE:
                            // Ignore these warnings
                            continue;

                        default:
                            // Something else - throw an exception
                            throw new COMException("IFilter COM error while getting a chunk of data: " + chunkStatus);
                    }

                    //// Handle property value chunks  TODO: make this work so we can index properties in addition to text
                    //if (statChunk.flags.HasFlag(CHUNKSTATE.CHUNK_VALUE))
                    //{
                    //    // get the property name  TODO: This doesn't seem to work
                    //    var propInfo = statChunk.attribute.psProperty;
                    //    var propName = propInfo.ulKind == 0 ? Marshal.PtrToStringAuto(propInfo.lpwstr) : propInfo.propid.ToString();
                    //

                    //    // will this help?
                    //    var propGuid = statChunk.attribute.guidPropSet;

                    //    // get the value  TODO: This doesn't seem to work
                    //    PROPVARIANT ppPropValue;
                    //    var valueStatus = filter.GetValue(out ppPropValue);
                    //    if (valueStatus == IFilterReturnCodes.S_OK || valueStatus == IFilterReturnCodes.FILTER_S_LAST_VALUES)
                    //    {
                    //        // write the value to json
                    //        propWriter.WriteStartObject();
                    //        propWriter.WritePropertyName(propName);
                    //        propWriter.WriteValue(ppPropValue.Value);
                    //        propWriter.WriteEndObject();

                    //        // free unmanaged memory from the PropVariant
                    //        ppPropValue.Clear();
                    //    }
                    //}

                    // the rest of this code is for text chunks only
                    if (!statChunk.flags.HasFlag(CHUNKSTATE.CHUNK_TEXT))
                        continue;

                    // Check for white space items and add the appropriate breaks.
                    switch (statChunk.breakType)
                    {
                        case CHUNK_BREAKTYPE.CHUNK_EOW:
                            if (buffer.Length > 0 && !char.IsWhiteSpace(buffer[buffer.Length - 1]))
                                buffer.Append(' ');
                            break;

//.........这里部分代码省略.........
开发者ID:tzarger,项目名称:contrib,代码行数:101,代码来源:Extractor.cs

示例2: GetTexts

        /// <summary>
        /// Lazily walks the chunks reported by <paramref name="filter"/> and yields
        /// the strings produced for each chunk whose flags equal <c>Chunkstate.ChunkText</c>.
        /// </summary>
        /// <param name="filter">The filter whose chunks are enumerated.</param>
        /// <returns>
        /// A deferred sequence of strings; enumeration ends as soon as
        /// <c>GetChunk</c> returns anything other than <c>FilterReturnCodes.Success</c>.
        /// </returns>
        private static IEnumerable<string> GetTexts(IFilter filter)
        {
            for (;;)
            {
                StatChunk current;
                if (filter.GetChunk(out current) != (int)(FilterReturnCodes.Success))
                {
                    // Any non-success code ends the enumeration.
                    yield break;
                }

                // Only chunks flagged exactly as text are read; all others are skipped.
                if (current.flags == Chunkstate.ChunkText)
                {
                    // NOTE(review): GetTextChunks is defined elsewhere; presumably it
                    // drains the text of the current chunk - confirm.
                    foreach (var text in GetTextChunks(filter))
                    {
                        yield return text;
                    }
                }
            }
        }
开发者ID:JoakimBrannstrom,项目名称:TextExtractor,代码行数:14,代码来源:TextExtractor.cs

示例3: ExtractText

        /// <summary>
        /// Initializes <paramref name="filter"/> and concatenates the text of every
        /// text chunk it exposes into a single string.
        /// </summary>
        /// <param name="filter">The filter to read from. It is initialized here with no flags and no attributes.</param>
        /// <returns>All text returned by the filter, in the order it was received; empty when no text was produced.</returns>
        private static string ExtractText(IFilter filter)
        {
            // HRESULT values from filterr.h: the next chunk is an embedding/link that
            // cannot be filtered. They are warnings - the remaining chunks are still readable.
            const int filterEEmbeddingUnavailable = unchecked((int)0x80041707);
            const int filterELinkUnavailable = unchecked((int)0x80041708);

            // Number of characters requested from the filter per GetText call.
            const uint textBufferSize = 60000;

            var plainTextResult = new StringBuilder();
            IFILTER_INIT mFlags = 0;

            uint i = 0;
            // NOTE(review): the result of Init is not inspected (its return type is not visible here).
            filter.Init(mFlags, 0, null, ref i);

            while (true)
            {
                STAT_CHUNK ps;
                int resultChunk = filter.GetChunk(out ps);

                if (resultChunk == filterEEmbeddingUnavailable || resultChunk == filterELinkUnavailable)
                {
                    // Skip the unreadable embedding/link instead of dropping the rest of the document.
                    continue;
                }

                if (resultChunk != 0)
                {
                    // End of chunks, or an error: stop and return what has been collected so far.
                    break;
                }

                // flags is a bit field, so test the text bit rather than comparing for equality.
                if ((ps.flags & CHUNKSTATE.CHUNK_TEXT) == 0)
                {
                    continue;
                }

                int resultText = 0;
                while (resultText == Constants.FILTER_S_LAST_TEXT || resultText == 0)
                {
                    // On entry: buffer size in characters. On exit: number of characters written.
                    uint sizeBuffer = textBufferSize;
                    var sbBuffer = new StringBuilder((int)sizeBuffer);
                    resultText = filter.GetText(ref sizeBuffer, sbBuffer);

                    // The marshalled StringBuilder can be shorter than the reported count;
                    // clamp so ToString(0, count) cannot throw ArgumentOutOfRangeException.
                    int count = (int)sizeBuffer;
                    if (count > sbBuffer.Length)
                    {
                        count = sbBuffer.Length;
                    }

                    if (count > 0)
                    {
                        plainTextResult.Append(sbBuffer.ToString(0, count));
                    }
                }
            }

            return plainTextResult.ToString();
        }
开发者ID:ralreegorganon,项目名称:Indexzor,代码行数:35,代码来源:Parser.cs


注:本文中的IFilter.GetChunk方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。