当前位置: 首页>>代码示例>>C#>>正文


C# IFilter.GetText方法代码示例

本文整理汇总了C#中IFilter.GetText方法的典型用法代码示例。如果您正苦于以下问题：C# IFilter.GetText方法的具体用法？C# IFilter.GetText怎么用？C# IFilter.GetText使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在IFilter的用法示例。


在下文中一共展示了IFilter.GetText方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetJson


//.........这里部分代码省略.........
                            }

                            // close the json array and flush the writers
                            textWriter.WriteEndArray();
                            propWriter.WriteEndArray();
                            textWriter.Flush();
                            propWriter.Flush();

                            // assemble and return the document
                            return new RavenJObject
                                   {
                                       //{ "Properties", propWriter.Token }, // TODO: restore this when properties can be retrieved
                                       { "Text", textWriter.Token }
                                   };

                        case IFilterReturnCodes.FILTER_E_EMBEDDING_UNAVAILABLE:
                        case IFilterReturnCodes.FILTER_E_LINK_UNAVAILABLE:
                            // Ignore these warnings
                            continue;

                        default:
                            // Something else - throw an exception
                            throw new COMException("IFilter COM error while getting a chunk of data: " + chunkStatus);
                    }

                    //// Handle property value chunks  TODO: make this work so we can index properties in addition to text
                    //if (statChunk.flags.HasFlag(CHUNKSTATE.CHUNK_VALUE))
                    //{
                    //    // get the property name  TODO: This doesn't seem to work
                    //    var propInfo = statChunk.attribute.psProperty;
                    //    var propName = propInfo.ulKind == 0 ? Marshal.PtrToStringAuto(propInfo.lpwstr) : propInfo.propid.ToString();
                    //

                    //    // will this help?
                    //    var propGuid = statChunk.attribute.guidPropSet;

                    //    // get the value  TODO: This doesn't seem to work
                    //    PROPVARIANT ppPropValue;
                    //    var valueStatus = filter.GetValue(out ppPropValue);
                    //    if (valueStatus == IFilterReturnCodes.S_OK || valueStatus == IFilterReturnCodes.FILTER_S_LAST_VALUES)
                    //    {
                    //        // write the value to json
                    //        propWriter.WriteStartObject();
                    //        propWriter.WritePropertyName(propName);
                    //        propWriter.WriteValue(ppPropValue.Value);
                    //        propWriter.WriteEndObject();

                    //        // free unmanaged memory from the PropVariant
                    //        ppPropValue.Clear();
                    //    }
                    //}

                    // the rest of this code is for text chunks only
                    if (!statChunk.flags.HasFlag(CHUNKSTATE.CHUNK_TEXT))
                        continue;

                    // Check for white space items and add the appropriate breaks.
                    switch (statChunk.breakType)
                    {
                        case CHUNK_BREAKTYPE.CHUNK_EOW:
                            if (buffer.Length > 0 && !char.IsWhiteSpace(buffer[buffer.Length - 1]))
                                buffer.Append(' ');
                            break;

                        case CHUNK_BREAKTYPE.CHUNK_EOC:
                        case CHUNK_BREAKTYPE.CHUNK_EOP:
                        case CHUNK_BREAKTYPE.CHUNK_EOS:
                            // Each chapter, paragraph or sentence break can be in a new json value in our array.
                            // This will keep any one string from getting too big.
                            if (buffer.Length > 0)
                            {
                                textWriter.WriteLines(buffer.ToString());

                                buffer.Clear();
                            }
                            break;
                    }

                    while (true)
                    {
                        // Create a temporary string buffer we can use for the parsing algorithm.
                        int cBuffer = defaultBufferSize;
                        var sbBuffer = new StringBuilder(defaultBufferSize);

                        // Read the next piece of data up to the size of our local buffer.
                        var textStatus = filter.GetText(ref cBuffer, sbBuffer);
                        if (textStatus == IFilterReturnCodes.S_OK || textStatus == IFilterReturnCodes.FILTER_S_LAST_TEXT)
                        {
                            // If any data was returned, add it to the buffer.
                            buffer.Append(sbBuffer.ToString(), 0, cBuffer);
                        }

                        // Once all data is exhausted, we are done so terminate the loop.
                        if (textStatus == IFilterReturnCodes.FILTER_S_LAST_TEXT || textStatus == IFilterReturnCodes.FILTER_E_NO_MORE_TEXT)
                            break;
                    }

                }
            }
        }
开发者ID:tzarger,项目名称:contrib,代码行数:101,代码来源:Extractor.cs

示例2: GetTextChunks

        /// <summary>
        /// Lazily yields the pieces of text produced by repeated calls to
        /// <c>filter.GetText</c>.
        /// </summary>
        /// <param name="filter">The filter to read text from.</param>
        /// <returns>
        /// One string per <c>GetText</c> call that produced a non-empty buffer. Enumeration
        /// ends after the first call whose status is neither <c>Success</c> nor
        /// <c>LastTextInCurrentChunk</c>.
        /// </returns>
        private static IEnumerable<string> GetTextChunks(IFilter filter)
        {
            const uint BufferCapacity = 65536;

            while (true)
            {
                // A fresh buffer is handed to the filter on every call; charCount goes in as
                // the capacity and comes back as the character count reported by the filter.
                uint charCount = BufferCapacity;
                var text = new StringBuilder((int)BufferCapacity);
                var status = (FilterReturnCodes)filter.GetText(ref charCount, text);

                if (charCount > 0 && text.Length > 0)
                {
                    // The reported count can exceed what the builder actually holds,
                    // so never read past the builder's real length.
                    int usable = text.Length < charCount ? text.Length : (int)charCount;
                    yield return text.ToString(0, usable);
                }

                bool moreExpected = status == FilterReturnCodes.Success
                                    || status == FilterReturnCodes.LastTextInCurrentChunk;
                if (!moreExpected)
                {
                    yield break;
                }
            }
        }
开发者ID:JoakimBrannstrom,项目名称:TextExtractor,代码行数:20,代码来源:TextExtractor.cs

示例3: ExtractText

        /// <summary>
        /// Initializes <paramref name="filter"/> and concatenates the text of every chunk
        /// whose flags equal <c>CHUNKSTATE.CHUNK_TEXT</c>.
        /// </summary>
        /// <param name="filter">The filter to read from; it is initialized by this method.</param>
        /// <returns>All extracted text, in the order the filter produced it.</returns>
        private static string ExtractText(IFilter filter)
        {
            // Capacity, in characters, of the buffer handed to each GetText call.
            const uint BufferSize = 60000;

            var plainTextResult = new StringBuilder();
            var ps = new STAT_CHUNK();
            IFILTER_INIT mFlags = 0;

            uint i = 0;
            // NOTE(review): the result of Init is not checked here - confirm that is intended.
            filter.Init(mFlags, 0, null, ref i);

            // Walk the chunks for as long as GetChunk keeps returning 0.
            int resultChunk = filter.GetChunk(out ps);
            while (resultChunk == 0)
            {
                // NOTE(review): exact equality skips chunks that carry CHUNK_TEXT combined
                // with any other flag - confirm this is the desired filter.
                if (ps.flags == CHUNKSTATE.CHUNK_TEXT)
                {
                    var resultText = 0;
                    while (resultText == Constants.FILTER_S_LAST_TEXT || resultText == 0)
                    {
                        uint sizeBuffer = BufferSize;
                        var sbBuffer = new StringBuilder((int)BufferSize);
                        resultText = filter.GetText(ref sizeBuffer, sbBuffer);

                        if (sizeBuffer > 0 && sbBuffer.Length > 0)
                        {
                            // The filter may report more characters than the builder actually
                            // holds; clamp so ToString cannot throw ArgumentOutOfRangeException.
                            int length = sizeBuffer < (uint)sbBuffer.Length
                                ? (int)sizeBuffer
                                : sbBuffer.Length;
                            plainTextResult.Append(sbBuffer.ToString(0, length));
                        }
                    }
                }

                resultChunk = filter.GetChunk(out ps);
            }

            return plainTextResult.ToString();
        }
开发者ID:ralreegorganon,项目名称:Indexzor,代码行数:35,代码来源:Parser.cs


注:本文中的IFilter.GetText方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。