首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >CustomHTMLTagProcessor for iTextSharp

CustomHTMLTagProcessor for iTextSharp
EN

Stack Overflow用户
提问于 2015-12-10 14:28:09
回答 1查看 912关注 0票数 0

我有以下代码;

代码语言:vb.net
复制
' Read the whole HTML source up front. The Using block guarantees the reader
' is closed even when ReadToEnd throws.
Dim html As String
Using htmlFile As New StreamReader("C:\\temp\\test.htm")
    html = htmlFile.ReadToEnd()
End Using

Dim pdfDocument As New Document()
Using fs As New FileStream("C:\\temp\\test.pdf", FileMode.Create)
    ' Attach a writer so that everything added to the document ends up in fs.
    PdfWriter.GetInstance(pdfDocument, fs)

    Using htmlReader As New StringReader(html)
        ' Nothing = no custom StyleSheet.
        Dim parsedList As List(Of IElement) = HTMLWorker.ParseToList(htmlReader, Nothing)

        pdfDocument.Open()
        For Each item As IElement In parsedList
            pdfDocument.Add(item)
        Next
        ' Close exactly once, while the output stream is still open.
        pdfDocument.Close()
    End Using
End Using

我正在尝试按照此处(原文链接)给出的方法,修复以 base64 方式引用的嵌入式图像的问题。我创建了实现 IHTMLTagProcessor 接口的 CustomImageHTMLTagProcessor,但在需要修改 HTMLWorker 类时,我不知道该改动哪些地方。下面是 HTMLWorker 类。

代码语言:csharp
复制
using System;
using System.IO;
using System.Text;
using System.Collections.Generic;
using System.Globalization;
using System.util;
using iTextSharp.text;
using iTextSharp.text.log;
using iTextSharp.text.html;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.draw;
using iTextSharp.text.xml.simpleparser;

namespace iTextSharp.text.html.simpleparser {

    [Obsolete]
    public class HTMLWorker : ISimpleXMLDocHandler, IDocListener {

        private static readonly ILogger LOGGER = LoggerFactory.GetLogger(typeof(HTMLWorker));

        /**
         * Listener that receives the elements produced while parsing.
         * ParseToList() points this at the worker itself so that the
         * elements are collected in a list instead.
         */
        protected IDocListener document;

        /**
         * Map from tag name to the processor that handles that tag.
         * Looked up by StartElement() and EndElement().
         */
        protected internal IDictionary<String, IHTMLTagProcessor> tags;

        /**
         * Style sheet applied to tags while parsing. It is assigned by
         * SetStyleSheet() and read by StartDocument() and StartElement();
         * the declaration was missing from the class, so those members
         * referenced an undeclared name.
         */
        private StyleSheet style = new StyleSheet();
        /**
         * Creates a worker that sends parsed elements to the given listener,
         * passing null for both the tag map and the style sheet.
         * @param document the listener that receives the parsed elements
         */
        public HTMLWorker(IDocListener document)
            : this(document, null, null) {
        }

        /**
         * Creates a worker with an explicit tag map and style sheet.
         * @param document the listener that receives the parsed elements
         * @param tags     tag processors; handed to SetSupportedTags()
         * @param style    style sheet; handed to SetStyleSheet()
         */
        public HTMLWorker(IDocListener document, IDictionary<string, IHTMLTagProcessor> tags, StyleSheet style) {
            this.document = document;
            // Both setters substitute a default instance when given null.
            SetSupportedTags(tags);
            SetStyleSheet(style);
        }

        /**
         * Sets the map of supported tags.
         * @param tags tag name to processor map; when null a new
         *             HTMLTagProcessors instance is used instead
         */
        public virtual void SetSupportedTags(IDictionary<string, IHTMLTagProcessor> tags) {
            this.tags = tags ?? new HTMLTagProcessors();
        }

        /**
         * Sets the style sheet used while parsing.
         * @param style the style sheet; when null a new, empty
         *              StyleSheet is used instead
         */
        public virtual void SetStyleSheet(StyleSheet style) {
            this.style = style ?? new StyleSheet();
        }

        /**
         * Parses the content supplied by the reader, with this worker
         * acting as the handler passed to SimpleXMLParser.
         * Logs an informational notice before parsing starts.
         * @param reader the source to parse
         */
        public virtual void Parse(TextReader reader) {
            const string notice = "Please note, there is a more extended version of the HTMLWorker available in the iText XMLWorker";
            LOGGER.Info(notice);
            SimpleXMLParser.Parse(this, null, reader, true);
        }

        // state machine

        /**
         * Elements that are still being built. The Process* methods pop
         * and push entries here; EndDocument() flushes what is left.
         */
        protected Stack<IElement> stack = new Stack<IElement>();

        /**
         * The paragraph currently receiving text, or null when none is in
         * progress (CarriageReturn() and EndDocument() reset it to null).
         */
        protected Paragraph currentParagraph;

        /**
         * Chain of tag attributes. StartDocument() and UpdateChain() modify
         * it, and it is passed to the factory when elements are created.
         */
        private ChainedProperties chain = new ChainedProperties();

        /**
         * Called at the start of the document. Applies the style sheet to
         * an empty attribute map for the body tag and adds that map to
         * the chain under the body tag.
         */
        public virtual void StartDocument() {
            Dictionary<string, string> bodyAttributes = new Dictionary<string, string>();
            style.ApplyStyle(HtmlTags.BODY, bodyAttributes);
            chain.AddToChain(HtmlTags.BODY, bodyAttributes);
        }

        /**
         * Called for every opening tag. Tags without a registered
         * processor are ignored; otherwise the style sheet is applied to
         * the attributes, the style attribute is resolved against the
         * chain, and the processor's StartElement is invoked.
         * @param tag   the tag name
         * @param attrs the attributes of the tag
         */
        public virtual void StartElement(string tag, IDictionary<string, string> attrs) {
            IHTMLTagProcessor processor;
            bool registered = tags.TryGetValue(tag, out processor);
            if (!registered || processor == null) {
                // unsupported tag: nothing to do
                return;
            }

            style.ApplyStyle(tag, attrs);
            StyleSheet.ResolveStyleAttribute(attrs, chain);
            processor.StartElement(this, tag, attrs);
        }

        /**
         * Called for text content. Unless text is being skipped, the
         * content is turned into a Chunk and appended to the current
         * paragraph, which is created first if there is none.
         * @param content the text that was read
         */
        public virtual void Text(string content) {
            if (skipText) {
                return;
            }
            if (currentParagraph == null) {
                currentParagraph = CreateParagraph();
            }
            if (!insidePRE) {
                // Whitespace-only content that contains no space character
                // (e.g. only newlines and carriage returns) is dropped.
                bool blankWithoutSpace = content.Trim().Length == 0 && content.IndexOf(' ') < 0;
                if (blankWithoutSpace) {
                    return;
                }
                content = HtmlUtilities.EliminateWhiteSpace(content);
            }
            currentParagraph.Add(CreateChunk(content));
        }

        /**
         * Called for every closing tag. Tags without a registered
         * processor are ignored; otherwise the processor's EndElement
         * is invoked.
         * @param tag the tag name
         */
        public virtual void EndElement(string tag) {
            IHTMLTagProcessor processor;
            bool registered = tags.TryGetValue(tag, out processor);
            if (registered && processor != null) {
                processor.EndElement(this, tag);
            }
        }

        /**
         * Called at the end of the document. Adds every element that is
         * still on the stack to the document, followed by the current
         * paragraph (if any), and then resets the current paragraph.
         * The elements remain on the stack.
         */
        public virtual void EndDocument() {
            // Stack<T> enumerates (and ToArray copies) from the top of the
            // stack down, i.e. most recently pushed first. Walk the snapshot
            // backwards so the elements are added oldest-first, in the order
            // they were pushed, instead of reversed.
            IElement[] pending = stack.ToArray();
            for (int i = pending.Length - 1; i >= 0; i--) {
                document.Add(pending[i]);
            }
            // add current paragraph
            if (currentParagraph != null) {
                document.Add(currentParagraph);
            }
            currentParagraph = null;
        }
        /**
         * Appends a newline chunk to the current paragraph, creating an
         * empty Paragraph first when none is in progress.
         */
        public virtual void NewLine() {
            if (currentParagraph == null) {
                currentParagraph = new Paragraph();
            }
            Chunk lineBreak = CreateChunk("\n");
            currentParagraph.Add(lineBreak);
        }

        /**
         * Finishes the current paragraph. With an empty stack the
         * paragraph is added to the document; otherwise it is added to
         * the element on top of the stack when that element is an
         * ITextElementArray (and silently dropped when it is not).
         * Afterwards there is no current paragraph.
         * Does nothing when no paragraph is in progress.
         */
        public virtual void CarriageReturn() {
            if (currentParagraph == null) {
                return;
            }
            if (stack.Count == 0) {
                document.Add(currentParagraph);
            } else {
                // Take the top element off, use it, and put it back.
                IElement top = stack.Pop();
                ITextElementArray container = top as ITextElementArray;
                if (container != null) {
                    container.Add(currentParagraph);
                }
                stack.Push(top);
            }
            currentParagraph = null;
        }

        /**
         * Moves the current paragraph onto the stack (via PushToStack, so
         * a null paragraph is not pushed) and starts a new, empty one.
         * @since 5.0.6
         */
        public virtual void FlushContent() {
            Paragraph finished = currentParagraph;
            PushToStack(finished);
            currentParagraph = new Paragraph();
        }

        /**
         * Pushes an element onto the stack; null is ignored.
         * @param element the element to push
         * @since 5.0.6
         */
        public virtual void PushToStack(IElement element) {
            if (element == null) {
                return;
            }
            stack.Push(element);
        }

        /**
         * Adds a tag and its attributes to the chain.
         * @param tag   the tag to add
         * @param attrs the attributes that belong to the tag
         * @since 5.0.6
         */
        public virtual void UpdateChain(string tag, IDictionary<string, string> attrs) {
            this.chain.AddToChain(tag, attrs);
        }

        /**
         * Removes a tag from the chain.
         * @param tag the tag to remove
         * @since 5.0.6
         */
        public virtual void UpdateChain(string tag) {
            this.chain.RemoveChain(tag);
        }

        // providers that help find resources such as images and fonts

        /**
         * Key of the image provider in the providers map.
         * CreateImage() casts the stored value to IImageProvider.
         * @since 5.0.6
         */
        public const String IMG_PROVIDER = "img_provider";

        /**
         * Key of the image processor in the providers map.
         * ProcessImage() casts the stored value to IImageProcessor.
         * @since 5.0.6
         */
        public const String IMG_PROCESSOR = "img_interface";

        /**
         * Key of the image store in the providers map.
         * CreateImage() casts the stored value to ImageStore.
         * @since 5.0.6
         */
        public const String IMG_STORE = "img_static";

        /**
         * Key of the image base URL in the providers map.
         * CreateImage() casts the stored value to string.
         * @since 5.0.6
         */
        public const String IMG_BASEURL = "img_baseurl";

        /**
         * Key of the font provider in the providers map.
         * SetProviders() casts the stored value to IFontProvider.
         * @since 5.0.6
         */
        public const String FONT_PROVIDER = "font_factory";

        /**
         * Key of the link processor in the providers map.
         * ProcessLink() casts the stored value to ILinkProcessor.
         * @since 5.0.6
         */
        public const String LINK_PROVIDER = "alink_interface";

        /**
         * Map of providers (font provider, image provider, ...), keyed by
         * the constants declared in this class. Starts out empty and is
         * replaced as a whole by SetProviders().
         * @since 5.0.6 (renamed from interfaceProps)
         */
        private IDictionary<String, Object> providers = new Dictionary<String, Object>();

        /**
         * Replaces the providers map. A null argument is ignored and the
         * existing map is kept. When the new map holds a non-null entry
         * under FONT_PROVIDER, it is installed as the factory's font
         * provider.
         * @param providers map with the different providers
         * @since 5.0.6
         */
        public virtual void SetProviders(IDictionary<string, object> providers) {
            if (providers == null) {
                return;
            }
            this.providers = providers;

            object registered;
            if (!providers.TryGetValue(FONT_PROVIDER, out registered)) {
                return;
            }
            IFontProvider fontProvider = (IFontProvider) registered;
            if (fontProvider != null) {
                factory.FontProvider = fontProvider;
            }
        }

        // factory that helps create objects

        /**
         * Factory used by the Create* methods of this class to build
         * chunks, paragraphs, lists, list items, line separators and images.
         * @since 5.0.6
         */
        private ElementFactory factory = new ElementFactory();

        /**
         * Creates a Chunk through the factory, using the current chain.
         * @param content the text of the chunk
         * @return the Chunk created by the factory
         * @since 5.0.6
         */
        public virtual Chunk CreateChunk(string content) {
            Chunk created = factory.CreateChunk(content, chain);
            return created;
        }
        /**
         * Creates a Paragraph through the factory, using the current chain.
         * @return the Paragraph created by the factory
         * @since 5.0.6
         */
        public virtual Paragraph CreateParagraph() {
            Paragraph created = factory.CreateParagraph(chain);
            return created;
        }
        /**
         * Creates a List through the factory, using the current chain.
         * @param tag should be "ol" or "ul"
         * @return the List created by the factory
         * @since 5.0.6
         */
        public virtual List CreateList(string tag) {
            List created = factory.CreateList(tag, chain);
            return created;
        }
        /**
         * Creates a ListItem through the factory, using the current chain.
         * @return the ListItem created by the factory
         * @since 5.0.6
         */
        public virtual ListItem CreateListItem() {
            ListItem created = factory.CreateListItem(chain);
            return created;
        }
        /**
         * Creates a LineSeparator through the factory. The second factory
         * argument is half the leading of the current paragraph.
         * When no paragraph is in progress (currentParagraph is null, which
         * is the state CarriageReturn() leaves behind), the leading of a
         * paragraph freshly created by CreateParagraph() is used instead,
         * rather than dereferencing null.
         * @param attrs properties of the LineSeparator
         * @return the LineSeparator created by the factory
         * @since 5.0.6
         */
        public virtual LineSeparator CreateLineSeparator(IDictionary<String, String> attrs) {
            // The fallback paragraph is only used to read its leading; it
            // does not become the current paragraph.
            Paragraph reference = currentParagraph ?? CreateParagraph();
            return factory.CreateLineSeparator(attrs, reference.Leading / 2);
        }

        /**
         * Creates an Image through the factory from the tag's src attribute.
         * The image provider, image store and base URL are taken from the
         * providers map when present; each is passed as null otherwise.
         * @param attrs properties of the Image
         * @return the Image created by the factory, or null when attrs has
         *         no (non-null) src entry
         * @since 5.0.6
         */
        public virtual Image CreateImage(IDictionary<string, string> attrs) {
            string src;
            attrs.TryGetValue(HtmlTags.SRC, out src);
            if (src == null) {
                return null;
            }

            IImageProvider imageProvider = null;
            if (providers.ContainsKey(IMG_PROVIDER)) {
                imageProvider = (IImageProvider) providers[IMG_PROVIDER];
            }
            ImageStore imageStore = null;
            if (providers.ContainsKey(IMG_STORE)) {
                imageStore = (ImageStore) providers[IMG_STORE];
            }
            string baseUrl = null;
            if (providers.ContainsKey(IMG_BASEURL)) {
                baseUrl = (string) providers[IMG_BASEURL];
            }

            return factory.CreateImage(src, attrs, chain, document, imageProvider, imageStore, baseUrl);
        }

        /**
         * Creates a CellWrapper for the given tag, using the current chain.
         * @param tag the tag
         * @return a new CellWrapper
         * @since 5.0.6
         */
        public virtual CellWrapper CreateCell(string tag) {
            CellWrapper wrapper = new CellWrapper(tag, chain);
            return wrapper;
        }

        // processing objects

        /**
         * Turns the current paragraph into a link and merges it, as a
         * Phrase, into the paragraph on top of the stack (or into a new
         * paragraph when the stack is empty). The result becomes the
         * current paragraph.
         * A link processor registered under LINK_PROVIDER gets the first
         * chance to handle the link; when there is none, or it returns
         * false, every chunk of the paragraph gets the href from the
         * chain as its anchor.
         * @since 5.0.6
         */
        public virtual void ProcessLink() {
            if (currentParagraph == null) {
                currentParagraph = new Paragraph();
            }

            // The link provider allows additional processing.
            ILinkProcessor linkProcessor = null;
            if (providers.ContainsKey(LINK_PROVIDER)) {
                linkProcessor = (ILinkProcessor) providers[LINK_PROVIDER];
            }
            bool handled = linkProcessor != null && linkProcessor.Process(currentParagraph, chain);
            if (!handled) {
                // Default handling: anchor every chunk to the href.
                string href = chain[HtmlTags.HREF];
                if (href != null) {
                    foreach (Chunk chunk in currentParagraph.Chunks) {
                        chunk.SetAnchor(href);
                    }
                }
            }

            Paragraph host;
            if (stack.Count == 0) {
                // No enclosing paragraph: the 'a' tag is the first element.
                host = new Paragraph(new Phrase(currentParagraph));
            } else {
                // The top of the stack is expected to be a Paragraph.
                host = (Paragraph) stack.Pop();
                host.Add(new Phrase(currentParagraph));
            }
            currentParagraph = host;
        }

        /**
         * Takes the List off the top of the stack and adds it to the
         * element that is then on top of the stack (cast to
         * ITextElementArray), or to the document when the stack has become
         * empty. Does nothing when the stack is empty or its top element
         * is not a List.
         * @since 5.0.6
         */
        public virtual void ProcessList() {
            if (stack.Count == 0) {
                return;
            }
            IElement top = stack.Pop();
            if (top is List) {
                if (stack.Count > 0) {
                    ((ITextElementArray) stack.Peek()).Add(top);
                } else {
                    document.Add(top);
                }
                return;
            }
            // Not a List: restore the stack.
            stack.Push(top);
        }

        /**
         * Takes the ListItem off the top of the stack and adds it to the
         * List directly beneath it. When the stack holds nothing beneath
         * the item, the item is added to the document instead. When the
         * element beneath is not a List, that element is put back and the
         * item is not added anywhere. Does nothing when the stack is
         * empty or its top element is not a ListItem.
         * @since 5.0.6
         */
        public virtual void ProcessListItem() {
            if (stack.Count == 0) {
                return;
            }
            IElement top = stack.Pop();
            ListItem item = top as ListItem;
            if (item == null) {
                // Not a ListItem: restore the stack.
                stack.Push(top);
                return;
            }
            if (stack.Count == 0) {
                document.Add(item);
                return;
            }
            IElement beneath = stack.Pop();
            List list = beneath as List;
            if (list != null) {
                list.Add(item);
                item.AdjustListSymbolFont();
            }
            stack.Push(beneath);
        }

        /**
         * Processes an Image. An image processor registered under
         * IMG_PROCESSOR gets the first chance to handle it; when there is
         * none, or it returns false, the image is added as a Chunk to the
         * current paragraph and the paragraph's alignment is set from the
         * align attribute. When an align attribute is present, the
         * paragraph is closed with CarriageReturn() both before and after
         * the image is added.
         * @param img   the image
         * @param attrs the attributes of the image tag
         * @since 5.0.6
         */
        public virtual void ProcessImage(Image img, IDictionary<string, string> attrs) {
            IImageProcessor imageProcessor = null;
            if (providers.ContainsKey(IMG_PROCESSOR)) {
                imageProcessor = (IImageProcessor) providers[IMG_PROCESSOR];
            }
            if (imageProcessor != null && imageProcessor.Process(img, attrs, chain, document)) {
                // handled by the custom processor
                return;
            }

            string align;
            attrs.TryGetValue(HtmlTags.ALIGN, out align);
            bool aligned = align != null;

            if (aligned) {
                CarriageReturn();
            }
            if (currentParagraph == null) {
                currentParagraph = CreateParagraph();
            }
            currentParagraph.Add(new Chunk(img, 0, 0, true));
            currentParagraph.Alignment = HtmlUtilities.AlignmentValue(align);
            if (aligned) {
                CarriageReturn();
            }
        }

        /**
         * Pops the TableWrapper off the stack, creates the PdfPTable from
         * it with SplitRows enabled, and adds the table to the element
         * that is then on top of the stack (cast to ITextElementArray),
         * or to the document when the stack has become empty.
         * @since 5.0.6
         */
        public virtual void ProcessTable() {
            TableWrapper wrapper = (TableWrapper) stack.Pop();
            PdfPTable pdfTable = wrapper.CreateTable();
            pdfTable.SplitRows = true;
            if (stack.Count > 0) {
                ((ITextElementArray) stack.Peek()).Add(pdfTable);
            } else {
                document.Add(pdfTable);
            }
        }

        /**
         * Pops elements off the stack until a TableWrapper is found, adds
         * the cells of the popped CellWrapper objects to that table as one
         * row, optionally sets the table's column widths, and pushes the
         * TableWrapper back onto the stack.
         * Popped elements that are neither a CellWrapper nor a TableWrapper
         * are discarded. When the stack holds no TableWrapper, Pop() ends
         * up being called on an empty stack.
         * @since 5.0.6
         */
        virtual public void ProcessRow() {
            List<PdfPCell> row = new List<PdfPCell>();
            List<float> cellWidths = new List<float>();
            // true as soon as one cell reports a percentage width
            bool percentage = false;
            float width;
            // sum of all non-zero cell widths
            float totalWidth = 0;
            // number of cells whose width is 0
            int zeroWidth = 0;
            TableWrapper table = null;
            while (true) {
                IElement obj = stack.Pop();
                if (obj is CellWrapper) {
                    CellWrapper cell = (CellWrapper)obj;
                    width = cell.Width;
                    cellWidths.Add(width);
                    percentage |= cell.IsPercentage;
                    if (width == 0) {
                        zeroWidth++;
                    }
                    else {
                        totalWidth += width;
                    }
                    row.Add(cell.Cell);
                }
                if (obj is TableWrapper) {
                    // reached the table that owns this row: stop popping
                    table = (TableWrapper) obj;
                    break;
                }
            }
            // NOTE(review): row is still in pop (reverse) order here;
            // presumably AddRow accounts for that - confirm in TableWrapper.
            table.AddRow(row);
            if (cellWidths.Count > 0) {
                // cells come off the stack in reverse, naturally
                // from here on totalWidth is what remains of 100 after the
                // non-zero widths have been subtracted
                totalWidth = 100 - totalWidth;
                cellWidths.Reverse();
                float[] widths = new float[cellWidths.Count];
                bool hasZero = false;
                for (int i = 0; i < widths.Length; i++) {
                    widths[i] = cellWidths[i];
                    // with percentage widths, zero-width cells share the
                    // remaining width equally
                    if (widths[i] == 0 && percentage && zeroWidth > 0) {
                        widths[i] = totalWidth / zeroWidth;
                    }
                    // a width that is still 0 means the widths are unusable
                    if (widths[i] == 0) {
                        hasZero = true;
                        break;
                    }
                }
                // column widths are only set when every width is non-zero
                if (!hasZero)
                    table.ColWidths = widths;
            }
            stack.Push(table);
        }

        // state variables and methods

        /**
         * Stack of saved { pendingTR, pendingTD } pairs; filled by
         * PushTableState() and emptied by PopTableState().
         */
        private Stack<bool[]> tableState = new Stack<bool[]>();

        /** Boolean to keep track of TR tags. */
        private bool pendingTR = false;

        /** Boolean to keep track of TD and TH tags. */
        private bool pendingTD = false;

        /** Boolean to keep track of LI tags. */
        private bool pendingLI = false;

        /**
         * Boolean to keep track of PRE tags. While true, Text() keeps the
         * content as is instead of dropping or collapsing white space.
         * @since 5.0.6 renamed from isPRE
         */
        private bool insidePRE = false;

        /**
         * Indicates if text needs to be skipped; while true, Text()
         * returns without doing anything.
         * @since iText 5.0.6 (private => protected)
         */
        protected internal bool skipText = false;

        /**
         * Saves the current values of pendingTR and pendingTD
         * on the table state stack.
         * @since 5.0.6
         */
        public virtual void PushTableState() {
            bool[] snapshot = { pendingTR, pendingTD };
            tableState.Push(snapshot);
        }

        /**
         * Restores pendingTR and pendingTD from the most recently
         * saved entry of the table state stack, removing that entry.
         * @since 5.0.6
         */
        public virtual void PopTableState() {
            bool[] snapshot = tableState.Pop();
            this.pendingTR = snapshot[0];
            this.pendingTD = snapshot[1];
        }

        /**
         * Gets the flag that keeps track of TR tags.
         * @return the current value of pendingTR
         * @since 5.0.6
         */
        public virtual bool IsPendingTR() {
            return this.pendingTR;
        }

        /**
         * Sets the flag that keeps track of TR tags.
         * @param pendingTR the new value of pendingTR
         * @since 5.0.6
         */
        public virtual void SetPendingTR(bool pendingTR) {
            this.pendingTR = pendingTR;
        }

        /**
         * Gets the flag that keeps track of TD and TH tags.
         * @return the current value of pendingTD
         * @since 5.0.6
         */
        public virtual bool IsPendingTD() {
            return this.pendingTD;
        }

        /**
         * Sets the flag that keeps track of TD and TH tags.
         * @param pendingTD the new value of pendingTD
         * @since 5.0.6
         */
        public virtual void SetPendingTD(bool pendingTD) {
            this.pendingTD = pendingTD;
        }

        /**
         * Gets the flag that keeps track of LI tags.
         * @return the current value of pendingLI
         * @since 5.0.6
         */
        public virtual bool IsPendingLI() {
            return this.pendingLI;
        }

        /**
         * Sets the flag that keeps track of LI tags.
         * @param pendingLI the new value of pendingLI
         * @since 5.0.6
         */
        public virtual void SetPendingLI(bool pendingLI) {
            this.pendingLI = pendingLI;
        }

        /**
         * Gets the flag that keeps track of PRE tags.
         * @return the current value of insidePRE
         * @since 5.0.6
         */
        public virtual bool IsInsidePRE() {
            return this.insidePRE;
        }

        /**
         * Sets the flag that keeps track of PRE tags.
         * @param insidePRE the new value of insidePRE
         * @since 5.0.6
         */
        public virtual void SetInsidePRE(bool insidePRE) {
            this.insidePRE = insidePRE;
        }

        /**
         * Gets the flag that indicates whether text is being skipped.
         * @return the current value of skipText
         * @since 5.0.6
         */
        public virtual bool IsSkipText() {
            return this.skipText;
        }

        /**
         * Sets the flag that indicates whether text is being skipped.
         * @param skipText the new value of skipText
         * @since 5.0.6
         */
        public virtual void SetSkipText(bool skipText) {
            this.skipText = skipText;
        }

        // static methods to parse HTML to a List of Element objects.

        /**
         * The resulting list of elements. Only assigned by the static
         * ParseToList() method; Add() appends to it.
         */
        protected List<IElement> objectList;

        /**
         * Parses an HTML source to a List of Element objects,
         * without any providers.
         * @param reader    the HTML source
         * @param style     a StyleSheet object
         * @return a List of Element objects
         */
        public static List<IElement> ParseToList(TextReader reader, StyleSheet style) {
            List<IElement> elements = ParseToList(reader, style, null);
            return elements;
        }

        /**
         * Parses an HTML source to a List of Element objects,
         * passing null as the tag map.
         * @param reader    the HTML source
         * @param style     a StyleSheet object
         * @param providers map containing classes with extra info
         * @return a List of Element objects
         */
        public static List<IElement> ParseToList(TextReader reader, StyleSheet style,
                Dictionary<string, object> providers) {
            List<IElement> elements = ParseToList(reader, style, null, providers);
            return elements;
        }

        /**
         * Parses an HTML source to a List of Element objects.
         * A worker is created that acts as its own document listener, so
         * every element it is asked to add ends up in the returned list.
         * @param reader    the HTML source
         * @param style     a StyleSheet object
         * @param tags      a map containing supported tags and their processors
         * @param providers map containing classes with extra info
         * @return a List of Element objects
         * @since 5.0.6
         */
        public static List<IElement> ParseToList(TextReader reader, StyleSheet style,
                IDictionary<string, IHTMLTagProcessor> tags, Dictionary<string, object> providers) {
            HTMLWorker collector = new HTMLWorker(null, tags, style);
            // The worker listens to itself: Add() stores into objectList.
            collector.document = collector;
            collector.SetProviders(providers);
            collector.objectList = new List<IElement>();
            collector.Parse(reader);
            return collector.objectList;
        }

        // DocListener interface

        /**
         * Appends the element to objectList.
         * @param element the element to collect
         * @return always true
         */
        public virtual bool Add(IElement element) {
            this.objectList.Add(element);
            return true;
        }

        /**
         * Listener callback; intentionally does nothing.
         */
        public virtual void Close() {
        }

        /**
         * Listener callback; performs no action.
         * @return always true
         */
        public virtual bool NewPage() {
            return true;
        }

        /**
         * Listener callback; intentionally does nothing.
         */
        public virtual void Open() {
        }

        /**
         * Listener callback; intentionally does nothing.
         */
        public virtual void ResetPageCount() {
        }

        /**
         * Listener callback; the argument is ignored.
         * @param marginMirroring ignored
         * @return always false
         */
        public virtual bool SetMarginMirroring(bool marginMirroring) {
            return false;
        }

        /**
         * Listener callback; the argument is ignored.
         * @param marginMirroring ignored
         * @return always false
         * @since   2.1.6
         */
        public virtual bool SetMarginMirroringTopBottom(bool marginMirroring) {
            return false;
        }

        /**
         * Listener callback; all arguments are ignored.
         * @return always true
         */
        public virtual bool SetMargins(float marginLeft, float marginRight,
                float marginTop, float marginBottom) {
            return true;
        }

        /**
         * Write-only listener property; the assigned value is ignored.
         */
        public virtual int PageCount {
            set {
            }
        }

        /**
         * Listener callback; the argument is ignored.
         * @param pageSize ignored
         * @return always true
         */
        public virtual bool SetPageSize(Rectangle pageSize) {
            return true;
        }

        // deprecated methods

        /**
         * Sets the providers by forwarding to SetProviders().
         * @param providers map with the different providers
         * @deprecated use SetProviders() instead
         */
        public virtual void SetInterfaceProps(Dictionary<string, object> providers) {
            this.SetProviders(providers);
        }
        /**
         * Gets the providers map currently held by this worker.
         * @return the providers map
         * @deprecated the original note points to GetProviders(), which
         *             is not declared in this class as shown - TODO confirm
         */
        public virtual IDictionary<string, object> GetInterfaceProps() {
            return this.providers;
        }

        /**
         * Disposes the worker by calling Close().
         */
        public virtual void Dispose() {
            this.Close();
        }
    }
}
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2015-12-10 15:25:56

正如 Chris 提到的,HTMLWorker 已经被废弃。XMLWorker 是将 HTML 转换为 PDF 的新方法。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/34204392

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档