我有以下代码：
' Converts the HTML file to a PDF by parsing it with HTMLWorker.
' NOTE: VB string literals do not process backslash escapes, so "\\" below is
' two literal backslashes; the paths are kept exactly as originally written.

' Read the whole HTML file. Using disposes the reader even if ReadToEnd throws.
Dim line As String
Using sr As New StreamReader("C:\\temp\\test.htm")
    line = sr.ReadToEnd()
End Using

Dim Document As New Document()
Using fs As New FileStream("C:\\temp\\test.pdf", FileMode.Create)
    PdfWriter.GetInstance(Document, fs)
    Using stringReader As New StringReader(line)
        ' Passing Nothing as the style sheet makes HTMLWorker fall back to an empty StyleSheet.
        Dim parsedList As List(Of IElement) = HTMLWorker.ParseToList(stringReader, Nothing)
        Document.Open()
        ' parsedList is already typed as IElement, so no cast is needed.
        For Each item As IElement In parsedList
            Document.Add(item)
        Next
        ' Close the document while the output stream is still open.
        Document.Close()
    End Using
End Using
Document.Close()
我正在尝试修复引用 base64 嵌入式图像的问题。我创建了实现 IHTMLTagProcessor 接口的 CustomImageHTMLTagProcessor，但是在修改 HTMLWorker 类时，我不知道该更改什么。这是 HTMLWorker 类。
using System;
using System.IO;
using System.Text;
using System.Collections.Generic;
using System.Globalization;
using System.util;
using iTextSharp.text;
using iTextSharp.text.log;
using iTextSharp.text.html;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.draw;
using iTextSharp.text.xml.simpleparser;
namespace iTextSharp.text.html.simpleparser {
/**
 * Event-driven HTML parser: receives start/end/text callbacks, delegates each
 * supported tag to an IHTMLTagProcessor, and forwards the resulting elements
 * to an IDocListener. When used through ParseToList, the worker is its own
 * listener and collects the elements in a list.
 */
[Obsolete]
public class HTMLWorker : ISimpleXMLDocHandler, IDocListener {

    private static readonly ILogger LOGGER = LoggerFactory.GetLogger(typeof(HTMLWorker));

    /** Listener that receives the elements produced while parsing. */
    protected IDocListener document;

    /** Maps a tag name to the processor that handles it. */
    protected internal IDictionary<String, IHTMLTagProcessor> tags;

    /**
     * Style sheet applied to every processed tag. This field is assigned by
     * SetStyleSheet and read by StartDocument/StartElement; it must be
     * declared here for the class to compile.
     */
    private StyleSheet style = new StyleSheet();

    /**
     * Creates a worker with the default tag processors and an empty style sheet.
     * @param document the listener that receives the parsed elements
     */
    public HTMLWorker(IDocListener document) : this(document, null, null) {
    }

    /**
     * Creates a worker.
     * @param document the listener that receives the parsed elements
     * @param tags the supported tags; null selects a new HTMLTagProcessors
     * @param style the style sheet; null selects an empty StyleSheet
     */
    public HTMLWorker(IDocListener document, IDictionary<String, IHTMLTagProcessor> tags, StyleSheet style) {
        this.document = document;
        SetSupportedTags(tags);
        SetStyleSheet(style);
    }

    /**
     * Sets the map of supported tags.
     * @param tags the tag processors; null is replaced by a new HTMLTagProcessors
     */
    public virtual void SetSupportedTags(IDictionary<String, IHTMLTagProcessor> tags) {
        if (tags == null) {
            tags = new HTMLTagProcessors();
        }
        this.tags = tags;
    }

    /**
     * Sets the style sheet.
     * @param style the style sheet; null is replaced by a new, empty StyleSheet
     */
    public virtual void SetStyleSheet(StyleSheet style) {
        if (style == null) {
            style = new StyleSheet();
        }
        this.style = style;
    }

    /**
     * Parses the content of the reader, using this worker as the handler
     * for the parser callbacks.
     * @param reader the HTML source
     */
    public virtual void Parse(TextReader reader) {
        LOGGER.Info("Please note, there is a more extended version of the HTMLWorker available in the iText XMLWorker");
        SimpleXMLParser.Parse(this, null, reader, true);
    }

    // state machine

    /** Elements that are still being built (lists, cells, tables, paragraphs). */
    protected Stack<IElement> stack = new Stack<IElement>();

    /** The paragraph that currently receives text; null when none is open. */
    protected Paragraph currentParagraph;

    /** The chain of tags and attributes that are currently in effect. */
    private ChainedProperties chain = new ChainedProperties();

    /**
     * Parser callback: applies the body style and puts the body tag on the chain.
     */
    public virtual void StartDocument() {
        Dictionary<String, String> attrs = new Dictionary<String, String>();
        style.ApplyStyle(HtmlTags.BODY, attrs);
        chain.AddToChain(HtmlTags.BODY, attrs);
    }

    /**
     * Parser callback: styles the tag and hands it to its processor.
     * Tags without a registered processor are ignored.
     * @param tag the tag name
     * @param attrs the attributes of the tag
     */
    public virtual void StartElement(String tag, IDictionary<String, String> attrs) {
        IHTMLTagProcessor htmlTag;
        tags.TryGetValue(tag, out htmlTag);
        if (htmlTag == null) {
            return;
        }
        style.ApplyStyle(tag, attrs);
        StyleSheet.ResolveStyleAttribute(attrs, chain);
        htmlTag.StartElement(this, tag, attrs);
    }

    /**
     * Parser callback: adds text to the current paragraph, creating the
     * paragraph if necessary. Nothing happens while skipText is set.
     * @param content the text content
     */
    public virtual void Text(String content) {
        if (skipText) {
            return;
        }
        if (currentParagraph == null) {
            currentParagraph = CreateParagraph();
        }
        if (!insidePRE) {
            // newlines and carriage returns are ignored
            if (content.Trim().Length == 0 && content.IndexOf(' ') < 0) {
                return;
            }
            content = HtmlUtilities.EliminateWhiteSpace(content);
        }
        Chunk chunk = CreateChunk(content);
        currentParagraph.Add(chunk);
    }

    /**
     * Parser callback: hands the closing tag to its processor.
     * Tags without a registered processor are ignored.
     * @param tag the tag name
     */
    public virtual void EndElement(String tag) {
        IHTMLTagProcessor htmlTag;
        tags.TryGetValue(tag, out htmlTag);
        if (htmlTag == null) {
            return;
        }
        // process the tag
        htmlTag.EndElement(this, tag);
    }

    /**
     * Parser callback: adds everything that is still pending to the document.
     */
    public virtual void EndDocument() {
        // flush the stack
        // NOTE(review): Stack<T> enumerates from the most recently pushed
        // element to the oldest, and the stack is not cleared afterwards --
        // confirm that this order is the intended one.
        foreach (IElement e in stack) {
            document.Add(e);
        }
        // add current paragraph
        if (currentParagraph != null) {
            document.Add(currentParagraph);
        }
        currentParagraph = null;
    }

    /**
     * Adds a newline chunk to the current paragraph, creating the paragraph
     * if there is none.
     */
    public virtual void NewLine() {
        if (currentParagraph == null) {
            currentParagraph = new Paragraph();
        }
        currentParagraph.Add(CreateChunk("\n"));
    }

    /**
     * Closes the current paragraph: it is added to the document when the
     * stack is empty, or to the element on top of the stack when that
     * element is an ITextElementArray. Afterwards there is no current
     * paragraph. Does nothing when there is no current paragraph.
     */
    public virtual void CarriageReturn() {
        if (currentParagraph == null) {
            return;
        }
        if (stack.Count == 0) {
            document.Add(currentParagraph);
        } else {
            IElement obj = stack.Pop();
            if (obj is ITextElementArray) {
                ITextElementArray current = (ITextElementArray) obj;
                current.Add(currentParagraph);
            }
            stack.Push(obj);
        }
        currentParagraph = null;
    }

    /**
     * Stacks the current paragraph, indicating that we're starting
     * a new span.
     * @since 5.0.6
     */
    public virtual void FlushContent() {
        PushToStack(currentParagraph);
        currentParagraph = new Paragraph();
    }

    /**
     * Pushes an element to the Stack; null elements are ignored.
     * @param element the element to push
     * @since 5.0.6
     */
    public virtual void PushToStack(IElement element) {
        if (element != null) {
            stack.Push(element);
        }
    }

    /**
     * Updates the chain with a new tag and new attributes.
     * @param tag the new tag
     * @param attrs the corresponding attributes
     * @since 5.0.6
     */
    public virtual void UpdateChain(String tag, IDictionary<String, String> attrs) {
        chain.AddToChain(tag, attrs);
    }

    /**
     * Updates the chain by removing a tag.
     * @param tag the tag to remove
     * @since 5.0.6
     */
    public virtual void UpdateChain(String tag) {
        chain.RemoveChain(tag);
    }

    // providers that help find resources such as images and fonts

    /**
     * Key used to store the image provider in the providers map.
     * @since 5.0.6
     */
    public const String IMG_PROVIDER = "img_provider";

    /**
     * Key used to store the image processor in the providers map.
     * @since 5.0.6
     */
    public const String IMG_PROCESSOR = "img_interface";

    /**
     * Key used to store the image store in the providers map.
     * @since 5.0.6
     */
    public const String IMG_STORE = "img_static";

    /**
     * Key used to store the image baseurl provider in the providers map.
     * @since 5.0.6
     */
    public const String IMG_BASEURL = "img_baseurl";

    /**
     * Key used to store the font provider in the providers map.
     * @since 5.0.6
     */
    public const String FONT_PROVIDER = "font_factory";

    /**
     * Key used to store the link provider in the providers map.
     * @since 5.0.6
     */
    public const String LINK_PROVIDER = "alink_interface";

    /**
     * IDictionary containing providers such as a FontProvider or ImageProvider.
     * @since 5.0.6 (renamed from interfaceProps)
     */
    private IDictionary<String, Object> providers = new Dictionary<String, Object>();

    /**
     * Setter for the providers.
     * If a FontProvider is added, the ElementFactory is updated.
     * A null map is ignored and leaves the current providers in place.
     * @param providers a IDictionary with different providers
     * @since 5.0.6
     */
    public virtual void SetProviders(IDictionary<String, Object> providers) {
        if (providers == null) {
            return;
        }
        this.providers = providers;
        IFontProvider ff = GetProvider<IFontProvider>(FONT_PROVIDER);
        if (ff != null) {
            factory.FontProvider = ff;
        }
    }

    /**
     * Looks up a provider with a single dictionary access.
     * @param key one of the provider keys defined in this class
     * @return the provider cast to T, or null when the key is absent;
     *         a value of another type still causes an InvalidCastException
     */
    private T GetProvider<T>(String key) where T : class {
        Object value;
        providers.TryGetValue(key, out value);
        return (T) value;
    }

    // factory that helps create objects

    /**
     * Factory that is able to create iText Element objects.
     * @since 5.0.6
     */
    private ElementFactory factory = new ElementFactory();

    /**
     * Creates a Chunk using the factory.
     * @param content the content of the chunk
     * @return a Chunk with content
     * @since 5.0.6
     */
    public virtual Chunk CreateChunk(String content) {
        return factory.CreateChunk(content, chain);
    }

    /**
     * Creates a Paragraph using the factory.
     * @return a Paragraph without any content
     * @since 5.0.6
     */
    public virtual Paragraph CreateParagraph() {
        return factory.CreateParagraph(chain);
    }

    /**
     * Creates a List object.
     * @param tag should be "ol" or "ul"
     * @return a List object
     * @since 5.0.6
     */
    public virtual List CreateList(String tag) {
        return factory.CreateList(tag, chain);
    }

    /**
     * Creates a ListItem object.
     * @return a ListItem object
     * @since 5.0.6
     */
    public virtual ListItem CreateListItem() {
        return factory.CreateListItem(chain);
    }

    /**
     * Creates a LineSeparator object. Its offset is half the leading of the
     * current paragraph.
     * @param attrs properties of the LineSeparator
     * @return a LineSeparator object
     * @since 5.0.6
     */
    public virtual LineSeparator CreateLineSeparator(IDictionary<String, String> attrs) {
        // CarriageReturn() and EndDocument() reset currentParagraph to null, so
        // it cannot be dereferenced blindly; fall back to the leading of a
        // paragraph created for the current chain instead of throwing.
        Paragraph reference = currentParagraph;
        if (reference == null) {
            reference = CreateParagraph();
        }
        return factory.CreateLineSeparator(attrs, reference.Leading / 2);
    }

    /**
     * Creates an Image object.
     * @param attrs properties of the Image
     * @return an Image object (or null if the attributes contain no src)
     * @throws DocumentException
     * @throws IOException
     * @since 5.0.6
     */
    public virtual Image CreateImage(IDictionary<String, String> attrs) {
        String src;
        attrs.TryGetValue(HtmlTags.SRC, out src);
        if (src == null) {
            return null;
        }
        // Providers that were not registered are passed to the factory as null.
        Image img = factory.CreateImage(
            src, attrs, chain, document,
            GetProvider<IImageProvider>(IMG_PROVIDER),
            GetProvider<ImageStore>(IMG_STORE),
            GetProvider<String>(IMG_BASEURL));
        return img;
    }

    /**
     * Creates a Cell.
     * @param tag the tag
     * @return a CellWrapper object
     * @since 5.0.6
     */
    public virtual CellWrapper CreateCell(String tag) {
        return new CellWrapper(tag, chain);
    }

    // processing objects

    /**
     * Adds a link to the current paragraph.
     * @since 5.0.6
     */
    public virtual void ProcessLink() {
        if (currentParagraph == null) {
            currentParagraph = new Paragraph();
        }
        // The link provider allows you to do additional processing
        ILinkProcessor i = GetProvider<ILinkProcessor>(LINK_PROVIDER);
        if (i == null || !i.Process(currentParagraph, chain)) {
            // sets an Anchor for all the Chunks in the current paragraph
            String href = chain[HtmlTags.HREF];
            if (href != null) {
                foreach (Chunk ck in currentParagraph.Chunks) {
                    ck.SetAnchor(href);
                }
            }
        }
        // a link should be added to the current paragraph as a phrase
        Paragraph tmp = null;
        // Only take the top of the stack when it really is a Paragraph. Any
        // other element (a cell wrapper, for instance) stays where it is,
        // instead of being popped and failing the cast.
        if (stack.Count > 0 && stack.Peek() is Paragraph) {
            tmp = (Paragraph) stack.Pop();
        }
        if (tmp == null) {
            // no paragraph to add to, the link starts a paragraph of its own
            tmp = new Paragraph(new Phrase(currentParagraph));
        } else {
            tmp.Add(new Phrase(currentParagraph));
        }
        currentParagraph = tmp;
    }

    /**
     * Fetches the List from the Stack and adds it to
     * the TextElementArray on top of the Stack,
     * or to the Document if the Stack is empty.
     * Nothing happens when the top of the Stack is not a List.
     * @throws DocumentException
     * @since 5.0.6
     */
    public virtual void ProcessList() {
        if (stack.Count == 0) {
            return;
        }
        IElement obj = stack.Pop();
        if (!(obj is List)) {
            stack.Push(obj);
            return;
        }
        if (stack.Count == 0) {
            document.Add(obj);
        } else {
            ((ITextElementArray) stack.Peek()).Add(obj);
        }
    }

    /**
     * Looks for the List object on the Stack,
     * and adds the ListItem to the List.
     * The item is added to the document when nothing is left on the Stack.
     * @throws DocumentException
     * @since 5.0.6
     */
    public virtual void ProcessListItem() {
        if (stack.Count == 0) {
            return;
        }
        IElement obj = stack.Pop();
        if (!(obj is ListItem)) {
            stack.Push(obj);
            return;
        }
        if (stack.Count == 0) {
            document.Add(obj);
            return;
        }
        ListItem item = (ListItem) obj;
        IElement list = stack.Pop();
        if (!(list is List)) {
            // only the second element is pushed back; the item is not
            stack.Push(list);
            return;
        }
        ((List) list).Add(item);
        item.AdjustListSymbolFont();
        stack.Push(list);
    }

    /**
     * Processes an Image. If no image processor is registered, or the
     * processor returns false, the image is added to the current paragraph.
     * @param img the image
     * @param attrs the attributes of the image tag
     * @throws DocumentException
     * @since 5.0.6
     */
    public virtual void ProcessImage(Image img, IDictionary<String, String> attrs) {
        IImageProcessor processor = GetProvider<IImageProcessor>(IMG_PROCESSOR);
        if (processor == null || !processor.Process(img, attrs, chain, document)) {
            String align;
            attrs.TryGetValue(HtmlTags.ALIGN, out align);
            // an aligned image gets a paragraph of its own
            if (align != null) {
                CarriageReturn();
            }
            if (currentParagraph == null) {
                currentParagraph = CreateParagraph();
            }
            currentParagraph.Add(new Chunk(img, 0, 0, true));
            currentParagraph.Alignment = HtmlUtilities.AlignmentValue(align);
            if (align != null) {
                CarriageReturn();
            }
        }
    }

    /**
     * Processes the Table: pops the TableWrapper from the Stack and adds the
     * table it creates to the element on top of the Stack, or to the
     * document when the Stack is empty.
     * @throws DocumentException
     * @since 5.0.6
     */
    public virtual void ProcessTable() {
        TableWrapper table = (TableWrapper) stack.Pop();
        PdfPTable tb = table.CreateTable();
        tb.SplitRows = true;
        if (stack.Count == 0) {
            document.Add(tb);
        } else {
            ((ITextElementArray) stack.Peek()).Add(tb);
        }
    }

    /**
     * Gets the TableWrapper from the Stack and adds a new row.
     * Elements are popped until a TableWrapper is found; Stack.Pop throws an
     * InvalidOperationException if the Stack runs empty before that.
     * @since 5.0.6
     */
    public virtual void ProcessRow() {
        List<PdfPCell> row = new List<PdfPCell>();
        List<float> cellWidths = new List<float>();
        bool percentage = false;
        float width;
        float totalWidth = 0;
        int zeroWidth = 0;
        TableWrapper table = null;
        while (true) {
            IElement obj = stack.Pop();
            if (obj is CellWrapper) {
                CellWrapper cell = (CellWrapper) obj;
                width = cell.Width;
                cellWidths.Add(width);
                percentage |= cell.IsPercentage;
                if (width == 0) {
                    zeroWidth++;
                } else {
                    totalWidth += width;
                }
                row.Add(cell.Cell);
            }
            if (obj is TableWrapper) {
                table = (TableWrapper) obj;
                break;
            }
        }
        table.AddRow(row);
        if (cellWidths.Count > 0) {
            // cells come off the stack in reverse, naturally
            totalWidth = 100 - totalWidth;
            cellWidths.Reverse();
            float[] widths = new float[cellWidths.Count];
            bool hasZero = false;
            for (int i = 0; i < widths.Length; i++) {
                widths[i] = cellWidths[i];
                // cells without a width share what is left of 100
                if (widths[i] == 0 && percentage && zeroWidth > 0) {
                    widths[i] = totalWidth / zeroWidth;
                }
                if (widths[i] == 0) {
                    hasZero = true;
                    break;
                }
            }
            // the widths are only applied when every column has one
            if (!hasZero) {
                table.ColWidths = widths;
            }
        }
        stack.Push(table);
    }

    // state variables and methods

    /** Stack to keep track of table tags. */
    private Stack<bool[]> tableState = new Stack<bool[]>();

    /** Boolean to keep track of TR tags. */
    private bool pendingTR = false;

    /** Boolean to keep track of TD and TH tags */
    private bool pendingTD = false;

    /** Boolean to keep track of LI tags */
    private bool pendingLI = false;

    /**
     * Boolean to keep track of PRE tags
     * @since 5.0.6 renamed from isPRE
     */
    private bool insidePRE = false;

    /**
     * Indicates if text needs to be skipped.
     * @since iText 5.0.6 (private => protected)
     */
    protected internal bool skipText = false;

    /**
     * Pushes the values of pendingTR and pendingTD
     * to a state stack.
     * @since 5.0.6
     */
    public virtual void PushTableState() {
        tableState.Push(new bool[] { pendingTR, pendingTD });
    }

    /**
     * Pops the values of pendingTR and pendingTD
     * from a state stack.
     * @since 5.0.6
     */
    public virtual void PopTableState() {
        bool[] state = tableState.Pop();
        pendingTR = state[0];
        pendingTD = state[1];
    }

    /**
     * @return the pendingTR
     * @since 5.0.6
     */
    public virtual bool IsPendingTR() {
        return pendingTR;
    }

    /**
     * @param pendingTR the pendingTR to set
     * @since 5.0.6
     */
    public virtual void SetPendingTR(bool pendingTR) {
        this.pendingTR = pendingTR;
    }

    /**
     * @return the pendingTD
     * @since 5.0.6
     */
    public virtual bool IsPendingTD() {
        return pendingTD;
    }

    /**
     * @param pendingTD the pendingTD to set
     * @since 5.0.6
     */
    public virtual void SetPendingTD(bool pendingTD) {
        this.pendingTD = pendingTD;
    }

    /**
     * @return the pendingLI
     * @since 5.0.6
     */
    public virtual bool IsPendingLI() {
        return pendingLI;
    }

    /**
     * @param pendingLI the pendingLI to set
     * @since 5.0.6
     */
    public virtual void SetPendingLI(bool pendingLI) {
        this.pendingLI = pendingLI;
    }

    /**
     * @return the insidePRE
     * @since 5.0.6
     */
    public virtual bool IsInsidePRE() {
        return insidePRE;
    }

    /**
     * @param insidePRE the insidePRE to set
     * @since 5.0.6
     */
    public virtual void SetInsidePRE(bool insidePRE) {
        this.insidePRE = insidePRE;
    }

    /**
     * @return the skipText
     * @since 5.0.6
     */
    public virtual bool IsSkipText() {
        return skipText;
    }

    /**
     * @param skipText the skipText to set
     * @since 5.0.6
     */
    public virtual void SetSkipText(bool skipText) {
        this.skipText = skipText;
    }

    // static methods to parse HTML to a List of Element objects.

    /** The resulting list of elements. */
    protected List<IElement> objectList;

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @return a List of Element objects
     * @throws IOException
     */
    public static List<IElement> ParseToList(TextReader reader, StyleSheet style) {
        return ParseToList(reader, style, null);
    }

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @param providers map containing classes with extra info
     * @return a List of Element objects
     * @throws IOException
     */
    public static List<IElement> ParseToList(TextReader reader, StyleSheet style,
            Dictionary<String, Object> providers) {
        return ParseToList(reader, style, null, providers);
    }

    /**
     * Parses an HTML source to a List of Element objects
     * @param reader the HTML source
     * @param style a StyleSheet object
     * @param tags a map containing supported tags and their processors
     * @param providers map containing classes with extra info
     * @return a List of Element objects
     * @throws IOException
     * @since 5.0.6
     */
    public static List<IElement> ParseToList(TextReader reader, StyleSheet style,
            IDictionary<String, IHTMLTagProcessor> tags, Dictionary<String, Object> providers) {
        HTMLWorker worker = new HTMLWorker(null, tags, style);
        // the worker listens to itself, so Add() collects into objectList
        worker.document = worker;
        worker.SetProviders(providers);
        worker.objectList = new List<IElement>();
        worker.Parse(reader);
        return worker.objectList;
    }

    // DocListener interface

    /**
     * Collects the element in the result list.
     * @see com.itextpdf.text.ElementListener#add(com.itextpdf.text.Element)
     */
    public virtual bool Add(IElement element) {
        objectList.Add(element);
        return true;
    }

    /**
     * Does nothing.
     * @see com.itextpdf.text.DocListener#close()
     */
    public virtual void Close() {
    }

    /**
     * Does nothing.
     * @return always true
     * @see com.itextpdf.text.DocListener#newPage()
     */
    public virtual bool NewPage() {
        return true;
    }

    /**
     * Does nothing.
     * @see com.itextpdf.text.DocListener#open()
     */
    public virtual void Open() {
    }

    /**
     * Does nothing.
     * @see com.itextpdf.text.DocListener#resetPageCount()
     */
    public virtual void ResetPageCount() {
    }

    /**
     * Does nothing.
     * @return always false
     * @see com.itextpdf.text.DocListener#setMarginMirroring(bool)
     */
    public virtual bool SetMarginMirroring(bool marginMirroring) {
        return false;
    }

    /**
     * Does nothing.
     * @return always false
     * @see com.itextpdf.text.DocListener#setMarginMirroring(bool)
     * @since 2.1.6
     */
    public virtual bool SetMarginMirroringTopBottom(bool marginMirroring) {
        return false;
    }

    /**
     * Does nothing.
     * @return always true
     * @see com.itextpdf.text.DocListener#setMargins(float, float, float, float)
     */
    public virtual bool SetMargins(float marginLeft, float marginRight,
            float marginTop, float marginBottom) {
        return true;
    }

    /**
     * The assigned value is ignored.
     * @see com.itextpdf.text.DocListener#setPageCount(int)
     */
    public virtual int PageCount {
        set {
        }
    }

    /**
     * Does nothing.
     * @return always true
     * @see com.itextpdf.text.DocListener#setPageSize(com.itextpdf.text.Rectangle)
     */
    public virtual bool SetPageSize(Rectangle pageSize) {
        return true;
    }

    // deprecated methods

    /**
     * Sets the providers.
     * @deprecated use SetProviders() instead
     */
    public virtual void SetInterfaceProps(Dictionary<String, Object> providers) {
        SetProviders(providers);
    }

    /**
     * Gets the providers
     * @deprecated kept for backward compatibility; this class declares no
     *             other getter for the providers map
     */
    public virtual IDictionary<String, Object> GetInterfaceProps() {
        return providers;
    }

    /**
     * Releases the worker by calling Close().
     */
    public virtual void Dispose() {
        Close();
    }
}
}
发布于 2015-12-10 15:25:56
正如 Chris 提到的，HTMLWorker 已被废弃。XMLWorker 是将 HTML 转换为 PDF 的新方法。
https://stackoverflow.com/questions/34204392
复制相似问题