itextsharp html to pdf - c#

I want to convert some HTML into a PDF. All of my HTML is in a string named HTML, but I don't know how to pass it to iTextSharp correctly.
/// <summary>
/// Renders the markup held in <c>HTML</c> into the file <c>t.pdf</c>, on top of a
/// full-page background image, and then redirects the browser to that file.
/// </summary>
public void PDF()
{
    // The using block guarantees the file handle is released even if parsing throws,
    // so t.pdf is never left locked when the redirect below is issued.
    using (var output = new FileStream(Server.MapPath("t.pdf"), FileMode.Create))
    {
        // Create the document object (the iTextSharp type is Document)
        var doc = new Document(PageSize.A4, 50, 50, 25, 25);
        // Bind a PdfWriter so everything added to the document is written to the file stream
        PdfWriter.GetInstance(doc, output);
        // Open the doc for writing
        doc.Open();

        // Add the wallpaper image to the pdf
        var wallpaper = iTextSharp.text.Image.GetInstance(Server.MapPath("hfc.png"));
        wallpaper.SetAbsolutePosition(0, 0);
        wallpaper.ScaleAbsolute(600, 840);
        doc.Add(wallpaper);

        // Tag-level styles for the HTML parser
        StyleSheet css = new StyleSheet();
        css.LoadTagStyle("body", "face", "Garamond");
        css.LoadTagStyle("body", "encoding", "Identity-H");
        css.LoadTagStyle("body", "size", "12pt");

        iTextSharp.text.html.simpleparser.HTMLWorker hw = new iTextSharp.text.html.simpleparser.HTMLWorker(doc);
        // Without this call the style sheet built above is never used by the parser
        hw.SetStyleSheet(css);
        hw.Parse(new StringReader(HTML));

        doc.Close();
    }

    Response.Redirect("t.pdf");
}
If anyone knows how to make this work, it would be appreciated.
Thanks
Dom

Please download The Best iText Questions on StackOverflow. It's a free ebook, you'll benefit from it.
Once you have downloaded it, go to the section entitled "Parsing XML and XHTML".
Allow me to quote from the answer to this question: RowSpan does not work in iTextSharp?
You are using HTMLWorker instead of XML Worker, and you are right:
HTMLWorker has no support for CSS. Saying CSS doesn't work in
iTextSharp is wrong. It doesn't work when you use HTMLWorker, but
that's documented: the CSS you need works in XML Worker.
Please throw away your code, and start anew using XML Worker.
There are many examples (simple ones as well as complex ones) in the book. Let me give you only one:
// Converts the markup in `result` to a PDF written to `outputFile`, using an explicitly
// assembled XML Worker pipeline. `outputFile`, `result` and `fonts` are declared outside this excerpt.
using (var fsOut = new FileStream(outputFile, FileMode.Create, FileAccess.Write))
using (var stringReader = new StringReader(result))
{
var document = new Document();
var pdfWriter = PdfWriter.GetInstance(document, fsOut);
pdfWriter.InitialLeading = 12.5f;
document.Open();
// NOTE(review): xmlWorkerHelper is not referenced again in this excerpt.
var xmlWorkerHelper = XMLWorkerHelper.GetInstance();
var cssResolver = new StyleAttrCSSResolver();
// Register each entry of `fonts` with the font provider used by the CSS appliers below
var xmlWorkerFontProvider = new XMLWorkerFontProvider();
foreach (string font in fonts)
{
xmlWorkerFontProvider.Register(font);
}
var cssAppliers = new CssAppliersImpl(xmlWorkerFontProvider);
var htmlContext = new HtmlPipelineContext(cssAppliers);
htmlContext.SetTagFactory(Tags.GetHtmlTagProcessorFactory());
// The pipelines are built back to front: each constructor receives the next stage,
// giving the chain CSS resolver -> HTML -> PDF writer.
PdfWriterPipeline pdfWriterPipeline = new PdfWriterPipeline(document, pdfWriter);
HtmlPipeline htmlPipeline = new HtmlPipeline(htmlContext, pdfWriterPipeline);
CssResolverPipeline cssResolverPipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
XMLWorker xmlWorker = new XMLWorker(cssResolverPipeline, true);
XMLParser xmlParser = new XMLParser(xmlWorker);
// Feed the markup through the pipeline, then finish the document
xmlParser.Parse(stringReader);
document.Close();
}
// NOTE(review): the brace below has no matching opener in this excerpt; presumably it closes an enclosing method.
}
(Source: iTextSharp XmlWorker: right-to-left)
If you want an easier example, take a look at the answers of these questions:
How to parse multiple HTML files into a single PDF?
How to add a rich Textbox (HTML) to a table cell?
...
The code that parses an HTML string and a CSS string to a list of iText(Sharp) elements is as simple as this:
// In iTextSharp (C#) the method name is PascalCase; it turns an HTML string plus a CSS string into a list of elements.
ElementList list = XMLWorkerHelper.ParseToElementList(html, css);
You can find more examples on the official iText web site.

Related

Parser results in "The document has no pages"

Trying to generate a PDF with HTML/CSS by using ITextSharp v5. The error I get is "Document has no pages". Is my parser set up wrong? How do I get the parsed HTML added to my document?
/// <summary>
/// Parses <paramref name="xHtml"/> with XML Worker, applying <paramref name="css"/>,
/// and writes the resulting PDF to App_Data/pdfs/testt.pdf.
/// </summary>
/// <param name="xHtml">The XHTML markup to convert.</param>
/// <param name="css">CSS text added to the default CSS resolver.</param>
public void ConvertHtmlToPdf(string xHtml, string css)
{
    using (var pdfFile = new FileStream("App_Data/pdfs/testt.pdf", FileMode.Create))
    using (var pdfDocument = new Document(PageSize.A4, 10f, 10f, 10f, 0f))
    {
        var pdfWriter = PdfWriter.GetInstance(pdfDocument, pdfFile);
        pdfDocument.Open();

        // HTML stage: default tag processors, output forwarded to the PDF writer stage
        var context = new HtmlPipelineContext(null);
        context.SetTagFactory(Tags.GetHtmlTagProcessorFactory());
        var htmlStage = new HtmlPipeline(context, new PdfWriterPipeline(pdfDocument, pdfWriter));

        // CSS stage: default resolver plus the caller-supplied CSS
        var resolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(true);
        resolver.AddCss(css, "utf-8", true);
        var cssStage = new CssResolverPipeline(resolver, htmlStage);

        // Run the markup through the assembled pipeline
        var xmlParser = new XMLParser(new XMLWorker(cssStage, true));
        using (var markup = new StringReader(xHtml))
        {
            xmlParser.Parse(markup);
        }

        pdfDocument.Close();
        pdfWriter.Close();
    }
}
The document is empty as no data is written to it from Worker classes.
Immediately after opening the document, always add an empty chunk to the document so that you can avoid this exception:
// C# spelling: the method is Add, and an empty string literal is "" ('' is not valid C#)
document.Add(new Chunk(""));
To Convert HTML to PDF, you can check this example
How to convert HTML to PDF using iText

create multiple page pdf from asp.net mvc?

I was trying to create a pdf dynamically and send it by attaching in the mail.
This is my code and it works perfectly for me.
/// <summary>
/// Builds a two-paragraph PDF in memory, e-mails it as the attachment "pdfname.pdf",
/// and redirects to the "index" action.
/// </summary>
/// <returns>A redirect to the "index" action.</returns>
public ActionResult sendmail()
{
    using (MemoryStream ms = new MemoryStream())
    {
        Document doc = new Document(PageSize.A4, 10f, 10f, 100f, 0.0f);
        PdfWriter writer = PdfWriter.GetInstance(doc, ms);
        // Important: keep the stream open when the document is closed, because it is read again for the attachment
        writer.CloseStream = false;

        doc.Open(); //open doc for editing
        doc.Add(new Paragraph("First Paragraph"));
        doc.Add(new Paragraph("Second Paragraph"));
        doc.Close(); //build the doc.

        // Rewind so the attachment is read from the first byte of the PDF
        ms.Position = 0;

        // SmtpClient and MailMessage hold connections/attachments and are disposed once the mail is sent
        using (SmtpClient smtpClient = new SmtpClient())
        using (MailMessage mailMessage = new MailMessage())
        {
            smtpClient.Host = "provider.com";
            // Placeholder credentials from the example; real values belong in configuration, not in source
            smtpClient.Credentials = new NetworkCredential("credentialmailid", "password");

            // Addresses need an '@'; MailAddress rejects "from#gmail.com" with a FormatException
            mailMessage.From = new MailAddress("from@gmail.com");
            mailMessage.To.Add(new MailAddress("to@gmail.com"));
            mailMessage.Subject = "Pdf attached";
            mailMessage.Attachments.Add(new Attachment(ms, "pdfname.pdf"));

            smtpClient.Send(mailMessage);
        }
    }

    return RedirectToAction("index");
}
Now my issue is: the document that I have to send is a purchase confirmation. It will have 3 pages, with many headings and styles in it.
I also have to pass some values dynamically, such as who purchased it and the date; a lot of data has to be passed in dynamically.
How can I do this? I thought of creating an HTML version of the PDF file to be sent and using something like this to add the parameters dynamically...
// Load the HTML template and substitute the ##-prefixed placeholders in one chained expression
string mailpath = Server.MapPath("~/Mail/HtmlOF_pdfToSend.html");
string mailbody = System.IO.File.ReadAllText(mailpath)
    .Replace("##CompanyName", "Bhavin Merchant")
    .Replace("##BusinessType", "Bhavin business");
First you have to add a reference to the iTextSharp DLL, then you have to import some namespaces:
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.html;
using iTextSharp.text.xml;
using iTextSharp.text.html.simpleparser;
Then, as per your question, you want to pass values dynamically, so here is some syntax as an example:
// Create a Document object
var document = new Document(PageSize.A4, 50, 50, 25, 25);
var output = new MemoryStream();
// Create a new PdfWriter object, specifying the output stream
var writer = PdfWriter.GetInstance(document, output);
// Open the Document for writing
document.Open();
Suppose you have a header in your PDF document; the syntax will be:
// Load the header image, pin it to an absolute position on the page, and add it to the document
string logoPath = Server.MapPath("~/images/it.jpg");
var headerImage = iTextSharp.text.Image.GetInstance(logoPath);
headerImage.SetAbsolutePosition(300, 750);
document.Add(headerImage);
If you want to add phrase:
// Bold "E-Ticket" heading surrounded by line breaks
var heading = new Phrase("\nE-Ticket\n");
heading.Font.SetStyle(Font.BOLD);
document.Add(heading);
Add lines :
// A row of dashes used as a horizontal separator, followed by a blank line
var separator = new Phrase("--------------------------------------------------------------------------------------\n\n");
separator.Font.SetStyle(Font.BOLD);
document.Add(separator);
Change the style of text :
Here you can change the font style & color.
// Bold line showing the booking date taken from the txtDate1 text box
string bookingLine = "Booking Date-" + txtDate1.Text + "\n";
var bookingDate = new Phrase(bookingLine);
bookingDate.Font.SetStyle(Font.BOLD);
document.Add(bookingDate);
If you want to add a PDF table (dt is a DataTable):
// Build a PDF table with one column per DataTable column (dt is the source DataTable)
PdfPTable UserInfoTable = new PdfPTable(dt.Columns.Count);

// Header row: one cell per column name
foreach (System.Data.DataColumn column in dt.Columns)
{
    UserInfoTable.AddCell(column.ColumnName);
}

// Data rows: one cell per value, in column order, so every row fills the table width exactly
foreach (System.Data.DataRow dataRow in dt.Rows)
{
    foreach (object value in dataRow.ItemArray)
    {
        // Convert.ToString turns DBNull/null into an empty string instead of throwing
        UserInfoTable.AddCell(Convert.ToString(value));
    }
}

document.Add(UserInfoTable);
Close the Document - this saves the document contents to the output stream
// Closing the document finishes the PDF, so the output stream now holds the complete file
document.Close();
// Send the bytes to the browser as a downloadable attachment
Response.ContentType = "application/pdf";
Response.AddHeader("Content-Disposition", string.Format("attachment;filename=Receipt-{0}.pdf", "hello"));
Response.BinaryWrite(output.ToArray());
Here I paste some example code as your question.
You can add more pages to the document like this:
doc.Open(); //open doc for editing
doc.Add(new Paragraph("First Paragraph"));
// iTextSharp uses PascalCase method names: NewPage() and Add(), not the Java-style newPage()/add()
doc.NewPage();
doc.Add(new Paragraph("This is a new page =)"));

itextsharp(xmlworker) parsing is slow

I have been using iTextSharp to convert an MVC view to PDF. The view uses inline styling. Everything works fine with the code below, but the parsing is slow:
// Converts the markup in htmlcontent to a PDF held in the memory stream ms
using (var ms = new MemoryStream())
{
    using (var doc = new Document(PageSize.A4, 0, 1, 0, 0))
    {
        using (var writer = PdfWriter.GetInstance(doc, ms))
        {
            doc.Open();
            XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, htmlcontent);
            //Above line is too slow
            doc.Close();
        }
    }
    // The closing brace of the outermost using was missing from the original snippet
}
As suggested by the experts here, I moved on to the following modifications:
Registering fonts
Moving the styling to a different CSS file
Now I am using the code below, but the generated PDF is blank. It does retain the style but no fonts, and even this approach takes the same time to parse:
// Converts the markup in pdftext to a PDF held in ms, using an explicit XML Worker pipeline
// with an external style sheet and explicitly registered fonts.
using (var ms = new MemoryStream())
{
    using (var doc = new Document(PageSize.A4, 0, 1, 0, 0))
    {
        using (var writer = PdfWriter.GetInstance(doc, ms))
        {
            doc.Open();

            // css: the file stream is scoped so the style sheet is released as soon as it has been read
            var cssResolver = new StyleAttrCSSResolver();
            using (var cssStream = new FileStream(Server.MapPath("~/Content/scptpdf.css"), FileMode.Open, FileAccess.Read))
            {
                var cssFile = XMLWorkerHelper.GetCSS(cssStream);
                cssResolver.AddCss(cssFile);
            }

            // html: only the fonts registered here are available (system font lookup is disabled)
            var fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
            fontProvider.Register(Server.MapPath("~/Content/fonts/arial.ttf"));
            // The original path ended in "arialbd." with no extension; the bold face file is arialbd.ttf
            fontProvider.Register(Server.MapPath("~/Content/fonts/arialbd.ttf"));
            fontProvider.AddFontSubstitute("calibri", "ARIAL");
            var cssAppliers = new CssAppliersImpl(fontProvider);
            var htmlContext = new HtmlPipelineContext(cssAppliers);
            htmlContext.SetTagFactory(Tags.GetHtmlTagProcessorFactory());

            // Pipeline chain: CSS resolver -> HTML -> PDF writer
            var pdf = new PdfWriterPipeline(doc, writer);
            var html = new HtmlPipeline(htmlContext, pdf);
            var css = new CssResolverPipeline(cssResolver, html);
            var worker = new XMLWorker(css, true);
            var p = new XMLParser(worker);

            // Parse the markup from an in-memory UTF-8 stream
            byte[] byteArray = Encoding.UTF8.GetBytes(pdftext);
            using (var htmlstream = new MemoryStream(byteArray))
            {
                p.Parse(htmlstream);
            }
            //XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, htmlcontent);
            doc.Close();
        }
    }
    // The closing brace of the outermost using was missing from the original snippet
}
I need to overcome the latency. Can someone help with this? Thanks in advance.
I removed the font types. Now iTextSharp uses its own fonts, and it's fast too.

Trying to debug some iTextSharp code that is designed to convert an web page into a PDF document via a string variable

I am stuck trying to debug some code that is designed to convert a web page into a PDF document via a string variable. It uses the iTextSharp C# tool (XML Worker) and is a modification of the example code that comes with the SourceForge source code, called html2pdf.csproj. This example code converts an existing HTML file into a PDF file and saves it in the same directory as the file that was converted. I have a string variable containing HTML-formatted text and I am trying to make it possible to return it as a byte array that will be passed to the client side in a web environment for printing purposes. The problem is that I get an "IOException was unhandled by user code" message that states "The document has no pages." I'm a little unsure what this is supposed to mean, or how to go about diagnosing the problem. The example code using the file-based system works, and I have successfully converted a static version of the HTML string to PDF. Below is the modified code:
// Builds a PDF in memory with an XML Worker pipeline and returns its bytes.
// NOTE(review): in this version the html parameter is never read; see the Parse call below.
private byte[] createPDF(string html, string filename) {
MemoryStream msOutput = new MemoryStream();
string printPDFCSS = Server.MapPath("/content/printPDF.css");
Document doc = new Document(PageSize.LETTER);
// Keep the document's left/right margins; set top to 35 and bottom to 0
doc.SetMargins(doc.LeftMargin, doc.RightMargin, 35, 0);
PdfWriter pdfWriter = PdfWriter.GetInstance(doc, msOutput);
doc.Open();
// Font substitution table passed to the font provider below
Dictionary<String, String> substFonts = new Dictionary<String, String>();
substFonts["Arial Unicode MS"] = "Helvetica";
// Load the print style sheet and hand it to the CSS resolver
CssFilesImpl cssFiles = new CssFilesImpl();
cssFiles.Add(XMLWorkerHelper.GetCSS(new FileStream(printPDFCSS, FileMode.Open)));
StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);
// UnembedFontProvider and ImageProvider are not defined in this excerpt
HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new UnembedFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS, substFonts)));
hpc.SetImageProvider(new ImageProvider(filename));
hpc.SetAcceptUnknown(true).AutoBookmark(true).SetTagFactory(Tags.GetHtmlTagProcessorFactory());
// Pipeline chain: CSS resolver -> HTML -> PDF writer
HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, pdfWriter));
IPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
XMLWorker worker = new XMLWorker(pipeline, true);
XMLParser xmlParse = new XMLParser(true, worker);
// NOTE(review): this parses msOutput, the stream the PdfWriter writes to, rather than the html argument.
xmlParse.Parse(msOutput);
doc.Close();
return msOutput.ToArray();
}
Below is the code I finally found that works. There were a number of issues with the code above, but this seems to work:
/// <summary>
/// Converts an HTML string to a PDF with an XML Worker pipeline and returns the PDF bytes.
/// </summary>
/// <param name="html">The HTML markup to convert.</param>
/// <param name="filename">Passed to the ImageProvider used for the document's images.</param>
/// <returns>The generated PDF as a byte array.</returns>
private byte[] createPDF(string html, string filename)
{
    string printPDFCSS = Server.MapPath("/content/printPDF.css");

    // Encode as UTF-8: ASCIIEncoding.Default is really Encoding.Default (the machine's ANSI code page),
    // which loses characters outside that code page.
    using (MemoryStream msInput = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    using (MemoryStream msOutput = new MemoryStream())
    {
        Document doc = new Document(PageSize.LETTER);
        // Keep the document's left/right margins; set top to 35 and bottom to 0
        doc.SetMargins(doc.LeftMargin, doc.RightMargin, 35, 0);
        PdfWriter pdfWriter = PdfWriter.GetInstance(doc, msOutput);
        doc.Open();

        // Font substitution table passed to the font provider below
        Dictionary<String, String> substFonts = new Dictionary<String, String>();
        substFonts["Arial Unicode MS"] = "Helvetica";

        // Open the style sheet read-only (FileMode.Open alone requests read/write access,
        // which can fail on read-only content or under concurrent requests) and release it after loading.
        CssFilesImpl cssFiles = new CssFilesImpl();
        using (FileStream cssStream = new FileStream(printPDFCSS, FileMode.Open, FileAccess.Read))
        {
            cssFiles.Add(XMLWorkerHelper.GetCSS(cssStream));
        }
        StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);

        HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new UnembedFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS, substFonts)));
        hpc.SetImageProvider(new ImageProvider(filename));
        hpc.SetAcceptUnknown(true).AutoBookmark(true).SetTagFactory(Tags.GetHtmlTagProcessorFactory());

        // Pipeline chain: CSS resolver -> HTML -> PDF writer
        HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, pdfWriter));
        IPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser xmlParse = new XMLParser(true, worker);

        // Parse the input markup (not the output stream)
        xmlParse.Parse(msInput);
        doc.Close();

        // ToArray still works after the document has closed the stream
        return msOutput.ToArray();
    }
}

iTextSharp HTMLWorker ParseHTML Tablestyle and PDFStamper

Hi I have succesfully used a HTMLWorker to convert a gridview using asp.NET / C#.
(1) I have applied some limited style to the resulting table but cannot see how to apply tablestyle for instance grid lines or apply other formatting style such as a large column width for example for a particular column.
(2) I would actually like to put this text onto a pre-existing template which contains a logo etc. I've used PdfStamper before for this but cannot see how I can use both PdfStamper and HTMLWorker at once. HTMLWorker needs a Document which implements IDocListener ... but that doesn't seem compatible with using a PdfStamper. I guess what I am looking for is a way to create a PdfStamper, write the title etc., then add the parsed HTML from the grid. The other problem is that the parsed content doesn't interact with the other stuff on the page. For instance, below I add a title chunk to the page. Rather than starting below it, the parsed HTML writes over the top. How do I place the parsed HTML content so that it interacts with the rest of what is on the PDF document?
Thanks in advance
Rob
Here's the code I have already:
// Writes a title followed by the parsed HTML (read from sr) to the HTTP response as a PDF.
// `title` and `sr` are declared outside this excerpt.
Document pdfDoc = new Document(PageSize.A4, 10f, 10f, 30f, 0f);
HTMLWorker htmlWorker = new HTMLWorker(pdfDoc);

// Tag-level font styles for the parsed table
// NOTE(review): the answer that follows recommends pt rather than px for these sizes - confirm before changing.
StyleSheet styles = new StyleSheet();
styles.LoadTagStyle("th", "size", "12px");
styles.LoadTagStyle("th", "face", "helvetica");
styles.LoadTagStyle("span", "size", "10px");
styles.LoadTagStyle("span", "face", "helvetica");
styles.LoadTagStyle("td", "size", "10px");
styles.LoadTagStyle("td", "face", "helvetica");
htmlWorker.SetStyleSheet(styles);

PdfWriter.GetInstance(pdfDoc, HttpContext.Current.Response.OutputStream);
pdfDoc.Open();

//Title - wrapped in a Paragraph so it occupies its own line; a bare Chunk does not
//move the following content down, so the parsed HTML was drawn over it
Font font = new Font(Font.FontFamily.HELVETICA, 14, Font.BOLD);
Chunk chunk = new Chunk(title, font);
pdfDoc.Add(new Paragraph(chunk));

//Body
htmlWorker.Parse(sr);

//Closing the document completes the PDF written to the response stream
pdfDoc.Close();
Let me first give you a couple of links to look over when you get a chance:
ItextSharp support for HTML and CSS
How to apply font properties on while passing html to pdf using itextsharp
These answers go deeper into what's going on and I recommend reading them when you get a chance. Specifically the second one will show you why you need to use pt instead of px.
To answer your first question let me show you a different way to use the HTMLWorker class. This class has a static method on it called ParseToList that will convert HTML to a List<IElement>. The objects in that list are all iTextSharp specific versions of your HTML. Normally you would do a foreach on those and just add them to a document but you can modify them before adding which is what you want to do. Below is code that takes a static string and does that:
// Writes File1.pdf to the desktop: the HTML table is parsed into iTextSharp elements,
// the table's column widths are adjusted, and each element is added to the document.
string desktop = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
string file1 = Path.Combine(desktop, "File1.pdf");
using (FileStream fs = new FileStream(file1, FileMode.Create, FileAccess.Write, FileShare.None))
using (Document doc = new Document(PageSize.LETTER))
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
{
    doc.Open();

    //Our HTML
    string html = "<table><tr><th>First Name</th><th>Last Name</th></tr><tr><td>Chris</td><td>Haas</td></tr></table>";

    //ParseToList reads from a reader rather than a plain string, so wrap the string in a StringReader
    using (StringReader sr = new StringReader(html))
    {
        //Create a style sheet
        StyleSheet styles = new StyleSheet();
        //...styles omitted for brevity

        //Convert our HTML to iTextSharp elements
        List<IElement> parsed = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles);

        //Walk the parsed elements (in this case there's actually just one PdfPTable)
        foreach (IElement item in parsed)
        {
            //Tables get their column widths adjusted; anything else is added untouched
            PdfPTable table = item as PdfPTable;
            if (table != null)
            {
                //These are relative widths, by the way
                table.SetWidths(new float[] { 75, 25 });
            }

            //Add the element to the document
            doc.Add(item);
        }
    }

    doc.Close();
}
Hopefully you can see that once you get access to the raw PdfPTable you can tweak it as necessary.
To answer your second question, if you want to use the normal Paragraph and Chunk objects with a PdfStamper then you need to use a PdfContentByte object. You can get this from your stamper in one of two ways, either by asking for one that sits "above" existing content, stamper.GetOverContent(int) or one that sits "below" existing content, stamper.GetUnderContent(int). Both versions take a single parameter saying what page to work with. Once you have a PdfContentByte you can create a ColumnText object bound to it and use this object's AddElement() method to add your normal elements. Before doing this (and this answers your third question), you'll want to create at least one "column". When I do this I generally create one that essentially covers the entire page. (This part might sound weird but we're essentially making a single row, single column table cell to add our objects to.)
Below is a full working C# 2010 WinForms app targeting iTextSharp 5.1.1.0 that shows off everything above. First it creates a generic PDF on the desktop. Then it creates a second document based off of the first, adds a paragraph and then some HTML. See the comments in the code for any questions.
using System;
using System.Collections.Generic;
using System.Text;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.html.simpleparser;
using iTextSharp.text.pdf;
using System.IO;
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form: on load it creates File1.pdf on the desktop, then creates File2.pdf from it
    /// with an extra paragraph and a parsed HTML table stamped on page 1, and closes itself.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Creates the two demo PDF files and then closes the form.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            //The two files that we are creating
            string file1 = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "File1.pdf");
            string file2 = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "File2.pdf");

            //Create a base file to write on top of
            using (FileStream fs = new FileStream(file1, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                using (Document doc = new Document(PageSize.LETTER))
                {
                    using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
                    {
                        doc.Open();
                        doc.Add(new Paragraph("Hello world"));
                        doc.Close();
                    }
                }
            }

            //Bind a reader to our first document
            PdfReader reader = new PdfReader(file1);
            try
            {
                //Create our second document
                using (FileStream fs = new FileStream(file2, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    using (PdfStamper stamper = new PdfStamper(reader, fs))
                    {
                        StyleSheet styles = new StyleSheet();
                        //...styles omitted for brevity

                        //Our HTML
                        string html = "<table><tr><th>First Name</th><th>Last Name</th></tr><tr><td>Chris</td><td>Haas</td></tr></table>";

                        //ParseToList reads from a reader rather than a plain string, so wrap the string in a StringReader
                        using (StringReader sr = new StringReader(html))
                        {
                            //Get our raw PdfContentByte object letting us draw "above" existing content
                            PdfContentByte cb = stamper.GetOverContent(1);

                            //Create a new ColumnText object bound to the above PdfContentByte object
                            ColumnText ct = new ColumnText(cb);

                            //Get the dimensions of the first page of our source document
                            iTextSharp.text.Rectangle page1size = reader.GetPageSize(1);

                            //Create a single column object spanning the entire page
                            ct.SetSimpleColumn(0, 0, page1size.Width, page1size.Height);

                            ct.AddElement(new Paragraph("Hello world!"));

                            //Convert our HTML to iTextSharp elements
                            List<IElement> elements = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles);

                            //Loop through each element (in this case there's actually just one PdfPTable)
                            foreach (IElement el in elements)
                            {
                                //If the element is a PdfPTable
                                if (el is PdfPTable)
                                {
                                    //Cast it
                                    PdfPTable tt = (PdfPTable)el;
                                    //Change the widths, these are relative width by the way
                                    tt.SetWidths(new float[] { 75, 25 });
                                }
                                //Add the element to the ColumnText
                                ct.AddElement(el);
                            }

                            //IMPORTANT, this actually commits our object to the PDF
                            ct.Go();
                        }
                    }
                }
            }
            finally
            {
                //Release the reader (and its hold on File1.pdf) even if stamping fails
                reader.Close();
            }

            this.Close();
        }
    }
}
/// <summary>
/// Renders the current page to HTML, converts it to a PDF with HTMLWorker, and streams
/// the result to the browser as the download "TestPage.pdf".
/// </summary>
protected void LinkPdf_Click(object sender, EventArgs e)
{
    Response.ContentType = "application/pdf";
    Response.AddHeader("content-disposition", "attachment;filename=TestPage.pdf");
    Response.Cache.SetCacheability(HttpCacheability.NoCache);

    // Render the page's markup into a string
    StringWriter sw = new StringWriter();
    HtmlTextWriter hw = new HtmlTextWriter(sw);
    this.Page.RenderControl(hw);
    StringReader sr = new StringReader(sw.ToString());

    // The PdfWriter writes the PDF straight into the response stream
    Document pdfDoc = new Document(PageSize.A4, 10f, 10f, 100f, 0f);
    HTMLWorker htmlparser = new HTMLWorker(pdfDoc);
    PdfWriter.GetInstance(pdfDoc, Response.OutputStream);
    pdfDoc.Open();
    htmlparser.Parse(sr);
    pdfDoc.Close();

    // The PDF bytes are already in the response at this point. The original Response.Write(pdfDoc)
    // appended the object's ToString() text after them, so it has been removed.
    Response.End();
}

Categories

Resources