I need some help with my code. I am trying to convert HTML containing Chinese characters to PDF. The conversion works, but the Chinese characters have disappeared from the output.
This is my HTML, which I convert to a string; I have set the font to Arial Unicode MS in the td:
// HTML fragment to convert; the cell text mixes Latin and Chinese characters.
string HTMLTemplate = "<table border=0 cellspacing='0' cellpadding='3'><tr><td style='width:100%;font: 10px/1.5em Verdana, Arial Unicode MS, Helvetica, sans-serif;'>GIGI无合约 F&B</td></tr></table>";
This is my code
// DONTLOOKFORFONTS: presumably prevents the provider from scanning font directories, so only
// the font registered below is available — confirm against the XML Worker documentation.
XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
fontProvider.Register("C:\\fonts\\ARIALUNI.TTF");
CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
// Own the output stream in a using block so the file handle is released even if parsing throws.
using (FileStream output = new FileStream(FilePath, FileMode.Create))
{
    iTextSharp.text.Document doc = new iTextSharp.text.Document(iTextSharp.text.PageSize.LETTER, 7, 7, 7, 7);
    iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(doc, output);
    iTextSharp.tool.xml.pipeline.html.HtmlPipelineContext htmlContext = new iTextSharp.tool.xml.pipeline.html.HtmlPipelineContext(cssAppliers);
    htmlContext.SetTagFactory(iTextSharp.tool.xml.html.Tags.GetHtmlTagProcessorFactory());
    // Create a CSS resolver to apply CSS
    iTextSharp.tool.xml.pipeline.css.ICSSResolver cssResolver = iTextSharp.tool.xml.XMLWorkerHelper.GetInstance().GetDefaultCssResolver(false);
    // Create and chain the pipelines (CSS -> HTML -> PDF writer); without the pipeline the parser will not apply CSS
    iTextSharp.tool.xml.IPipeline pipeline = new iTextSharp.tool.xml.pipeline.css.CssResolverPipeline(cssResolver, new iTextSharp.tool.xml.pipeline.html.HtmlPipeline(htmlContext, new iTextSharp.tool.xml.pipeline.end.PdfWriterPipeline(doc, writer)));
    // Create the XMLWorker and attach a parser to it
    iTextSharp.tool.xml.XMLWorker worker = new iTextSharp.tool.xml.XMLWorker(pipeline, true);
    iTextSharp.tool.xml.parser.XMLParser xmlParser = new iTextSharp.tool.xml.parser.XMLParser(worker);
    // Everything is wired up: open the document and start writing
    doc.Open();
    xmlParser.Parse(new StringReader(AP_TEMPLATE_HTML));
    doc.NewPage();
    doc.Close();
}
Related
I am using the following code to create a PDF document in C# with iText 5, but the text does not render in the Courier font. Why not?
/// <summary>
/// Creates a PDF at <paramref name="pdfDocPath"/> and adds each entry of a fixed
/// two-element string array to it as its own paragraph.
/// </summary>
/// <param name="pdfDocPath">Path of the PDF file to create (opened with FileMode.Create).</param>
private void SimpleFontDoc(string pdfDocPath)
{
Document doc = new Document(PageSize.LETTER, 10, 10, 42, 30);
var fs = new FileStream(pdfDocPath, FileMode.Create);
PdfWriter writer = PdfWriter.GetInstance(doc, fs);
doc.Open();
string[] lines = new string[]
{
"First text line",
"Second text line"
};
// Request a 12pt black "courier" font from the font factory.
var font = FontFactory.GetFont("courier", 12.0f, BaseColor.BLACK);
foreach (var line in lines)
{
var para = new iTextSharp.text.Paragraph(line);
// NOTE(review): the font is assigned after the text was already passed to the
// constructor — this ordering is the suspected cause of the font not being applied.
para.Font = font;
doc.Add(para);
}
doc.Close();
}
In iText5 you have to specify the font before adding text to the Paragraph element (or alternatively pass it to the constructor).
Change
var para = new iTextSharp.text.Paragraph(line);
para.Font = font;
into
var para = new iTextSharp.text.Paragraph(line, font);
I tried using iTextSharp to get the text from a PDF document;
it works great if the PDF file contains English text (Latin characters).
If I try to get the text from a PDF document with Cyrillic characters, the output is just question marks. Are there some settings to be made, or isn't Cyrillic supported?
This is the code for creating the PDF:
string testText = "зззi";
// Verbatim string literals (@"...") keep the backslashes in the Windows paths literal.
string tmpFile = @"C:\items\test.pdf";
string myFont = @"C:\windows\fonts\verdana.ttf";
iTextSharp.text.Rectangle pgeSize = new iTextSharp.text.Rectangle(595, 792);
iTextSharp.text.Document doc = new iTextSharp.text.Document(pgeSize, 10, 10, 10, 10);
// Own the output stream in a using block so the file handle is released even if
// document creation fails part-way.
using (System.IO.FileStream pdfStream = new System.IO.FileStream(tmpFile, System.IO.FileMode.Create))
{
    // The writer must be attached before the document is opened.
    iTextSharp.text.pdf.PdfWriter.GetInstance(doc, pdfStream);
    doc.Open();
    doc.NewPage();
    // Identity-H encoding with the font embedded in the PDF.
    iTextSharp.text.pdf.BaseFont bfR = BaseFont.CreateFont(myFont, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    iTextSharp.text.BaseColor clrBlack = new iTextSharp.text.BaseColor(0, 0, 0);
    iTextSharp.text.Font fntHead = new iTextSharp.text.Font(bfR, 34, iTextSharp.text.Font.NORMAL, clrBlack);
    iTextSharp.text.Paragraph pgr = new iTextSharp.text.Paragraph(testText, fntHead);
    doc.Add(pgr);
    doc.Close();
}
this is the code for retrieving the text:
// Open the PDF and print the text extracted from page 1 to the console.
PdfReader reader = new PdfReader("c:/items/test.pdf");
SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
string pageText = PdfTextExtractor.GetTextFromPage(reader, 1, strategy);
Console.WriteLine(pageText);
// Wait for Enter before continuing.
Console.ReadLine();
the output is: ???i
EDIT 2
I managed to read text from the PDF I created, but I still can't get the text from a random PDF. How can I check whether that PDF provides the information required for text extraction?
I want to convert some HTML into a PDF. All my HTML is in a string, but I don't know how to pass it in correctly with iTextSharp.
/// <summary>
/// Renders the HTML held in <c>HTML</c> on top of a background image into t.pdf
/// (resolved through Server.MapPath) and then redirects the response to "t.pdf".
/// </summary>
public void PDF()
{
    // Create the document (A4; margins: left 50, right 50, top 25, bottom 25)
    var doc = new Document(PageSize.A4, 50, 50, 25, 25);
    // Write the output to t.pdf; the using block releases the file handle even if rendering throws
    using (var output = new FileStream(Server.MapPath("t.pdf"), FileMode.Create))
    {
        // The writer must be attached before the document is opened
        PdfWriter.GetInstance(doc, output);
        // Open the doc for writing
        doc.Open();
        // Add the wallpaper image to the PDF, anchored at the page origin and scaled to 600x840
        var Wallpaper = iTextSharp.text.Image.GetInstance(Server.MapPath("hfc.png"));
        Wallpaper.SetAbsolutePosition(0, 0);
        Wallpaper.ScaleAbsolute(600, 840);
        doc.Add(Wallpaper);
        iTextSharp.text.html.simpleparser.HTMLWorker hw = new iTextSharp.text.html.simpleparser.HTMLWorker(doc);
        StyleSheet css = new StyleSheet();
        css.LoadTagStyle("body", "face", "Garamond");
        css.LoadTagStyle("body", "encoding", "Identity-H");
        css.LoadTagStyle("body", "size", "12pt");
        // Attach the style sheet to the worker; without this call the tag styles above are never used
        hw.SetStyleSheet(css);
        hw.Parse(new StringReader(HTML));
        doc.Close();
    }
    Response.Redirect("t.pdf");
}
If anyone knows how to make this work, that would be great.
Thanks
Dom
Please download The Best iText Questions on StackOverflow. It's a free ebook, you'll benefit from it.
Once you have downloaded it, go to the section entitled "Parsing XML and XHTML".
Allow me to quote from the answer to this question: RowSpan does not work in iTextSharp?
You are using HTMLWorker instead of XML Worker, and you are right:
HTMLWorker has no support for CSS. Saying CSS doesn't work in
iTextSharp is wrong. It doesn't work when you use HTMLWorker, but
that's documented: the CSS you need works in XML Worker.
Please throw away your code, and start anew using XML Worker.
There are many examples (simple ones as well as complex ones) in the book. Let me give you only one:
// Convert the HTML text in `result` to a PDF written to `outputFile`,
// registering every font path in `fonts` with the XML Worker font provider.
using (FileStream pdfOut = new FileStream(outputFile, FileMode.Create, FileAccess.Write))
using (StringReader htmlReader = new StringReader(result))
{
    Document pdfDocument = new Document();
    PdfWriter writer = PdfWriter.GetInstance(pdfDocument, pdfOut);
    writer.InitialLeading = 12.5f;
    pdfDocument.Open();

    XMLWorkerHelper.GetInstance();
    StyleAttrCSSResolver styleResolver = new StyleAttrCSSResolver();

    // Make each supplied font file available to the CSS appliers.
    XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider();
    foreach (string fontPath in fonts)
    {
        fontProvider.Register(fontPath);
    }

    HtmlPipelineContext context = new HtmlPipelineContext(new CssAppliersImpl(fontProvider));
    context.SetTagFactory(Tags.GetHtmlTagProcessorFactory());

    // Chain the pipelines: CSS resolution -> HTML processing -> PDF writer.
    CssResolverPipeline pipelineChain = new CssResolverPipeline(
        styleResolver,
        new HtmlPipeline(context, new PdfWriterPipeline(pdfDocument, writer)));

    XMLParser parser = new XMLParser(new XMLWorker(pipelineChain, true));
    parser.Parse(htmlReader);
    pdfDocument.Close();
}
}
(Source: iTextSharp XmlWorker: right-to-left)
If you want an easier example, take a look at the answers of these questions:
How to parse multiple HTML files into a single PDF?
How to add a rich Textbox (HTML) to a table cell?
...
The code that parses an HTML string and a CSS string to a list of iText(Sharp) elements is as simple as this:
ElementList list = XMLWorkerHelper.parseToElementList(html, css);
You can find more examples on the official iText web site.
I am making a report. The code and a sample follow. I am using an HTML table for the report. When I run the code, the PDF is generated successfully, but the Arabic text is not showing. Can you guide me on how to embed Arabic in it? Can you modify the following code so that it shows the Arabic data?
// Send the result as a downloadable, non-cached PDF attachment.
Response.ContentType = "application/pdf";
Response.AddHeader("content-disposition", "attachment;filename=TestPage.pdf");
Response.Cache.SetCacheability(HttpCacheability.NoCache);
// Render the server-side table control into an HTML string.
StringWriter sw = new StringWriter();
HtmlTextWriter hw = new HtmlTextWriter(sw);
tblid1.RenderControl(hw);
StringReader sr = new StringReader(sw.ToString());
Document pdfDoc = new Document(PageSize.A4, 80f, 80f, -2f, 35f);
HTMLWorker htmlparser = new HTMLWorker(pdfDoc);
// The writer sends the PDF bytes straight into the HTTP response stream.
PdfWriter.GetInstance(pdfDoc, Response.OutputStream);
pdfDoc.Open();
htmlparser.Parse(sr);
pdfDoc.Close();
// Do not call Response.Write(pdfDoc) here: it would append the Document's
// ToString() text to the response after the PDF bytes.
Response.End();
<table id="tblid1" runat="server">
<tr>
<td>سلطانالخارج</td>
<td>مسندم</td>
</tr>
</table>
You would need to embed a font into your pdf that supports arabic glyphs.
// Locate arabtype.ttf in the fonts folder under %SystemRoot%.
string systemRoot = Environment.GetEnvironmentVariable("SystemRoot");
string arabicFontFile = systemRoot + "\\fonts\\arabtype.ttf";
// Load it with Identity-H encoding and embed it in the PDF.
BaseFont arabicBase = BaseFont.CreateFont(arabicFontFile, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
Font arabicFont = new Font(arabicBase, 10f, Font.NORMAL);
Answer found in thread: Itextsharp and arabic character?
EDIT: This is how I would do it based on the examples I could find and what you're trying to accomplish:
// Declared outside the using block so it is still in scope when the HTML is parsed below.
string htmlString;
using (WebClient client = new WebClient())
{
    htmlString = client.DownloadString(url);
}
FontFactory.Register("c:/windows/fonts/arabtype.TTF");
// Tag styles for <body>: font face and Identity-H encoding.
StyleSheet style = new StyleSheet();
style.LoadTagStyle("body", "face", "%NAME OF ARABIC FONT%");
style.LoadTagStyle("body", "encoding", BaseFont.IDENTITY_H);
using (Document document = new Document(PageSize.A4, 80f, 80f, -2f, 35f))
{
    // The writer must be attached before the document is opened.
    PdfWriter.GetInstance(document, Response.OutputStream);
    document.Open();
    // Parse the HTML into iText elements and add them one by one.
    foreach (IElement element in HTMLWorker.ParseToList(new StringReader(htmlString), style))
    {
        document.Add(element);
    }
}
Note that you need to register the correct TTF file, one that contains the glyphs for Arabic characters. You also need to replace %NAME OF ARABIC FONT% with the name of the font you're using, and make sure `htmlString` holds the actual raw HTML (the snippet above downloads it from `url`).
// Register ARIALUNI.TTF from the system Fonts folder with the font factory; the BODY
// style below then refers to it by the family name "Arial Unicode MS".
var arialFontPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Fonts), "ARIALUNI.TTF");
FontFactory.Register(arialFontPath);
StyleSheet styles = new StyleSheet();
styles.LoadTagStyle(HtmlTags.DIV, HtmlTags.FONTSIZE, "16");
styles.LoadTagStyle(HtmlTags.DIV, HtmlTags.COLOR, "navy");
styles.LoadTagStyle(HtmlTags.DIV, HtmlTags.FONTWEIGHT, "bold");
styles.LoadTagStyle(HtmlTags.P, HtmlTags.INDENT, "30px");
styles.LoadTagStyle(HtmlTags.BODY, HtmlTags.FACE, "Arial Unicode MS");
styles.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);
// Parse the HTML into iText elements and add each one to the document.
List<IElement> htmlarraylist = HTMLWorker.ParseToList(new StringReader(htmlText), styles);
foreach (IElement element in htmlarraylist)
{
    pdfDocument.Add(element);
}
This piece of code worked for me; Arabic text renders perfectly with it.
Just pass your HTML text in the htmlText variable.
Hopefully it will work fine for you too.
I am stuck trying to debug some code that is designed to convert a web page into a PDF document via a string variable. It uses the iTextSharp C# tool (XML Worker) and is a modification of the example code that comes with the SourceForge source code, called html2pdf.csproj. This example code converts an existing HTML file into a PDF file and saves it in the same directory as the file that was converted. I have a string variable containing HTML-formatted text, and I am trying to have it returned as a byte array that will be passed to the client side in a web environment for printing purposes. The problem is that I get an "IOException was unhandled by user code" message that states "The document has no pages." I'm a little unsure what this is supposed to mean, or how to go about diagnosing the problem. The example code using the file-based system works, and I have successfully converted a static version of the HTML string to PDF. Below is the modified code:
/// <summary>
/// Builds an XML Worker pipeline that writes a LETTER-size PDF into an in-memory
/// stream and returns that stream's contents as a byte array.
/// </summary>
/// <param name="html">HTML text to convert. NOTE(review): never read in this body.</param>
/// <param name="filename">Passed to the ImageProvider constructor.</param>
/// <returns>The bytes accumulated in the output MemoryStream.</returns>
private byte[] createPDF(string html, string filename) {
MemoryStream msOutput = new MemoryStream();
string printPDFCSS = Server.MapPath("/content/printPDF.css");
Document doc = new Document(PageSize.LETTER);
// Keep the default left/right margins; set top to 35 and bottom to 0.
doc.SetMargins(doc.LeftMargin, doc.RightMargin, 35, 0);
PdfWriter pdfWriter = PdfWriter.GetInstance(doc, msOutput);
doc.Open();
// Font substitution table handed to UnembedFontProvider below.
Dictionary<String, String> substFonts = new Dictionary<String, String>();
substFonts["Arial Unicode MS"] = "Helvetica";
// Load the print stylesheet from disk.
CssFilesImpl cssFiles = new CssFilesImpl();
cssFiles.Add(XMLWorkerHelper.GetCSS(new FileStream(printPDFCSS, FileMode.Open)));
StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);
HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new UnembedFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS, substFonts)));
hpc.SetImageProvider(new ImageProvider(filename));
hpc.SetAcceptUnknown(true).AutoBookmark(true).SetTagFactory(Tags.GetHtmlTagProcessorFactory());
// Chain the pipelines: CSS resolution -> HTML processing -> PDF writer.
HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, pdfWriter));
IPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
XMLWorker worker = new XMLWorker(pipeline, true);
XMLParser xmlParse = new XMLParser(true, worker);
// NOTE(review): the parser is given msOutput, the stream the PDF writer writes to,
// rather than anything built from the html parameter.
xmlParse.Parse(msOutput);
doc.Close();
return msOutput.ToArray();
}
Below is the code I finally found that works. There were a number of issues with the code above, but this seems to work:
/// <summary>
/// Converts an HTML string to a LETTER-size PDF using the XML Worker pipeline and
/// returns the PDF as a byte array.
/// </summary>
/// <param name="html">HTML text to convert.</param>
/// <param name="filename">Passed to the ImageProvider constructor (presumably used to resolve image locations — confirm).</param>
/// <returns>The generated PDF bytes.</returns>
private byte[] createPDF(string html, string filename)
{
    MemoryStream msOutput = new MemoryStream();
    string printPDFCSS = Server.MapPath("/content/printPDF.css");
    Document doc = new Document(PageSize.LETTER);
    // Keep the default left/right margins; set top to 35 and bottom to 0.
    doc.SetMargins(doc.LeftMargin, doc.RightMargin, 35, 0);
    PdfWriter pdfWriter = PdfWriter.GetInstance(doc, msOutput);
    doc.Open();
    // Font substitution table handed to UnembedFontProvider below.
    Dictionary<String, String> substFonts = new Dictionary<String, String>();
    substFonts["Arial Unicode MS"] = "Helvetica";
    CssFilesImpl cssFiles = new CssFilesImpl();
    // Open the stylesheet read-only (it is only read here) and scope the handle to this block.
    using (FileStream cssStream = new FileStream(printPDFCSS, FileMode.Open, FileAccess.Read))
    {
        cssFiles.Add(XMLWorkerHelper.GetCSS(cssStream));
    }
    StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);
    HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new UnembedFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS, substFonts)));
    hpc.SetImageProvider(new ImageProvider(filename));
    hpc.SetAcceptUnknown(true).AutoBookmark(true).SetTagFactory(Tags.GetHtmlTagProcessorFactory());
    // Chain the pipelines: CSS resolution -> HTML processing -> PDF writer.
    HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, pdfWriter));
    IPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
    XMLWorker worker = new XMLWorker(pipeline, true);
    XMLParser xmlParse = new XMLParser(true, worker);
    // Feed the parser the string directly: encoding it to bytes with the system default
    // code page first would replace characters that code page cannot represent.
    xmlParse.Parse(new StringReader(html));
    doc.Close();
    return msOutput.ToArray();
}