hiqpdf html conversion issue - c#

I am using HiQPdf to convert a list of HTML pages and combine them into one PDF document.
This is how I'm doing it:
/// <summary>
/// Converts two HTML strings to intermediate PDF files and merges those files
/// into a single output PDF.
/// </summary>
public class HtmlToPdfEditor
{
// HTML content that is converted into the first and second parts of the output.
private string _firstPage;
private string _secondPage;
//private const string _HiQPdfSerialNumber = "";
// The merged document being built; only set while ConvertAll is running.
private PdfDocument _document;
/// <summary>Stores the two HTML contents to be converted by <see cref="ConvertAll"/>.</summary>
public HtmlToPdfEditor(string firstPage, string secondPage)
{
_firstPage = firstPage;
_secondPage=secondPage;
}
/// <summary>
/// Converts both HTML contents to PDF files, appends them to a new document
/// and writes the result to <paramref name="outputPath"/>.
/// </summary>
public void ConvertAll(string outputPath)
{
// NOTE(review): this converter instance is never used in this method;
// GetDocument obtains its own converter from GetPdfExporter().
HtmlToPdf htmlToPdfConverter = new HtmlToPdf();
_document = new PdfDocument();
//_document.SerialNumber = _HiQPdfSerialNumber;
// Each call writes an intermediate PDF to the given file name and returns that name.
string firstPageDoc = GetDocument(_firstPage, "firstPage.pdf");
// NOTE(review): "secondtPage.pdf" looks like a typo for "secondPage.pdf"; it is
// harmless because the same returned name is used to re-open the file.
string secondPageDoc = GetDocument(_secondPage, "secondtPage.pdf");
this.JoinDocument(PdfFromFile(firstPageDoc));
this.JoinDocument(PdfFromFile(secondPageDoc));
_document.WriteToFile(outputPath);
_document.Close();
_document = null;
}
/// <summary>Opens an existing PDF file as a <c>PdfDocument</c>.</summary>
private PdfDocument PdfFromFile(string path)
{
return PdfDocument.FromFile(path);
}
/// <summary>
/// Appends <paramref name="document"/> to the merged document and closes it.
/// </summary>
/// <returns>The page count of the merged document before the append.</returns>
private int JoinDocument(PdfDocument document)
{
var nbPages = _document.Pages.Count;
_document.AddDocument(document);
// NOTE(review): this is the defect discussed in this thread. The answers state
// that documents added to a main document must remain open until the main
// document itself has been closed; closing here, before WriteToFile runs,
// is what produces the blank pages.
document.Close();
return nbPages;
}
/// <summary>
/// Converts the HTML <paramref name="content"/> to a PDF written to
/// <paramref name="outputFile"/>, using an empty base URL.
/// </summary>
/// <returns><paramref name="outputFile"/>, unchanged.</returns>
private string GetDocument(string content, string outputFile)
{
var baseUrl = "";
var htmlToPdfConverter = GetPdfExporter();
htmlToPdfConverter.ConvertHtmlToFile(content, baseUrl, outputFile);
return outputFile;
}
/// <summary>
/// Creates a converter configured for A4 portrait pages with margins of 2 and
/// the wait-time trigger mode.
/// </summary>
public HtmlToPdf GetPdfExporter()
{
HtmlToPdf htmlToPdfConverter = new HtmlToPdf();
//htmlToPdfConverter.SerialNumber = _HiQPdfSerialNumber;
htmlToPdfConverter.Document.PageSize = PdfPageSize.A4;
htmlToPdfConverter.Document.PageOrientation = PdfPageOrientation.Portrait;
htmlToPdfConverter.Document.Margins = new PdfMargins(2);
// NOTE(review): the timeout and wait values below are presumably in seconds; confirm in the HiQPdf docs.
htmlToPdfConverter.HtmlLoadedTimeout = 60;
htmlToPdfConverter.TriggerMode = ConversionTriggerMode.WaitTime; //Time to load the html
htmlToPdfConverter.WaitBeforeConvert = 1;
return htmlToPdfConverter;
}
}
The issue is that, in the resulting document, the pages converted from HTML are displayed as empty pages. Only Google Chrome displays them correctly; in Firefox these pages remain in the loading state indefinitely.
Note that if I convert the HTML directly to a PdfDocument, instead of saving it to a file and then joining it, the resulting document is perfectly readable, but unfortunately I can't use that method.
Any help will be much appreciated! Thanks!

Yes, that's correct: the PDF documents you add to a main document must remain open until you close the main document.
If the PDF documents you merge are produced from HTML there is actually an easier way to merge the HTML documents in a PDF following the approach from Convert Many HTML to PDF example.
// Lays out two HTML documents (given by URL) into a single PDF document and
// sends the result to the browser as an attachment.

// create an empty PDF document
PdfDocument document = new PdfDocument();
// add a page to document
PdfPage page1 = document.AddPage(PdfPageSize.A4, new PdfDocumentMargins(5),
    PdfPageOrientation.Portrait);
try
{
    // set the document header and footer before
    // adding any objects to document
    SetHeader(document);
    SetFooter(document);

    // layout the HTML from URL 1
    PdfHtml html1 = new PdfHtml(textBoxUrl1.Text);
    PdfLayoutInfo html1LayoutInfo = page1.Layout(html1);

    // determine the PDF page where to add URL 2
    PdfPage page2 = null;
    System.Drawing.PointF location2 = System.Drawing.PointF.Empty;
    if (checkBoxNewPage.Checked)
    {
        // URL 2 is laid out on a new page with the selected orientation
        page2 = document.AddPage(PdfPageSize.A4, new PdfDocumentMargins(5),
            GetSelectedPageOrientation());
        location2 = System.Drawing.PointF.Empty;
    }
    else
    {
        // URL 2 is laid out immediately after URL 1 and html1LayoutInfo
        // gives the location where the URL 1 layout finished
        page2 = document.Pages[html1LayoutInfo.LastPageIndex];
        location2 = new System.Drawing.PointF(html1LayoutInfo.LastPageRectangle.X,
            html1LayoutInfo.LastPageRectangle.Bottom);
    }

    // layout the HTML from URL 2
    PdfHtml html2 = new PdfHtml(location2.X, location2.Y, textBoxUrl2.Text);
    page2.Layout(html2);

    // write the PDF document to a memory buffer
    byte[] pdfBuffer = document.WriteToMemory();

    // inform the browser about the binary data format
    HttpContext.Current.Response.AddHeader("Content-Type", "application/pdf");

    // let the browser know how to open the PDF document
    // (the format string must stay on one line: a regular C# string literal
    // cannot contain a line break)
    HttpContext.Current.Response.AddHeader("Content-Disposition",
        String.Format("attachment; filename=LayoutMultipleHtml.pdf; size={0}",
            pdfBuffer.Length.ToString()));

    // write the PDF buffer to HTTP response
    HttpContext.Current.Response.BinaryWrite(pdfBuffer);

    // call End() method of HTTP response
    // to stop ASP.NET page processing
    HttpContext.Current.Response.End();
}
finally
{
    // the main document is closed last, after everything has been written
    document.Close();
}

Ok I resolved this issue by ensuring that all pdf documents to be merged are closed after closing the final document. In other words the method JoinDocument will no longer call document.Close(). I'll call it later after closing the final document (_document).

Related

hiqpdf, how to save file to server instead of attachment?

I am trying to save a PDF file, generated with the "hiqpdf" tool, to a path on the server rather than sending it to the browser as an attachment:
/// <summary>
/// Click handler: renders the applicant profile page to a PDF in memory and
/// streams it to the browser as an attachment named after the "aid" query value.
/// </summary>
protected void btnPrint_Click(object sender, EventArgs e)
{
    // Converter setup: browser width, HTML load timeout, A4 portrait pages with
    // zero margins, and a wait time before the conversion starts.
    // (The browser height is left at the converter's default.)
    HtmlToPdf htmlToPdfConverter = new HtmlToPdf
    {
        BrowserWidth = 805,
        HtmlLoadedTimeout = 180,
        Document =
        {
            PageSize = PdfPageSize.A4,
            PageOrientation = PdfPageOrientation.Portrait,
            Margins = new PdfMargins(0)
        },
        WaitBeforeConvert = 5
    };

    // Build the URL of the page to print and convert it to a PDF memory buffer.
    string applicantId = Request.QueryString["aid"].ToString();
    string url = System.Configuration.ConfigurationManager.AppSettings["SiteURL"]
        + "PrintFullApplicantProfile.aspx?MaidID="
        + applicantId;
    byte[] pdfBuffer = htmlToPdfConverter.ConvertUrlToMemory(url);

    var response = HttpContext.Current.Response;

    // Tell the browser the payload is a PDF.
    response.AddHeader("Content-Type", "application/pdf");

    // Tell the browser how to open it ("attachment" here; "inline" is the
    // alternative) and which file name to use.
    string dispositionFormat = "{0}; filename=Applicant-" + applicantId + ".pdf; size={1}";
    response.AddHeader("Content-Disposition",
        String.Format(dispositionFormat, "attachment", pdfBuffer.Length.ToString()));

    // Send the PDF bytes, then end the response to stop ASP.NET page processing.
    response.BinaryWrite(pdfBuffer);
    response.End();
}
Any suggestion to save the file to the server path?
Thanks in advance.

iTextSharp 5 multiple signatures - last signature renders previous signatures invalid - Document has been altered or corrupted

I am trying to add multiple signatures with iTextSharp 5.5.13.1.
Only the last signature is valid.
And all previous signatures are invalid with the message:
"Document has been altered or corrupted since it was signed" - 1 Page(s) Modified
I don't necessarily need certified signatures.
I use signature append mode but still can't figure out what modifies the document.
In notepad the first part of document till the second signature seems unchanged.
The code I used is:
/// <summary>
/// Signs <paramref name="fileToSign"/> in append mode with the certificate named
/// <paramref name="certname"/> and stamps a generated image on the given page.
/// </summary>
/// <param name="fileToSign">Path of the PDF to sign.</param>
/// <param name="certname">Name used to look up the signing certificate.</param>
/// <param name="xPercent">Horizontal image position as a fraction of the page width.</param>
/// <param name="yPercent">Vertical image position, measured from the top, as a fraction of the page height.</param>
/// <param name="page">1-based page number; the image is skipped when it is out of range.</param>
/// <returns>Path of the signed copy (".pdf" replaced by ".signed.pdf").</returns>
private string SignFile(string fileToSign, string certname, float xPercent, float yPercent, int page)
{
    string signedFile = fileToSign.Replace(".pdf", ".signed.pdf");
    using (PdfReader pdfReader = new PdfReader(fileToSign))
    {
        int pages = pdfReader.NumberOfPages;
        // Only needed by the commented-out certification-level experiment below.
        var currentSignaturesCount = pdfReader.AcroFields.GetSignatureNames().Count();
        using (FileStream signedPdf = new FileStream(signedFile, FileMode.Create, FileAccess.ReadWrite))
        {
            string tempDir = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), ".tempfiles");
            Directory.CreateDirectory(tempDir);
            string tempFileName = Path.Combine(tempDir, Guid.NewGuid().ToString("N") + ".pdf");
            if (!File.Exists(tempFileName))
            {
                File.Create(tempFileName).Close();
            }

            using (PdfStamper pdfStamper = PdfStamper.CreateSignature(pdfReader, signedPdf, '\0', tempFileName, true)) // Append mode
            {
                // Add signature image
                if (page <= pages && page > 0)
                {
                    // NOTE(review): per the answer in this thread, merely calling
                    // GetOverContent adds content streams to the page, and adding the
                    // image to page content is a disallowed change to an already signed
                    // PDF. This block is what invalidates the earlier signatures.
                    var pdfContentByte = pdfStamper.GetOverContent(page);
                    // Fixed: the original passed an undefined variable `i` here.
                    var pageSize = pdfReader.GetPageSize(page);
                    float pageWidth = pageSize.Width;
                    float pageHeight = pageSize.Height;

                    // GenerateStamp() = simplified function that will get a custom bitmap (code not included here)
                    using (System.Drawing.Bitmap img = GenerateStamp())
                    {
                        var image = iTextSharp.text.Image.GetInstance(img, true);
                        // yPercent is measured from the top, PDF coordinates from the bottom.
                        image.SetAbsolutePosition(xPercent * pageWidth, pageHeight - yPercent * pageHeight - image.ScaledHeight);
                        pdfContentByte.AddImage(image);
                    }
                }

                //Also tried adding the image directly to signatureAppearance
                //signatureAppearance.SignatureGraphic = image;
                //signatureAppearance.SetVisibleSignature(rectangle, page, signatureFieldName);
                // and getting the error "Document has been altered or corrupted since it was signed"
                PdfSignatureAppearance signatureAppearance = pdfStamper.SignatureAppearance;
                signatureAppearance.Reason = "Test";
                signatureAppearance.SignDate = DateTime.Now;
                signatureAppearance.SignatureRenderingMode = PdfSignatureAppearance.RenderingMode.GRAPHIC;
                signatureAppearance.Acro6Layers = false;
                //Also tried like this:
                //signatureAppearance.CertificationLevel = currentSignaturesCount == 0 ? PdfSignatureAppearance.CERTIFIED_FORM_FILLING_AND_ANNOTATIONS : PdfSignatureAppearance.NOT_CERTIFIED;
                // with message: "There have been changes made to this document that invalidate the signature"

                // sign document (exceptions propagate to the caller unchanged)
                X509Certificate2 cert = GetCertificateByName(certname);
                Org.BouncyCastle.X509.X509CertificateParser cp = new Org.BouncyCastle.X509.X509CertificateParser();
                Org.BouncyCastle.X509.X509Certificate[] chain = new Org.BouncyCastle.X509.X509Certificate[] { cp.ReadCertificate(cert.RawData) };
                IExternalSignature externalSignature = new X509Certificate2Signature(cert, "SHA-256");
                MakeSignature.SignDetached(signatureAppearance, externalSignature, chain, null, null, null, 0, CryptoStandard.CMS);
            }
        }
    }
    return signedFile;
}
Any help is appreciated. Thanks
Your code as is adds an image to the static content of a page. That is forbidden to do to a signed file. For details on allowed and disallowed changes to a signed PDF read this answer.
According to your code comments, though, you also tried to alternatively add the image to the signature appearance. That is not forbidden as such. But analyzing the provided example PDFs it becomes apparent that in this attempt additional content streams have been added to the page. Even though they essentially are empty, this is considered a change of page content which is disallowed.
As it turned out, you didn't add the image to the page content in this attempt but you still retrieved the OverContent of the page:
var pdfContentByte = pdfStamper.GetOverContent(page);
This operation already adds extra content streams to the page for the OverContent to come. Strictly speaking, therefore, the method should be named CreateOrGetOverContent instead...
After removing the GetOverContent call signing does not damage the previous signatures anymore.

All HTML Pages are not converting to PDF with SelectPdf

I am using the following code to convert large HTML content to PDF using iTextSharp SelectPDF. Not all of the HTML pages are converted: the content disappears after the first 6 pages of the PDF.
/// <summary>
/// Converts an HTML string to an A4 portrait PDF and saves it to <paramref name="FileName"/>.
/// Any exception is passed to <c>Tracing.HandleException</c> instead of being rethrown.
/// </summary>
/// <param name="HtmlStream">The HTML markup to convert.</param>
/// <param name="FileName">Path of the PDF file to write.</param>
public static void CreatePDFFromHTMLFile(string HtmlStream, string FileName)
{
    try
    {
        string baseUrl = "";

        HtmlToPdf converter = new HtmlToPdf();

        // set converter options
        converter.Options.PdfPageSize = PdfPageSize.A4;
        converter.Options.PdfPageOrientation = PdfPageOrientation.Portrait;
        converter.Options.WebPageWidth = 1024;
        converter.Options.WebPageHeight = 0;

        // create a new pdf document from the HTML string
        PdfDocument doc = converter.ConvertHtmlString(HtmlStream, baseUrl);
        try
        {
            doc.Save(FileName);
        }
        finally
        {
            // close the document even when Save fails
            doc.Close();
        }
    }
    catch (Exception ex)
    {
        Tracing.HandleException(ex);
    }
}
That is happening because you are using the Community Edition of SelectPdf, which is free but limited: it can only convert HTML to PDFs of up to about 5–6 pages. If you need more than that, you will need the commercial (non-free) edition, available at https://selectpdf.com/downloads/
The problem is that you are using SelectPdf community edition. According to SelectPdf (http://selectpdf.com/community-edition/), the free version is limited to 5 pages.
If you want to convert longer pages, you need to use the commercial edition, but that is not free.

Value of a string for file's location is nil but a stored value says it isn't

I'm trying to convert secured PDFs to XPS and back to PDF using FreeSpire and then combine them using iTextSharp. Below is my code snippet for converting various files.
// Walks every sub-directory of WorkDir and converts the files found there:
// .doc files are saved as PDF, and PDFs that are not opened with full
// permissions are round-tripped through XPS back to PDF.
char[] delimiter = { '\\' };
string WorkDir = @"C:\Users\*******\Desktop\PDF\Test";
Directory.SetCurrentDirectory(WorkDir);
string[] SubWorkDir = Directory.GetDirectories(WorkDir);
//convert items to PDF
foreach (string subdir in SubWorkDir)
{
    string[] samplelist = Directory.GetFiles(subdir);
    // Iterate over every file; the original bound (Length - 1) skipped the last one.
    for (int f = 0; f < samplelist.Length; f++)
    {
        if (samplelist[f].EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
        {
            Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
            doc.LoadFromFile(samplelist[f], FileFormat.DOC);
            doc.SaveToFile((Path.ChangeExtension(samplelist[f], ".pdf")), FileFormat.PDF);
            doc.Close();
        }
        // ... other extension cases (omitted in the original post) ...
        else if (samplelist[f].EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
        {
            // Open the PDF only long enough to read its permission flag.
            PdfReader reader = new PdfReader(samplelist[f]);
            bool PDFCheck;
            try
            {
                PDFCheck = reader.IsOpenedWithFullPermissions;
            }
            finally
            {
                reader.Close();
            }

            if (PDFCheck)
            {
                Console.WriteLine("{0}\\Full Permisions", samplelist[f]);
            }
            else
            {
                Console.WriteLine("{0}\\Secured", samplelist[f]);
                Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
                // Debugging aid: lets the path be inspected right before loading.
                string path = samplelist[f];
                doc.LoadFromFile(samplelist[f]);
                doc.SaveToFile((Path.ChangeExtension(samplelist[f], ".xps")), FileFormat.XPS);
                doc.Close();

                // Convert the intermediate XPS back to PDF, overwriting the original file.
                Spire.Pdf.PdfDocument doc2 = new Spire.Pdf.PdfDocument();
                doc2.LoadFromFile((Path.ChangeExtension(samplelist[f], ".xps")), FileFormat.XPS);
                doc2.SaveToFile(samplelist[f], FileFormat.PDF);
                doc2.Close();
            }
        }
    }
}
The issue is that I get a "Value cannot be null" error at doc.LoadFromFile(samplelist[f]);. I added string path = sampleList[f]; to check whether samplelist[f] was empty, but it was not. I also tried replacing the samplelist[f] argument with the path variable, but that does not work either. I tested the PDF conversion on a smaller scale and it worked (see below):
// Small-scale test: round-trips one PDF through XPS and back to PDF.
// The paths are verbatim string literals (@"..."), so backslashes need no escaping.
string PDFDoc = @"C:\Users\****\Desktop\Test\Test\Test.PDF";
string XPSDoc = @"C:\Users\****\Desktop\Test\Test\Test.xps";

//Convert PDF file to XPS file
PdfDocument doc = new PdfDocument();
doc.LoadFromFile(PDFDoc);
doc.SaveToFile(XPSDoc, FileFormat.XPS);
doc.Close();

//Convert XPS file to PDF (overwrites the original PDF)
PdfDocument doc2 = new PdfDocument();
doc2.LoadFromFile(XPSDoc, FileFormat.XPS);
doc2.SaveToFile(PDFDoc, FileFormat.PDF);
doc2.Close();
I would like to understand why I am getting this error and how to fix it.
There would be 2 solutions for the problem you are facing.
Load the file into a Document object rather than a PdfDocument, and then call SaveToFile, something like this:
// Option 1: work with a Document object instead of a PdfDocument.
// NOTE(review): presumably this is the Spire.Doc Document type; confirm against the library in use.
Document document = new Document();
// Load the source file into `document` here (the loading call is omitted in this answer).
// Then save the loaded document as a PDF file.
document.SaveToFile("Sample.pdf", FileFormat.PDF);
Alternatively, you can load the file from a Stream, something like this:
// Option 2: load the PDF from a stream instead of from a path.
PdfDocument doc = new PdfDocument();
try
{
    //Load PDF file from stream.
    //Make sure the Loan_list[f] is the complete path of the file with extension.
    using (FileStream from_stream = File.OpenRead(Loan_list[f]))
    {
        doc.LoadFromStream(from_stream);
        //Save the PDF document while the source stream is still open.
        doc.SaveToFile(Loan_list[f] + ".pdf", FileFormat.PDF);
    }
}
finally
{
    // Close the document even if loading or saving fails.
    doc.Close();
}
The second approach is the easier one, but I would recommend the first: a Document gives better conversion fidelity than a raw stream, because it carries the sections, paragraphs, page setup, text and fonts, i.e. everything needed to reproduce the formatting accurately.

how can I put a content in a mergefield in docx

I'm developing a web application with asp.net and I have a file called Template.docx that works like a template to generate other reports. Inside this Template.docx I have some MergeFields (Title, CustomerName, Content, Footer, etc) to replace for some dynamic content in C#.
I would like to know, how can I put a content in a mergefield in docx ?
I don't know whether MergeFields are the right way to do this or whether there is a better way. Any suggestions would be appreciated!
PS: I have openxml referenced in my web application.
Edits:
/// <summary>
/// Reads the whole file into a new, expandable <see cref="MemoryStream"/>.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>A stream holding the file's bytes, positioned at the start.</returns>
private MemoryStream LoadFileIntoStream(string fileName)
{
    MemoryStream memoryStream = new MemoryStream();
    using (FileStream fileStream = File.OpenRead(fileName))
    {
        // CopyTo loops until end of file; a single Read call is allowed to
        // return fewer bytes than requested, which would leave the buffer
        // partly filled.
        fileStream.CopyTo(memoryStream);
    }
    // Rewind so callers read from the beginning, as before.
    memoryStream.Position = 0;
    return memoryStream;
}
/// <summary>
/// Loads the template .docx into memory, replaces the placeholder keys in
/// /word/document.xml and returns the modified package as a stream.
/// </summary>
/// <returns>The in-memory .docx, positioned at the start.</returns>
public MemoryStream GenerateWord()
{
    string templateDoc = "C:\\temp\\template.docx";
    MemoryStream reportStream = LoadFileIntoStream(templateDoc);

    // Open the in-memory copy as a package; disposing it flushes the changes
    // back into reportStream.
    using (Package pkg = Package.Open(reportStream, FileMode.Open, FileAccess.ReadWrite))
    {
        // Specify the URI of the part to be read
        Uri uri = new Uri("/word/document.xml", UriKind.Relative);
        PackagePart part = pkg.GetPart(uri);

        XmlDocument xmlMainXMLDoc = new XmlDocument();
        // Release the read stream before the part is opened for writing.
        using (Stream partIn = part.GetStream(FileMode.Open, FileAccess.Read))
        {
            xmlMainXMLDoc.Load(partIn);
        }

        // replace some keys inside xml (it will come from database, it's just a test)
        xmlMainXMLDoc.InnerXml = xmlMainXMLDoc.InnerXml.Replace("field_customer", "My Customer Name");
        xmlMainXMLDoc.InnerXml = xmlMainXMLDoc.InnerXml.Replace("field_title", "Report of Documents");
        xmlMainXMLDoc.InnerXml = xmlMainXMLDoc.InnerXml.Replace("field_content", "Content of Document");

        // FileMode.Create truncates the part first, so no bytes of the old
        // XML can survive past the end of the new content.
        using (StreamWriter partWrt = new StreamWriter(part.GetStream(FileMode.Create, FileAccess.Write)))
        {
            xmlMainXMLDoc.Save(partWrt);
        }
    }

    // Rewind so a caller copying the stream to a file starts at the first byte.
    reportStream.Position = 0;
    return reportStream;
}
PS: When I convert MemoryStream to a file, I got a corrupted file. Thanks!
I know this is an old post, but I could not get the accepted answer to work for me. The project linked would not even compile (which someone has already commented in that link). Also, it seems to use other Nuget packages like WPFToolkit.
So I'm adding my answer here in case someone finds it useful. This only uses the OpenXML SDK 2.5 and also the WindowsBase v4. This works on MS Word 2010 and later.
// Copies the template to the target path and replaces one merge field in the copy.
// The paths are verbatim string literals (@"..."), so backslashes need no escaping.
string sourceFile = @"C:\Template.docx";
string targetFile = @"C:\Result.docx";
File.Copy(sourceFile, targetFile, true);
using (WordprocessingDocument document = WordprocessingDocument.Open(targetFile, true))
{
    // If your sourceFile is a different type (e.g., .DOTX), you will need to change the target type like so:
    document.ChangeDocumentType(WordprocessingDocumentType.Document);

    // Get the MainPart of the document
    MainDocumentPart mainPart = document.MainDocumentPart;
    var mergeFields = mainPart.RootElement.Descendants<FieldCode>();

    var mergeFieldName = "SenderFullName";
    var replacementText = "John Smith";
    ReplaceMergeFieldWithText(mergeFields, mergeFieldName, replacementText);

    // Save the document
    mainPart.Document.Save();
}
/// <summary>
/// Replaces the first merge field whose field code contains
/// <paramref name="mergeFieldName"/> with plain text.
/// </summary>
/// <remarks>
/// Expects the field to consist of five consecutive runs: begin, field code,
/// separator, text, end. If that layout is not found, the document is left
/// untouched.
/// </remarks>
/// <param name="fields">Field codes to search.</param>
/// <param name="mergeFieldName">Name that the field code text must contain.</param>
/// <param name="replacementText">Text that replaces the merge field.</param>
private void ReplaceMergeFieldWithText(IEnumerable<FieldCode> fields, string mergeFieldName, string replacementText)
{
    FieldCode field = fields.FirstOrDefault(f => f.InnerText.Contains(mergeFieldName));
    if (field == null)
    {
        return;
    }

    // The Run that contains our FieldCode
    Run rFldCode = field.Parent as Run;
    if (rFldCode == null)
    {
        return;
    }

    // The four other Runs that make up the merge field
    Run rBegin = rFldCode.PreviousSibling<Run>();
    Run rSep = rFldCode.NextSibling<Run>();
    Run rText = rSep == null ? null : rSep.NextSibling<Run>();
    Run rEnd = rText == null ? null : rText.NextSibling<Run>();

    // The Text element shown to the user lives in the text Run
    Text t = rText == null ? null : rText.GetFirstChild<Text>();

    // Validate the whole layout before changing anything, so a field with an
    // unexpected shape is never left half-edited.
    if (rBegin == null || rSep == null || rEnd == null || t == null)
    {
        return;
    }

    t.Text = replacementText;

    // Remove the four Runs that form the field machinery; only the text Run stays.
    rFldCode.Remove();
    rBegin.Remove();
    rSep.Remove();
    rEnd.Remove();
}
What the code above does is basically this:
Identify the 4 Runs that make up the merge field named "SenderFullName".
Identify the Run that contains the Text element for our merge field.
Remove the 4 Runs.
Update the text property of the Text element for our merge field.
UPDATE
For anyone interested, here is a simple static class I used to help me with replacing merge fields.
Frank Fajardo's answer was 99% of the way there for me, but it is important to note that MERGEFIELDS can be SimpleFields or FieldCodes.
In the case of SimpleFields, the text runs displayed to the user in the document are children of the SimpleField.
In the case of FieldCodes, the text runs shown to the user are between the runs containing FieldChars with the Separate and the End FieldCharValues. Occasionally, several text containing runs exist between the Separate and End Elements.
The code below deals with these problems. Further details of how to get all the MERGEFIELDS from the document, including the header and footer is available in a GitHub repository at https://github.com/mcshaz/SimPlanner/blob/master/SP.DTOs/Utilities/OpenXmlExtensions.cs
/// <summary>
/// Builds a Run holding the given text, with NoProof set in its run properties.
/// </summary>
/// <param name="text">Text to place in the run.</param>
/// <returns>The newly created run (not yet attached to any document).</returns>
private static Run CreateSimpleTextRun(string text)
{
    // Properties first, then the text, matching the required child order.
    var properties = new RunProperties();
    properties.Append(new NoProof());

    var content = new Text { Text = text };

    var run = new Run();
    run.Append(properties);
    run.Append(content);
    return run;
}
/// <summary>
/// Writes <paramref name="replacementText"/> as the displayed text of a merge
/// field, which may be either a SimpleField or a FieldCode.
/// </summary>
/// <param name="field">The SimpleField or FieldCode element of the merge field.</param>
/// <param name="replacementText">Text to display in place of the field's current text.</param>
private static void InsertMergeFieldText(OpenXmlElement field, string replacementText)
{
    var sf = field as SimpleField;
    if (sf != null)
    {
        // Snapshot the Text elements first: Descendants() is lazy, and removing
        // an element while enumerating it ends the traversal early.
        var textChildren = sf.Descendants<Text>().ToList();
        if (textChildren.Count == 0)
        {
            // No displayed text yet: add a run that carries it.
            sf.Append(CreateSimpleTextRun(replacementText));
            return;
        }

        textChildren[0].Text = replacementText;
        foreach (var others in textChildren.Skip(1))
        {
            others.Remove();
        }
    }
    else
    {
        var runs = GetAssociatedRuns((FieldCode)field);
        var rEnd = runs[runs.Count - 1];

        // Drop every run between the Separate marker and the End marker
        // (the currently displayed text), then insert the new text before End.
        foreach (var r in runs
            .SkipWhile(r => !r.ContainsCharType(FieldCharValues.Separate))
            .Skip(1)
            .TakeWhile(r => r != rEnd))
        {
            r.Remove();
        }
        rEnd.InsertBeforeSelf(CreateSimpleTextRun(replacementText));
    }
}
/// <summary>
/// Collects the runs that make up a complex merge field: the run before the
/// field-code run, followed by every run after it up to and including the one
/// carrying the End field character. The field-code run itself is not included.
/// </summary>
/// <param name="fieldCode">Field code element whose parent is a Run.</param>
/// <returns>The runs in document order; the last entry is the End run.</returns>
/// <exception cref="InvalidOperationException">
/// The field has no preceding run, or no End marker follows the field code.
/// </exception>
private static IList<Run> GetAssociatedRuns(FieldCode fieldCode)
{
    Run rFieldCode = (Run)fieldCode.Parent;
    Run rBegin = rFieldCode.PreviousSibling<Run>();
    Run rCurrent = rFieldCode.NextSibling<Run>();
    if (rBegin == null || rCurrent == null)
    {
        throw new InvalidOperationException("Merge field is malformed: its surrounding runs are missing.");
    }

    var runs = new List<Run>(new[] { rBegin, rCurrent });
    while (!rCurrent.ContainsCharType(FieldCharValues.End))
    {
        rCurrent = rCurrent.NextSibling<Run>();
        if (rCurrent == null)
        {
            // Ran out of sibling runs without meeting the End marker.
            throw new InvalidOperationException("Merge field is malformed: no End field character found.");
        }
        runs.Add(rCurrent);
    }
    return runs;
}
/// <summary>
/// Tells whether the run's first FieldChar child has the given field character type.
/// </summary>
/// <param name="run">Run to inspect.</param>
/// <param name="fieldCharType">Field character type to look for.</param>
/// <returns>False when the run has no FieldChar child; otherwise the result of the type comparison.</returns>
private static bool ContainsCharType(this Run run, FieldCharValues fieldCharType)
{
    FieldChar marker = run.GetFirstChild<FieldChar>();
    if (marker == null)
    {
        return false;
    }
    return marker.FieldCharType.Value == fieldCharType;
}
You could try http://www.codeproject.com/KB/office/Fill_Mergefields.aspx which uses the Open XML SDK to do this.

Categories

Resources