ItextSharper PDF to Html Convert

ItextSharper PDF to Html Convert - c#

I need to convert a PDF file to an HTML file. I'm writing a library using iTextSharp. I found some code for this, but it only converts the text in the PDF. I need to convert the whole page (images, tables and everything else). How can I do that?
This is my code:
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Extracts the text of a PDF with iTextSharp and appends it to an output file.
/// </summary>
/// <remarks>
/// Only text is extracted (through <c>SimpleTextExtractionStrategy</c>); no HTML markup,
/// images or tables are produced.
/// </remarks>
public class PdfToHtmlManager
{
    /// <summary>
    /// Appends the text of every page of <paramref name="inputFileName"/> to
    /// <paramref name="outputFileName"/> using UTF-8.
    /// </summary>
    /// <param name="inputFileName">Path of the PDF to read.</param>
    /// <param name="outputFileName">Path of the file the text is appended to (created if missing).</param>
    /// <returns>
    /// "File name error" when either path is null or empty, "File is not exist" when the
    /// input file is missing, otherwise "success".
    /// </returns>
    public string ConvertPdf(string inputFileName, string outputFileName)
    {
        if (string.IsNullOrEmpty(inputFileName) || string.IsNullOrEmpty(outputFileName))
        {
            return "File name error";
        }

        if (!File.Exists(inputFileName))
        {
            return "File is not exist";
        }

        // Open the PDF once for the whole run instead of once per page, and make sure
        // it is closed even when extraction or writing throws.
        PdfReader reader = new PdfReader(inputFileName);
        try
        {
            int pageCount = reader.NumberOfPages;

            // "true" keeps the original append semantics of the output file.
            using (StreamWriter outFile = new StreamWriter(outputFileName, true, System.Text.Encoding.UTF8))
            {
                for (int page = 1; page <= pageCount; page++)
                {
                    // A fresh strategy per page so text from earlier pages is not repeated.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                    outFile.Write(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
                }
            }
        }
        finally
        {
            reader.Close();
        }

        return "success";
    }
}

Related

How do I pull all values from PDF?

I have a working solution that opens a PDF file and grabs the text. Unfortunately the values that I need are in form fields. I've tried a few ways to get the values but I can only get what appears to be the form name. The key values are correct, but the value received is wrong.
Key                    Value returned
Company Name           iText.Forms.Fields.PdfTextFormField
Phone Number           iText.Forms.Fields.PdfTextFormField
Business Contact Data  iText.Forms.Fields.PdfTextFormField
Name                   iText.Forms.Fields.PdfTextFormField
The values in the form fields are not being returned. Is there a better way to do this?
using System;
using System.Collections.Generic;
using iText.Forms;
using iText.Forms.Fields;
using iText.Kernel.Pdf;
namespace ConsoleApplication1 {
    class Class1 {
        // Lists every AcroForm field of the PDF at pdfPath as "name: value" lines,
        // writes that text to pdfPath and returns it.
        // The value part is produced by PdfFormField.ToString().
        public string pdfthree(string pdfPath) {
            PdfReader pdfReader = new PdfReader(pdfPath);
            PdfDocument pdfDocument = new PdfDocument(pdfReader);
            PdfAcroForm form = PdfAcroForm.GetAcroForm(pdfDocument, false);
            IDictionary<string, PdfFormField> fields = form.GetFormFields();
            form.GetField("Name");
            string output = "";
            foreach (KeyValuePair<string, PdfFormField> entry in fields) {
                output = output + entry.Key + ": " + entry.Value.ToString() + "\n";
            }
            System.IO.File.WriteAllText(pdfPath, output);
            pdfDocument.Close();
            pdfReader.Close();
            return output;
        }
    }
}

Instead of calling PdfFormField#ToString(), you should call PdfFormField#GetValueAsString() to get the value of the field.
Full code:
using System;
using System.Collections.Generic;
using iText.Forms;
using iText.Forms.Fields;
using iText.Kernel.Pdf;
namespace ConsoleApplication1 {
    class Class1 {
        /// <summary>
        /// Reads every AcroForm field of a PDF and returns them as "name: value" lines.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF to read; the resulting text is also written to this path.</param>
        /// <returns>One "name: value\n" entry per form field; empty when the PDF has no form.</returns>
        public string pdfthree(string pdfPath) {
            System.Text.StringBuilder output = new System.Text.StringBuilder();

            // using blocks release the file even if reading the form throws.
            using (PdfReader reader = new PdfReader(pdfPath))
            using (PdfDocument document = new PdfDocument(reader)) {
                // With createIfNotExist = false a PDF without a form yields null.
                PdfAcroForm acroForm = PdfAcroForm.GetAcroForm(document, false);
                if (acroForm != null) {
                    foreach (KeyValuePair<string, PdfFormField> field in acroForm.GetFormFields()) {
                        // GetValueAsString() returns the field's value; ToString() only names the field type.
                        output.Append(field.Key).Append(": ").Append(field.Value.GetValueAsString()).Append("\n");
                    }
                }
            }

            string text = output.ToString();

            // Written only after the document is closed so the file is no longer held by the reader.
            // NOTE(review): this overwrites the input PDF with plain text, as the original did —
            // confirm that a separate output path is not what was intended.
            System.IO.File.WriteAllText(pdfPath, text);
            return text;
        }
    }
}

How to update custom Excel ribbon

I need to update the custom ribbon of an Excel file that is currently open, from another Excel file, without closing it.
I am trying to do this with the Open XML SDK for Office.
Here is the class that exports and imports the custom ribbon.
/// <summary>
/// Exports the custom ribbon (customUI XML plus its images) of an Excel workbook to a
/// directory, and imports such an export into another workbook, using the Open XML SDK.
/// </summary>
public static class OpenDocumentTools
{
    /// <summary>
    /// Writes the ribbon XML of <paramref name="fileName"/> to "Ribbon.xml" and every ribbon
    /// image to its own file inside <paramref name="exportDir"/>.
    /// </summary>
    /// <param name="fileName">Workbook to read the ribbon from.</param>
    /// <param name="exportDir">Existing directory; all files in it are deleted before the export.</param>
    /// <returns>true on success; false when the workbook has no ribbon part or any step fails.</returns>
    public static bool ExportCustomRibbon(string fileName, string exportDir)
    {
        try
        {
            // Export only reads the workbook, so open it read-only; the using block
            // releases the package (and its file lock) on every path.
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
            {
                RibbonExtensibilityPart ribbon = spreadsheetDocument.RibbonExtensibilityPart;
                if (ribbon == null)
                {
                    // Nothing to export: report failure before touching the export directory.
                    return false;
                }

                //Clear directory
                foreach (string existingFile in Directory.EnumerateFiles(exportDir).ToList())
                {
                    File.Delete(existingFile);
                }

                //Processing images
                foreach (ImagePart imagePart in ribbon.ImageParts.ToList())
                {
                    // The last segment of the part URI is used as the exported file name.
                    string imageName = Path.GetFileName(imagePart.Uri.OriginalString);
                    using (Stream imageStream = imagePart.GetStream())
                    using (Image img = Image.FromStream(imageStream))
                    {
                        img.Save(Path.Combine(exportDir, imageName));
                    }
                }

                //Save ribbon xml
                using (StreamWriter sw = File.CreateText(Path.Combine(exportDir, "Ribbon.xml")))
                {
                    sw.WriteLine(ribbon.CustomUI.OuterXml);
                }
            }
            return true;
        }
        catch (Exception ex)
        {
            // Keep the bool contract, but leave a trace of what went wrong.
            System.Diagnostics.Trace.TraceError("ExportCustomRibbon failed: {0}", ex);
            return false;
        }
    }

    /// <summary>
    /// Replaces the custom ribbon of <paramref name="fileName"/> with the XML in
    /// <paramref name="xmlFile"/> and the .png/.jpg/.jpeg images found in <paramref name="importDir"/>.
    /// </summary>
    /// <param name="fileName">Workbook to modify; it is opened for editing.</param>
    /// <param name="xmlFile">File containing the customUI XML.</param>
    /// <param name="importDir">Directory with the ribbon images; each file name without extension becomes the image's relationship id.</param>
    /// <returns>true on success; false when any step fails.</returns>
    public static bool ImportCustomRibbon(string fileName, string xmlFile, string importDir)
    {
        try
        {
            //Read xml content (ReadAllText closes the file, unlike a bare File.OpenText)
            string content = File.ReadAllText(xmlFile);
            using (SpreadsheetDocument document = SpreadsheetDocument.Open(fileName, true))
            {
                //Get or create RibbonExtensibilityPart
                RibbonExtensibilityPart ribbon = document.RibbonExtensibilityPart;
                if (ribbon == null)
                {
                    ribbon = document.AddRibbonExtensibilityPart();
                }

                //Load xml content to ribbon
                ribbon.CustomUI = new CustomUI(content);
                ribbon.CustomUI.Save();

                //Remove image parts
                foreach (ImagePart stalePart in ribbon.ImageParts.ToList())
                {
                    ribbon.DeletePart(stalePart);
                }

                //Load images to ribbon
                foreach (string imageFile in Directory.EnumerateFiles(importDir).ToList())
                {
                    ImagePartType imageType;
                    // Compare extensions case-insensitively and accept ".jpg" as well as ".jpeg".
                    switch (Path.GetExtension(imageFile).ToLowerInvariant())
                    {
                        case ".png":
                            imageType = ImagePartType.Png;
                            break;
                        case ".jpg":
                        case ".jpeg":
                            imageType = ImagePartType.Jpeg;
                            break;
                        default:
                            // Not an image type the ribbon import handles (e.g. Ribbon.xml itself).
                            continue;
                    }

                    string relationshipId = Path.GetFileNameWithoutExtension(imageFile);
                    ImagePart imagePart = ribbon.AddImagePart(imageType, relationshipId);
                    using (FileStream stream = new FileStream(imageFile, FileMode.Open, FileAccess.Read, FileShare.Read))
                    {
                        imagePart.FeedData(stream);
                    }

                    // NOTE(review): AddImagePart above was already given this id; this call is
                    // kept from the original — confirm whether it is still required.
                    ribbon.AddPart(imagePart, relationshipId);
                }
            }
            return true;
        }
        catch (Exception ex)
        {
            // Keep the bool contract, but leave a trace of what went wrong.
            System.Diagnostics.Trace.TraceError("ImportCustomRibbon failed: {0}", ex);
            return false;
        }
    }
}
Here calling code:
// Exports the custom ribbon of Source.xlsm to the Export directory, then imports that
// export into Dest.xlsm. Paths are verbatim string literals (@"...") so the backslashes
// are not treated as escape sequences.
static void Main(string[] args)
{
    bool res = OpenDocumentTools.ExportCustomRibbon(@"D:\15\Ribbon\Source.xlsm", @"D:\15\Ribbon\Export");
    res = OpenDocumentTools.ImportCustomRibbon(@"D:\15\Ribbon\Dest.xlsm", @"D:\15\Ribbon\Export\Ribbon.xml", @"D:\15\Ribbon\Export");
}
When the Excel files are closed, the code works fine. But if the Excel file is open, I get an exception:
"The process cannot access the file 'D:\15\Ribbon\Dest.xlsm' because it is being used by another process." This occurs when the program tries to execute "using (SpreadsheetDocument document = SpreadsheetDocument.Open(fileName, true))".
Does anybody know how I can update the ribbon without closing the file?

convert a jpg/png/txt or any file format to pdf using mvc5

How do i convert a jpg/png/txt or any file format to pdf using mvc c#.
Here is the code:
/// <summary>
/// Saves every non-empty file uploaded with the current request into
/// Documents\Profile\{code} under the site root.
/// </summary>
/// <param name="code">Name of the profile sub-folder. Must be a plain folder name; null is treated as empty.</param>
/// <returns>
/// JSON with <c>Message</c> (the last uploaded file name) and <c>documentname</c> (the name the
/// last file was stored under), or an error message with an empty <c>documentname</c>.
/// </returns>
public ActionResult SaveProfileDocument(string code)
{
    bool isSavedSuccessfully = true;
    string fName = "";
    string _documentname = String.Empty;

    // "code" comes from the request: refuse anything that could escape the profile
    // folder (path separators, drive letters, "." / "..").
    string folderName = code ?? String.Empty;
    if (folderName == "." || folderName == ".." || folderName.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        return Json(new { Message = "Error in saving file", documentname = "" });
    }

    try
    {
        // Path.Combine instead of string.Format so braces in "code" cannot break the
        // format string and separators are handled consistently.
        string pathString = Path.Combine(Server.MapPath(@"\"), "Documents", "Profile", folderName);

        foreach (string fileName in Request.Files)
        {
            HttpPostedFileBase file = Request.Files[fileName];
            if (file == null)
            {
                continue;
            }

            fName = file.FileName;
            if (file.ContentLength <= 0)
            {
                continue;
            }

            // Use only the file-name part of what the client sent; it may carry a path.
            string safeFileName = Path.GetFileName(file.FileName);

            // CreateDirectory is a no-op when the directory already exists.
            System.IO.Directory.CreateDirectory(pathString);

            _documentname = safeFileName;
            string path = Path.Combine(pathString, _documentname);
            if (System.IO.File.Exists(path))
            {
                // Do not overwrite an existing document: store under a unique name instead.
                _documentname = Guid.NewGuid() + "_" + safeFileName;
                path = Path.Combine(pathString, _documentname);
            }
            file.SaveAs(path);
        }
    }
    catch (Exception ex)
    {
        // The caller only sees the generic error JSON, so record the cause here.
        System.Diagnostics.Trace.TraceError("SaveProfileDocument failed: {0}", ex);
        isSavedSuccessfully = false;
    }

    if (isSavedSuccessfully)
    {
        return Json(new { Message = fName, documentname = _documentname });
    }
    else
    {
        return Json(new { Message = "Error in saving file", documentname = "" });
    }
}
In the above code I am only saving the file,
but here I need to convert the file to PDF first and then save it.
So for the conversion I need a separate class or method that I can simply call from here.
In other words: whenever a file of any format is uploaded, it should be converted to PDF and then saved in a folder (or wherever).

You can't convert an image file to PDF directly. You can create a PDF file and add the image file to it:
// Creates PDFs/Images.pdf containing the heading "GIF" followed by the image
// Images/mikesdotnetting.gif.
string pdfpath = Server.MapPath("PDFs");
string imagepath = Server.MapPath("Images");
Document doc = new Document();
try
{
    // The using block releases the output file even when PdfWriter.GetInstance or a
    // later call throws.
    using (FileStream output = new FileStream(Path.Combine(pdfpath, "Images.pdf"), FileMode.Create))
    {
        try
        {
            PdfWriter.GetInstance(doc, output);
            doc.Open();
            doc.Add(new Paragraph("GIF"));
            Image gif = Image.GetInstance(Path.Combine(imagepath, "mikesdotnetting.gif"));
            doc.Add(gif);
        }
        finally
        {
            // Close the document while the stream is still open so the PDF can be finished.
            doc.Close();
        }
    }
}
catch (Exception ex)
{
    //Log error;
    System.Diagnostics.Trace.TraceError("Creating Images.pdf failed: {0}", ex);
}
Reference:
https://www.mikesdotnetting.com/article/87/itextsharp-working-with-images

send pdf statement without saving on application server

Requirement: generate an invoice in PDF format from the company template and send it by email.
Approach I used:
Placed the company template at path: ~Content/InvoiceTemplate/
Using iTextsharp Pdf stamper, generated pdf, saved it at path: ~/Content/reports/
In email module, picked the file generated above and attached to email to be sent
Problem: Every invoice generated is being stored on application server, making application heavier day by day.
Question: Is there another way to send the generated invoice by email without saving it on the application server?
Code:
// Builds one invoice PDF from the statement list: for each batch of items it fills the
// form fields of invoiceTemplate.pdf in memory, flattens the form, and appends page 1 of
// the result to ~/Content/reports/{invoiceNumber}.pdf through PdfSmartCopy. Afterwards it
// calls emailController.CreateMessageWithAttachment(invoiceNumber).
// Parts of the original body are elided ("snip" markers below).
public static void WriteInTemplate(List<Models.Statement> statementList)
{
try
{
// FirstOrDefault() yields null for an empty list, so this line throws in that case;
// the exception ends up in the empty catch at the bottom.
string invoiceNumber = statementList.FirstOrDefault().Invoice.ToString().Trim();
using (Document document = new Document())
{
// NOTE(review): this stream is not wrapped in a using; presumably it is closed when
// smartCopy/document are disposed — confirm.
FileStream fileStream = new FileStream(HostingEnvironment.MapPath("~/Content/reports/" + invoiceNumber + ".pdf"), FileMode.Create);
using (PdfSmartCopy smartCopy = new PdfSmartCopy(document, fileStream))
{
document.Open();
int statementCounter = 0;
int numberOfItems = statementList.Count();
int remainingItems = numberOfItems;
// Upper bound of items handled per loop iteration (one template page each).
int maxItemsPerPage = 17;
if (remainingItems > 0)
{
do
{
// Last batch: shrink the batch size to what is left.
if (remainingItems < maxItemsPerPage)
maxItemsPerPage = remainingItems;
// A fresh reader on the template for every output page; no Close() call on it is
// visible in this listing.
PdfReader pdfReader = new PdfReader(HostingEnvironment.MapPath("~/Content/InvoiceTemplate/invoiceTemplate.pdf"));
using (var memoryStream = new MemoryStream())
{
using (PdfStamper pdfStamper = new PdfStamper(pdfReader, memoryStream))
{
string month = null;
string day = null;
string year = null;
AcroFields pdfFields = pdfStamper.AcroFields;
{//billing address
pdfFields.SetField("BillToCompany", statementList.FirstOrDefault().BillToCompany.ToString().Trim().ToUpper());
pdfFields.SetField("BillToContact", statementList.FirstOrDefault().BillToContact.ToString().Trim().ToUpper());
}
// Code elided by the author; the closing brace after the markers belongs to a block
// opened inside the elided part.
//---------------------snip------------------------------//
//---------------------snip------------------------------//
}
{//invoice sum up
// subTotal, misc and tax are computed but not used in the visible code.
double subTotal = Convert.ToDouble(statementList.FirstOrDefault().Subtotal);
pdfFields.SetField("Subtotal", statementList.FirstOrDefault().Subtotal.ToString("0.00").Trim());
double misc = Convert.ToDouble(statementList.FirstOrDefault().Misc);
pdfFields.SetField("Misc", statementList.FirstOrDefault().Misc.ToString("0.00").Trim());
double tax = Convert.ToDouble(statementList.FirstOrDefault().Tax);
pdfFields.SetField("Tax", statementList.FirstOrDefault().Tax.ToString("0.00").Trim());
}
pdfStamper.FormFlattening = true; // generate a flat PDF
}
// The stamper has been disposed above, so memoryStream now holds the filled page;
// re-read it and append its first page to the output document.
pdfReader = new PdfReader(memoryStream.ToArray());
smartCopy.AddPage(smartCopy.GetImportedPage(pdfReader, 1));
}
remainingItems = remainingItems - maxItemsPerPage;
} while (remainingItems > 0);
}
}
}
emailController.CreateMessageWithAttachment(invoiceNumber);
}
// NOTE(review): every exception is swallowed here without logging, so a failed invoice
// is silent and no e-mail is sent.
catch (Exception e)
{
}
}

You can try to attach the file from a memory stream. You can search Google for "C# Attach file from memory stream".
Here is a sample snippet:
// Attaches the content of memoryStream to the mail as "example.txt" with MIME type text/plain.
// NOTE(review): presumably the stream must be rewound (Position = 0) before attaching,
// otherwise the attachment may arrive empty — confirm.
mail.Attachments.Add(new Attachment(memoryStream, "example.txt", "text/plain"));
Or:
email attachment from the MemoryStream comes empty
http://social.msdn.microsoft.com/Forums/en-US/netfxbcl/thread/049420de-7e93-4fcb-9920-0c1cdf4ca420/
http://www.codeproject.com/KB/IP/InMemoryMailAttachment.aspx

If the pdf files aren't too large, and you're not using a server farm, and you don't have millions of people generating invoices at the same time..
Then you could always use a MemoryStream and pass the memory stream to your email service.

Instead of creating the file in your application directory, you could create it in the temp folder and delete it when you are done with it. This way the files won't take up space on your drive.
This is a temp-file class that I have used with iTextSharp to export a PDF after filling the form.
/// <summary>
/// Owns a file on disk and deletes it when disposed or finalized.
/// </summary>
sealed class TempFile : IDisposable
{
    // Path of the owned file; null once the instance has been disposed.
    string _location;

    /// <summary>Creates a new temporary file via System.IO.Path.GetTempFileName().</summary>
    public TempFile() : this(System.IO.Path.GetTempFileName())
    {
    }

    /// <summary>Takes ownership of the file at <paramref name="path"/>.</summary>
    /// <exception cref="ArgumentNullException">path is null or empty.</exception>
    public TempFile(string path)
    {
        if (path == null || path.Length == 0)
        {
            throw new ArgumentNullException("path");
        }
        _location = path;
    }

    /// <summary>Path of the owned file.</summary>
    /// <exception cref="ObjectDisposedException">The instance has already been disposed.</exception>
    public string Path
    {
        get
        {
            string current = _location;
            if (current != null)
            {
                return current;
            }
            throw new ObjectDisposedException(GetType().Name);
        }
    }

    ~TempFile()
    {
        Dispose(false);
    }

    /// <summary>Deletes the owned file; safe to call more than once.</summary>
    public void Dispose()
    {
        Dispose(true);
    }

    private void Dispose(bool disposing)
    {
        if (disposing)
        {
            // Explicit disposal: the finalizer has nothing left to do.
            GC.SuppressFinalize(this);
        }

        string doomed = _location;
        if (doomed == null)
        {
            return;
        }

        try
        {
            File.Delete(doomed);
        }
        catch
        {
            // best effort
        }
        _location = null;
    }
}
you should try
// Usage sketch (the dotted parts are placeholders, not compilable code): the temporary
// file exists only inside the using block and is deleted when the TempFile is disposed.
using(TempFile file = new TempFile())
{
.....= new FileStream(file.Path,.....)
//pdf form filling using iTextSharp
var arry = GetBytesArray(file.Path);
//Send Array to response and set content type to pdf..
}

itextsharp trimming pdf document's pages

I have a pdf document that has form fields that I'm filling out programatically with c#. Depending on three conditions, I need to trim (delete) some of the pages from that document.
Is that possible to do?
for condition 1: I need to keep pages 1-4 but delete pages 5 and 6
for condition 2: I need to keep pages 1-4 but delete 5 and keep 6
for condition 3: I need to keep pages 1-5 but delete 6

Use PdfReader.SelectPages() combined with PdfStamper. The code below uses iTextSharp 5.5.1.
// Writes to outputPdf a copy of inputPdf that contains only the pages named in
// pageSelection (iTextSharp page-selection syntax, e.g. "1-4,6").
public void SelectPages(string inputPdf, string pageSelection, string outputPdf)
{
    using (PdfReader source = new PdfReader(inputPdf))
    {
        // Restrict the reader to the requested pages before anything is written.
        source.SelectPages(pageSelection);

        FileStream destination = File.Create(outputPdf);
        using (PdfStamper writer = new PdfStamper(source, destination))
        {
            // Closing the stamper writes the reduced document to the destination.
            writer.Close();
        }
    }
}
Then you call this method with the correct page selection for each condition.
Condition 1:
SelectPages(inputPdf, "1-4", outputPdf);
Condition 2:
SelectPages(inputPdf, "1-4,6", outputPdf);
or
SelectPages(inputPdf, "1-6,!5", outputPdf);
Condition 3:
SelectPages(inputPdf, "1-5", outputPdf);
Here's the comment from the iTextSharp source code on what makes up a page selection. This is in the SequenceList class which is used to process a page selection:
/**
* This class expands a string into a list of numbers. The main use is to select a
* range of pages.
* <p>
* The general systax is:<br>
* [!][o][odd][e][even]start-end
* <p>
* You can have multiple ranges separated by commas ','. The '!' modifier removes the
* range from what is already selected. The range changes are incremental, that is,
* numbers are added or deleted as the range appears. The start or the end, but not both, can be ommited.
*/

Instead of deleting pages from a document, what you actually do is create a new document and import only the pages that you want to keep. Below is a full working WinForms app that does that (targeting iTextSharp 5.1.1.0). The last parameter of the function removePagesFromPdf is an array of the pages to keep.
The code below works off of physical files but would be very easy to convert to something based on streams so that you don't have to write to disk if you don't want to.
using System;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Windows.Forms;
using iTextSharp.text.pdf;
using iTextSharp.text;
namespace Full_Profile1
{
    /// <summary>
    /// Demo form: on load it copies selected pages of Desktop\Test.pdf into
    /// Desktop\TestOutput.pdf and then closes itself.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            //The files that we are working with
            string sourceFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            string sourceFile = Path.Combine(sourceFolder, "Test.pdf");
            string destFile = Path.Combine(sourceFolder, "TestOutput.pdf");
            //Remove all pages except 1,2,3,4 and 6
            removePagesFromPdf(sourceFile, destFile, 1, 2, 3, 4, 6);
            this.Close();
        }

        /// <summary>
        /// Creates <paramref name="destinationFile"/> containing only the listed pages of
        /// <paramref name="sourceFile"/>, in the order given.
        /// </summary>
        /// <param name="sourceFile">PDF to read pages from.</param>
        /// <param name="destinationFile">PDF to create (overwritten if it exists).</param>
        /// <param name="pagesToKeep">1-based page numbers to copy.</param>
        public void removePagesFromPdf(String sourceFile, String destinationFile, params int[] pagesToKeep)
        {
            //Used to pull individual pages from our source
            PdfReader r = new PdfReader(sourceFile);
            try
            {
                //Create our destination file
                using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    using (Document doc = new Document())
                    {
                        using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
                        {
                            //Open the destination for writing
                            doc.Open();
                            //Loop through each page that we want to keep
                            foreach (int page in pagesToKeep)
                            {
                                //Add a new blank page to destination document
                                doc.NewPage();
                                //Extract the given page from our reader and add it directly to the destination PDF
                                w.DirectContent.AddTemplate(w.GetImportedPage(r, page), 0, 0);
                            }
                            //Close our document
                            doc.Close();
                        }
                    }
                }
            }
            finally
            {
                // Release the source file only after the destination has been fully
                // written, and also when copying fails.
                r.Close();
            }
        }
    }
}

Here is the code I use to copy all but the last page of an existing PDF. Everything is in memory streams. The variable pdfByteArray is a byte[] of the original pdf obtained using ms.ToArray(). pdfByteArray is overwritten with the new PDF.
// Rebuilds pdfByteArray in memory so that it contains every page of the original
// except the last one.
PdfReader originalPDFReader = new PdfReader(pdfByteArray);
try
{
    using (MemoryStream msCopy = new MemoryStream())
    {
        using (Document docCopy = new Document())
        {
            using (PdfCopy copy = new PdfCopy(docCopy, msCopy))
            {
                docCopy.Open();
                // "- 1" stops before the last page.
                for (int pageNum = 1; pageNum <= originalPDFReader.NumberOfPages - 1; pageNum++)
                {
                    copy.AddPage(copy.GetImportedPage(originalPDFReader, pageNum));
                }
                docCopy.Close();
            }
        }
        // Read the bytes only after the document is closed, so the PDF is complete.
        pdfByteArray = msCopy.ToArray();
    }
}
finally
{
    // The reader is needed until the copy is closed; release it afterwards in all cases.
    originalPDFReader.Close();
}

I know it's an old post; I have simply extended @chris-haas's solution a step further.
It deletes the selected pages and then saves the remaining pages into a separate PDF file.
//ms is the MemoryStream and fs is the FileStream;
//CopyTo copies the bytes from ms's current position to its end into fs.
ms.CopyTo(fs);
Save the Stream to a separate pdf file. 100% working without any error.
pageRange="5"
pageRange="2,15-20"
pageRange="1-5,15-20"
You can pass pageRange values like the samples given above.
/// <summary>
/// Writes a copy of <paramref name="SourcePdfPath"/> to <paramref name="OutputPdfPath"/>
/// with the pages named in <paramref name="pageRange"/> left out.
/// </summary>
/// <param name="pageRange">
/// Comma-separated 1-based page numbers and/or "start-end" ranges, e.g. "5", "2,15-20", "1-5,15-20".
/// </param>
/// <param name="SourcePdfPath">PDF to read.</param>
/// <param name="OutputPdfPath">PDF to create (overwritten if it exists).</param>
/// <param name="Password">
/// Not used by this method. NOTE(review): presumably meant for encrypted sources — confirm.
/// </param>
/// <remarks>Any failure is reported to the user with a message box; nothing is thrown to the caller.</remarks>
private void DeletePagesNew(string pageRange, string SourcePdfPath, string OutputPdfPath, string Password = "")
{
    PdfReader reader = null;
    try
    {
        HashSet<int> pagesToDelete = ParsePagesToDelete(pageRange);

        reader = new PdfReader(SourcePdfPath);
        List<int> pagesToKeep = Enumerable.Range(1, reader.NumberOfPages)
                                          .Where(p => !pagesToDelete.Contains(p))
                                          .ToList();
        if (pagesToKeep.Count == 0)
        {
            // Fail before the output file is created rather than leaving a broken file behind.
            throw new InvalidOperationException("The page range removes every page; the output would have no pages.");
        }

        using (var fs = new FileStream(OutputPdfPath, FileMode.Create, FileAccess.Write, FileShare.None))
        using (var doc = new Document())
        using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
        {
            doc.Open();
            PdfContentByte cb = w.DirectContent;
            foreach (int p in pagesToKeep)
            {
                // Size the new page from the *rotated* size so that rotated source pages
                // get a matching page box.
                var pageSize = reader.GetPageSizeWithRotation(p);
                doc.SetPageSize(pageSize);
                doc.NewPage();

                PdfImportedPage pageImport = w.GetImportedPage(reader, p);
                int rot = reader.GetPageRotation(p);
                if (rot == 90 || rot == 270)
                {
                    // Rotate the imported content and shift it up by the page height.
                    // NOTE(review): a rotation of 180 falls through to the unrotated branch,
                    // as in the original — confirm this is acceptable.
                    cb.AddTemplate(pageImport, 0, -1.0f, 1.0f, 0, 0, pageSize.Height);
                }
                else
                {
                    cb.AddTemplate(pageImport, 1.0f, 0, 0, 1.0f, 0, 0);
                }
            }
            doc.Close();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    finally
    {
        // Release the source file on success and on failure alike.
        if (reader != null)
        {
            reader.Close();
        }
    }
}

// Expands a page-range string such as "2,15-20" into the set of page numbers it names.
private static HashSet<int> ParsePagesToDelete(string pageRange)
{
    if (pageRange == null)
    {
        throw new ArgumentNullException("pageRange");
    }

    var pages = new HashSet<int>();
    foreach (string token in pageRange.Split(','))
    {
        if (token.IndexOf('-') != -1)
        {
            string[] bounds = token.Split('-');
            int first = Convert.ToInt32(bounds[0]);
            int last = Convert.ToInt32(bounds[1]);
            for (int i = first; i <= last; i++)
            {
                pages.Add(i);
            }
        }
        else
        {
            pages.Add(Convert.ToInt32(token));
        }
    }
    return pages;
}

Develop Reference

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

ItextSharper PDF to Html Convert - c#

Related

How do I pull all values from PDF?

How to update custom Excel ribbon

convert a jpg/png/txt or any file format to pdf using mvc5

send pdf statement without saving on application server

itextsharp trimming pdf document's pages

Categories

Resources