PDFBox doesn't embed all fonts

PDFBox doesn't embed all fonts - C#

I'm using PDFBox in a C# project to create PDF/A and PDF/A-3 files. Almost everything works, except that when I use PDFBox to convert a normal PDF file to PDF/A, not all fonts are embedded. If I use Word to save as PDF/A, the font in question is embedded. How do I embed ArialMT too?
/// <summary>
/// Attaches PDF/A identification metadata (part 1, conformance "A") and an
/// sRGB output intent to the document held in <c>doc</c>, saves the document
/// to a fixed path, and returns its catalog.
/// </summary>
/// <param name="producer">Not used by the current implementation.</param>
/// <param name="creator">Not used by the current implementation.</param>
/// <returns>The catalog of <c>doc</c> after metadata and output intent were added.</returns>
private PDDocumentCatalog makeDocPDFAcompliant(String producer, String creator)
{
    PDDocumentCatalog cat = doc.getDocumentCatalog();
    PDMetadata metadata = new PDMetadata(doc);
    cat.setMetadata(metadata);

    // Collect the font resources of every page.
    List<Dictionary<string, PDFont>> lstFonts = new List<Dictionary<string, PDFont>>();
    List<PDFont> lstPDFonts = new List<PDFont>();
    List pages = cat.getAllPages();
    Iterator it = pages.iterator();
    while (it.hasNext())
    {
        PDPage page = (PDPage)it.next();
        var pageFont = page.getResources().getFonts();
        // NOTE(review): ToDicitonary is not a framework method; presumably a
        // project-defined extension that converts the Java map - confirm.
        lstFonts.Add(pageFont.ToDicitonary<string, PDFont>());
    }

    // Reduce the per-page dictionaries to a list of distinct fonts.
    foreach (Dictionary<string, PDFont> d in lstFonts)
    {
        foreach (KeyValuePair<string, PDFont> entry in d)
        {
            PDFont font = entry.Value;
            if (!lstPDFonts.Contains(font))
            {
                lstPDFonts.Add(font);
            }
        }
    }
    // NOTE(review): lstPDFonts is never read after this point, so no font is
    // embedded by this method.
    //PDType0Font font0 = PDType0Font.Load(doc,)

    // XMP packet declaring PDF/A part 1, conformance level "A".
    XMPMetadata xmp = new XMPMetadata();
    XMPSchemaPDFAId pdfaid = new XMPSchemaPDFAId(xmp);
    xmp.addSchema(pdfaid);
    pdfaid.setConformance("A");
    pdfaid.setPart(java.lang.Integer.valueOf(1));
    pdfaid.setAbout("");
    metadata.importXMPMetadata(xmp);

    // Wrap the embedded ICC profile resource so the Java API can read it.
    //System.IO.Stream asset = Zaumzeug.Properties.Resources.sRGB_Color_Space_Profile
    System.IO.Stream stream = new System.IO.MemoryStream(Zaumzeug.Properties.Resources.sRGB_Color_Space_Profile);
    InputStream colorProfile = new ikvm.io.InputStreamWrapper(stream);
    PDOutputIntent oi = new PDOutputIntent(doc, colorProfile);
    oi.setInfo("sRGB IEC61966-2.1");
    oi.setOutputCondition("sRGB IEC61966-2.1");
    oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
    oi.setRegistryName("http://www.color.org");
    cat.addOutputIntent(oi);

    // Verbatim string literal (@"...") so the backslashes are not treated as escapes.
    doc.save(@"D:\Examples .Net\Data\FontsNormalA.pdf");
    return cat;
}

Related

Images.png Manipulation to SVG in C#

I am using ImageMagick and potrace in C# to convert a PNG to PNM and then to SVG, but unfortunately the SVG file comes out without colour!
How can I solve the colour problem? Should I use another converter? Suggestions, please.
I have tried many packages in Visual Studio with C#, but they either don't work or the output contains a copyright notice, which spoils my SVG file.
I want my output without the copyright notice, please!

You can use Aspose.Imaging for .NET API. Please open the NuGet package manager, search for "Aspose.Imaging " and install it.
Tip: I am using .net 5.0
The modified code is as follows:
using Aspose.Imaging;
using Aspose.Imaging.FileFormats.Png;
using Aspose.Imaging.ImageOptions;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>
/// Sample program that loads a fixed PNG file with Aspose.Imaging and saves it
/// as SVG into the templates folder.
/// </summary>
public class Picture
{
    /// <summary>
    /// Entry point. Builds the tables of supported formats and runs the conversion.
    /// </summary>
    public static void Main()
    {
        // Verbatim string literals (@"...") keep the backslashes in Windows paths literal.
        string templatesFolder = @"C:\Users\Administrator\Downloads\"; //the document you want to store
        ProcessConvertion();

        // Walks the export formats for the input file and saves only the
        // output whose extension is ".svg".
        void ProcessConvertion()
        {
            //Get list of supported formats in
            //Aspose.Imaging for loading and saving images
            var formats = GetAvailableImageFormats();
            var importFormats = formats.Import;
            var exportFormats = formats.Export;
            //Process each raster and vector format that can be loaded
            foreach (var format in importFormats)
            {
                string formatExt = format.Key;
                var inputFile = @"C:\Users\Administrator\Downloads\1.png"; //your PNG of picture path
                //Process each raster and vector format
                //to which we can save imported image
                foreach (var exportFormat in exportFormats)
                {
                    var outputFile = Path.Combine(templatesFolder, $"convert-{formatExt}-to-{exportFormat.Key}.{exportFormat.Key}");
                    System.Console.WriteLine("Processing conversion:" + outputFile);
                    //More about load method can be found at
                    //https://apireference.aspose.com/imaging/net/aspose.imaging.image/load/methods/2
                    //Load imported image
                    using (var image = Image.Load(inputFile))
                    {
                        //Obtain default saving options defined for each image
                        ImageOptionsBase exportOptions = exportFormat.Value.Clone();
                        //If loaded image is vector, need to specify vector rasterization options
                        //for export to another vector
                        if (image is VectorImage)
                        {
                            VectorRasterizationOptions rasterizationOptions = format.Value;
                            rasterizationOptions.PageWidth = image.Width;
                            rasterizationOptions.PageHeight = image.Height;
                            exportOptions.VectorRasterizationOptions = rasterizationOptions;
                        }
                        // Only the SVG output is actually written to disk.
                        if (Path.GetExtension(outputFile).Equals(".svg"))
                        {
                            image.Save(outputFile, exportOptions);
                        }
                    }
                    //File.Delete(outputFile);
                }
                // The input file is fixed, so one pass over the import formats is enough.
                break;
                //System.GC.Collect();
            }
        }

        // Returns the formats that can be loaded (with their rasterization
        // options) and the formats that can be saved (with their default
        // save options), each keyed by file extension.
        (Dictionary<string, VectorRasterizationOptions> Import, Dictionary<string, ImageOptionsBase> Export) GetAvailableImageFormats()
        {
            ///Raster and vector formats to that we can export images
            //Raster image formats that support both - save and load and their default save options
            Dictionary<string, ImageOptionsBase> rasterFormatsThatSupportExportAndImport = new Dictionary<string, ImageOptionsBase>()
            {
                { "bmp", new BmpOptions()},
                { "gif", new GifOptions()},
                { "dicom", new DicomOptions()},
                { "jpg", new JpegOptions()},
                { "jpeg", new JpegOptions()},
                { "jpeg2000", new Jpeg2000Options() },
                { "j2k", new Jpeg2000Options { Codec = Aspose.Imaging.FileFormats.Jpeg2000.Jpeg2000Codec.J2K } },
                { "jp2", new Jpeg2000Options { Codec = Aspose.Imaging.FileFormats.Jpeg2000.Jpeg2000Codec.Jp2 }},
                { "png",new PngOptions(){ ColorType = PngColorType.TruecolorWithAlpha} },
                { "apng", new ApngOptions()},
                { "tiff", new Aspose.Imaging.ImageOptions.TiffOptions(Aspose.Imaging.FileFormats.Tiff.Enums.TiffExpectedFormat.Default)},
                { "tif", new Aspose.Imaging.ImageOptions.TiffOptions(Aspose.Imaging.FileFormats.Tiff.Enums.TiffExpectedFormat.Default)},
                { "tga", new TgaOptions()},
                { "webp", new WebPOptions()},
                { "ico", new IcoOptions()}
            };
            //Vector image formats that support both - save and load, their default save options
            //and their rasterization options when exporting to another vector image
            Dictionary<string, (ImageOptionsBase, VectorRasterizationOptions)> vectorFormatsThatSupportExportAndImport
                = new Dictionary<string, (ImageOptionsBase, VectorRasterizationOptions)>()
            {
                { "emf", (new EmfOptions(),new EmfRasterizationOptions()) },
                { "svg", (new SvgOptions(), new SvgRasterizationOptions())},
                { "wmf", (new WmfOptions(), new WmfRasterizationOptions())},
                { "emz", (new Aspose.Imaging.ImageOptions.EmfOptions(){ Compress = true }, new EmfRasterizationOptions())},
                { "wmz", (new Aspose.Imaging.ImageOptions.WmfOptions(){ Compress = true }, new WmfRasterizationOptions())},
                { "svgz", (new Aspose.Imaging.ImageOptions.SvgOptions(){ Compress = true }, new SvgRasterizationOptions())},
            };
            ///Raster and vector formats from which we can load images
            //Formats that can be only saved (supported only save to this formats)
            Dictionary<string, ImageOptionsBase> formatsOnlyForExport = new Dictionary<string, ImageOptionsBase>()
            {
                { "psd", new PsdOptions()},
                { "dxf", new DxfOptions(){ TextAsLines = true,ConvertTextBeziers = true} },
                { "pdf", new PdfOptions()},
                { "html", new Html5CanvasOptions()},
            };
            //Raster formats that can be only loaded
            List<string> formatsOnlyForImport = new List<string>()
            {
                "djvu", "dng", "dib"
            };
            //Vector formats only for loading and their rasterization options when exporting to another vector format
            Dictionary<string, VectorRasterizationOptions> vectorFormatsOnlyForImport = new Dictionary<string, VectorRasterizationOptions>()
            {
                {"eps", new EpsRasterizationOptions()},
                {"cdr", new CdrRasterizationOptions() },
                {"cmx", new CmxRasterizationOptions() },
                {"otg", new OtgRasterizationOptions() },
                {"odg", new OdgRasterizationOptions() }
            };
            //Get total set of formats to what we can export images
            Dictionary<string, ImageOptionsBase> exportFormats = vectorFormatsThatSupportExportAndImport
                .ToDictionary(s => s.Key, s => s.Value.Item1)
                .Union(formatsOnlyForExport)
                .Concat(rasterFormatsThatSupportExportAndImport)
                .ToDictionary(s => s.Key, s => s.Value);
            //Get total set of formats that can be loaded
            Dictionary<string, VectorRasterizationOptions> importFormats = vectorFormatsOnlyForImport
                .Union(formatsOnlyForImport.ToDictionary(s => s, s => new VectorRasterizationOptions()))
                .Union(vectorFormatsThatSupportExportAndImport.ToDictionary(s => s.Key, s => s.Value.Item2))
                .ToDictionary(s => s.Key, s => s.Value);
            return (Import: importFormats, Export: exportFormats);
        }
    }
}
You can also refer to the official code: https://products.aspose.com/imaging/net/conversion/png-to-svg/

itextsharp scale image sent as a string

I send the following string
// HTML fragment handed to CreateSimpleHtmlParagraph; inner quotes are escaped.
string text = "<img alt=\"\" src=\"http://localhost:6666/content/userfiles/admin/images/q4.png\" /><br/>";
to:
/// <summary>
/// Parses an HTML fragment with HTMLWorker and collects the resulting
/// elements in a left-aligned paragraph that uses the embedded
/// ARIALUNI.ttf font at size 10.
/// </summary>
/// <param name="text">HTML markup to parse.</param>
/// <returns>A paragraph holding every element produced by the parser.</returns>
public static Paragraph CreateSimpleHtmlParagraph (String text)
{
    // The font file is looked up under the web application's ~/Content/ folder.
    var contentFolder = System.Web.HttpContext.Current.Server.MapPath("~/Content/");
    var unicodeBaseFont = BaseFont.CreateFont(contentFolder + "ARIALUNI.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    var paragraphFont = new Font(unicodeBaseFont, 10, Font.NORMAL);

    var paragraph = new Paragraph();
    paragraph.Alignment = Element.ALIGN_LEFT;
    paragraph.Font = paragraphFont;

    var styleSheet = new StyleSheet();
    styleSheet.LoadTagStyle(HtmlTags.SPAN, HtmlTags.FONTSIZE, "10");
    styleSheet.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);

    using (var htmlReader = new StringReader(text))
    {
        foreach (var parsedElement in iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(htmlReader, styleSheet))
        {
            paragraph.Add(parsedElement);
        }
    }

    return paragraph;
}
using:
// Wrap the incoming HTML in a size-10 span headed by a bold, underlined "Notes" label.
var notesHtml = "<span style='font-size:10;'><b><u>Notes</u></b>: <br/><br/>" + text + "</span>";
document.Add(CreateSimpleHtmlParagraph(notesHtml));
to generate a PDF using iTextSharp. It works very well, except that the image is too large! Is there a way to check whether the string includes a width and height and, if not, add them to scale the image?

As Bruno said, please upgrade to XMLWorker.
What you need to do is implement the no longer supported IHTMLTagProcessor interface for the HTML tag that you are interested in. You are interested in the img tag so you'll want to just use basically what iText is already doing but with your own logic. Unfortunately their class is private so you can't just subclass it but you can see its contents here. You'll basically end up with a class like this:
/// <summary>
/// Tag processor for HTMLWorker that gives an image tag a default width
/// and height of "400px" when the tag does not specify them.
/// </summary>
public class MyImageTagProcessor : IHTMLTagProcessor {
    /// <summary>
    /// Fills in missing size attributes, then creates and processes the image.
    /// </summary>
    void IHTMLTagProcessor.StartElement(HTMLWorker worker, string tag, IDictionary<string, string> attrs) {
        AddIfMissing(attrs, HtmlTags.WIDTH, "400px");
        AddIfMissing(attrs, HtmlTags.HEIGHT, "400px");

        worker.UpdateChain(tag, attrs);
        var image = worker.CreateImage(attrs);
        worker.ProcessImage(image, attrs);
        worker.UpdateChain(tag);
    }

    /// <summary>
    /// Nothing to do when the tag closes.
    /// </summary>
    void IHTMLTagProcessor.EndElement(HTMLWorker worker, string tag) {
    }

    // Adds the attribute only when the tag did not already carry it.
    private static void AddIfMissing(IDictionary<string, string> attrs, string name, string fallback) {
        if (attrs.ContainsKey(name)) {
            return;
        }
        attrs.Add(name, fallback);
    }
}
Then in your code create a Dictionary holding the tag that you are targeting and an instance of that class:
// Route the img tag to the custom processor.
var processors = new Dictionary<string, IHTMLTagProcessor>
{
    { HtmlTags.IMG, new MyImageTagProcessor() }
};
Finally, change your parsing call to use one of the overloads. We don't need the fourth parameter (providers), so we're passing null for it.
// Overload that also takes the custom tag processors; the fourth argument (providers) is not needed, so null is passed.
var elements = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles, processors, null);

HTMLWorker itextSharp image src

I am trying to use HTMLWorker using the following:
/// <summary>
/// Parses an HTML string with HTMLWorker and returns the string form of the
/// resulting element list.
/// </summary>
/// <param name="s">HTML markup to parse.</param>
/// <returns>The result of calling <c>ToString()</c> on the parsed element list.</returns>
public static string toWorks(string s)
{
    string fontpath = System.Web.HttpContext.Current.Server.MapPath("~/Content/");
    BaseFont bf = BaseFont.CreateFont(fontpath + "ARIALUNI.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    // NOTE(review): the font is created but not applied to anything in this method.
    var f = new Font(bf, 10, Font.NORMAL);
    var styles = new StyleSheet();
    styles.LoadTagStyle(HtmlTags.SPAN, HtmlTags.FONTSIZE, "10");
    styles.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);
    using (var sr = new StringReader(s))
    {
        // The previous version re-added every element to the list while
        // enumerating it, which throws InvalidOperationException as soon as
        // the list is non-empty; the parsed list is used as-is instead.
        List<IElement> list = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles);
        // NOTE(review): List<T> does not override ToString(), so this yields
        // the list's type name rather than the parsed content - confirm that
        // this is what callers expect.
        return list.ToString();
    }
}
It converts:
src="/Content/UserFiles/635380078478327671/Images/test.png
To:
C:\Content\UserFiles\635380078478327671\Images\test.png
Any suggestions?

Please compare the following two examples:
HtmlMovies1
HtmlMovies2
If you use the first example to render an HTML file with images, you probably won't succeed. The second example introduces an ImageProvider implementation.
In the getImage() method of the ImageProvider interface, you get information about the path to an image. It is up to you to interpret this path. For instance: if the path is /Content/UserFiles/635380078478327671/Images/test.png, you can create an Image object by loading the bytes from that path, possibly after applying some minor changes to the path.
If you don't create an ImageProvider class, iText will do a single guess to find the path. In your case, that guess is wrong.
You can find the C# equivalent of the examples here: http://tinyurl.com/itextsharpIIA2C09

Merge a list of pdfs and create new bookmarks (C#)

The project is in C# and uses iTextSharp.
I have a dictionary with a title (string) and file content (byte array). I loop through this dictionary and merge all files together. What I need now is to add bookmarks to the start of the first page in each file, but I should not add any new pages or text to the final document. I have tried different solutions, but all seem to add a table of contents page, a new page before each page or some text at the start of the page.
None of the files have bookmarks originally.
I am looking for a bookmarks structure that looks something like this:
File1
File2
SomeCategory
File3
File4
I would very much appreciate it if anyone could point me in the right direction.
My function for merging the files looks like this:
/// <summary>
/// Merge PDF files, and stamp certificates. This is a modified version of the example in the link below.
/// See: http://www.codeproject.com/Articles/28283/Simple-NET-PDF-Merger for more information.
/// </summary>
/// <param name="sourceFiles">Files to be merged, keyed by a document name; the value is the PDF content.</param>
/// <returns>Byte array with the combined files.</returns>
/// <exception cref="Exception">Wraps any exception raised while merging.</exception>
public static byte[] MergeFiles(Dictionary<string, byte[]> sourceFiles)
{
    var document = new Document();
    var output = new MemoryStream();
    try
    {
        // Initialize pdf writer
        var writer = PdfWriter.GetInstance(document, output);
        writer.PageEvent = new PdfPageEvents();
        // Open document to write
        document.Open();
        var content = writer.DirectContent;
        // Iterate through all pdf documents
        foreach (var sourceFile in sourceFiles)
        {
            // Create pdf reader
            var reader = new PdfReader(sourceFile.Value);
            var numberOfPages = reader.NumberOfPages;
            // Iterate through all pages
            for (var currentPageIndex = 1; currentPageIndex <=
                               numberOfPages; currentPageIndex++)
            {
                // Determine page size for the current page
                document.SetPageSize(
                   reader.GetPageSizeWithRotation(currentPageIndex));
                // Create page
                document.NewPage();
                var importedPage =
                  writer.GetImportedPage(reader, currentPageIndex);
                // Determine page orientation
                var pageOrientation = reader.GetPageRotation(currentPageIndex);
                if ((pageOrientation == 90) || (pageOrientation == 270))
                {
                    // Rotated page: apply a rotation matrix and shift by the page height.
                    content.AddTemplate(importedPage, 0, -1f, 1f, 0, 0,
                       reader.GetPageSizeWithRotation(currentPageIndex).Height);
                }
                else
                {
                    content.AddTemplate(importedPage, 1f, 0, 0, 1f, 0, 0);
                }
                // Add stamp to certificates
                if (sourceFile.Key.IsValidDocumentReference())
                    AddStamp(content, document, sourceFile.Key, currentPageIndex, numberOfPages);
            }
        }
    }
    catch (Exception exception)
    {
        throw new Exception("An unexpected exception occured during the merging process", exception);
    }
    finally
    {
        document.Close();
    }
    // ToArray copies only the bytes that were written. GetBuffer would return
    // the whole backing buffer, including unused trailing bytes after the PDF data.
    return output.ToArray();
}

Thanks to Bruno Lowagie who pointed me in the right direction, I was able to produce a solution to my problem.
This is my solution:
/// <summary>
/// Concatenates the given PDF files with PdfCopy and builds a two-level
/// outline: one entry per chapter group, with one child entry per file that
/// points at the file's first page in the merged document.
/// </summary>
/// <param name="sourceFiles">PDF contents keyed by the document description.</param>
/// <returns>The merged PDF as a byte array.</returns>
public static byte[] MergeFilesAndAddBookmarks(Dictionary<PrintDocument, byte[]> sourceFiles)
{
    using (var ms = new MemoryStream())
    {
        using (var document = new Document())
        using (var copy = new PdfCopy(document, ms))
        {
            // Group the files by chapter number
            var chapters = sourceFiles.GroupBy(entry => entry.Key.ChapterNumber);
            document.Open();

            var outlines = new List<Dictionary<string, object>>();
            // 1-based page number in the merged document where the next file starts.
            var nextStartPage = 1;

            foreach (var chapter in chapters)
            {
                var chapterOutline = new Dictionary<string, object>();
                outlines.Add(chapterOutline);
                chapterOutline.Add("Title", chapter.First().Key.ChapterName);
                var children = new List<Dictionary<string, object>>();
                chapterOutline.Add("Kids", children);

                foreach (var file in chapter)
                {
                    using (var reader = new PdfReader(file.Value))
                    {
                        var pageCount = reader.NumberOfPages;
                        for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                        {
                            // The bookmark is created together with the file's first page.
                            if (pageNumber == 1)
                            {
                                var bookmark = new Dictionary<string, object>();
                                children.Add(bookmark);
                                bookmark["Title"] = file.Key.Title;
                                bookmark["Action"] = "GoTo";
                                bookmark["Page"] = String.Format("{0} Fit", nextStartPage);
                            }
                            var importedPage = copy.GetImportedPage(reader, pageNumber);
                            copy.AddPage(importedPage);
                        }
                        nextStartPage += pageCount;
                        reader.Close();
                    }
                }
            }

            copy.Outlines = outlines;
            document.Close();
            copy.Close();
            ms.Close();
        }
        return ms.ToArray();
    }
}
}
/// <summary>
/// Describes one PDF file to be merged by MergeFilesAndAddBookmarks.
/// </summary>
public class PrintDocument
{
/// <summary>Text used as the title of the file's bookmark.</summary>
public string Title { get; set; }
/// <summary>Text used as the title of the chapter-level outline entry.</summary>
public string ChapterName { get; set; }
/// <summary>Key by which files are grouped into chapters.</summary>
public int ChapterNumber { get; set; }
}

Merging xps documents make last one duplicate

I have a problem when merging multiple XPS documents into one. When I merge them, the resulting XPS contains the last XPS document duplicated. Here is my function to merge (a modified version of this question):
/// <summary>
/// Builds an in-memory XPS document whose document sequence references every
/// document of every XPS file in <paramref name="ListToMerge"/>.
/// </summary>
/// <param name="ListToMerge">The XPS documents to combine, in order.</param>
/// <returns>The combined XPS document; it is returned without being closed.</returns>
public XpsDocument CreateXPSStream(List<XpsDocument> ListToMerge)
{
    var backingStream = new MemoryStream();
    Package package = Package.Open(backingStream, FileMode.Create);

    // (Re-)register the package under a fixed pack URI.
    string pack = "pack://temp.xps";
    var packUri = new Uri(pack);
    PackageStore.RemovePackage(packUri);
    PackageStore.AddPackage(packUri, package);

    var mergedDocument = new XpsDocument(package, CompressionOption.SuperFast, pack);
    var mergedSequence = new FixedDocumentSequence();

    foreach (XpsDocument source in ListToMerge)
    {
        foreach (DocumentReference original in source.GetFixedDocumentSequence().References)
        {
            // Copy the reference; the base URI is set before the source.
            var copiedReference = new DocumentReference();
            ((IUriContext)copiedReference).BaseUri = ((IUriContext)original).BaseUri;
            copiedReference.Source = original.Source;
            mergedSequence.References.Add(copiedReference);
        }
    }

    XpsDocument.CreateXpsDocumentWriter(mergedDocument).Write(mergedSequence);
    // The document and its package are left open for the caller.
    //xpsDoc.Close();
    //container.Close();
    return mergedDocument;
}
The result goes to a DocumentViewer, which displays it to the user.

I created the following function and it works for me.
/// <summary>
/// Writes a new XPS file whose document sequence contains every document of
/// the given source XPS documents. An existing file at the target path is
/// deleted first.
/// </summary>
/// <param name="newFile">Path of the XPS file to create.</param>
/// <param name="sourceDocuments">The XPS documents to combine, in order.</param>
public void MergeXpsDocument(string newFile, List<XpsDocument> sourceDocuments)
{
    if (File.Exists(newFile))
    {
        File.Delete(newFile);
    }

    var target = new XpsDocument(newFile, System.IO.FileAccess.ReadWrite);
    var writer = XpsDocument.CreateXpsDocumentWriter(target);
    var mergedSequence = new FixedDocumentSequence();

    foreach (var source in sourceDocuments)
    {
        foreach (DocumentReference original in source.GetFixedDocumentSequence().References)
        {
            var copiedReference = new DocumentReference();
            copiedReference.Source = original.Source;
            ((IUriContext)copiedReference).BaseUri = ((IUriContext)original).BaseUri;
            // Load the referenced document and attach it to the new reference
            // before the reference joins the merged sequence.
            FixedDocument loaded = copiedReference.GetDocument(true);
            copiedReference.SetDocument(loaded);
            mergedSequence.References.Add(copiedReference);
        }
    }

    writer.Write(mergedSequence);
    target.Close();
}

Develop Reference

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

PDFBox doesn't embed all fonts - C#

Related

Images.png Manipulation to SVG in C#

itextsharp scale image sent as a string

HTMLWorker itextSharp image src

Merge a list of pdfs and create new bookmarks (C#)

Merging xps documents make last one duplicate

Categories

Resources