I'm parsing HTML that contains some images.
These images are stored as embedded resources, not in the filesystem.
As far as I know, I need to set a custom image provider in HtmlPipelineContext, and this provider needs to return either the image path or the iTextSharp image.
The question is: does anybody know which method of AbstractImageProvider I need to implement, and how?
this is my code:
// Renders every HTML fragment in `list` into one in-memory PDF and captures
// the resulting bytes in `renderedBuffer`.
var list = new List<string> { text };
byte[] renderedBuffer;
using (var outputMemoryStream = new MemoryStream())
{
    using (var pdfDocument = new Document(PageSize.A4, 30, 30, 30, 30))
    {
        var pdfWriter = PdfWriter.GetInstance(pdfDocument, outputMemoryStream);
        // Keep the stream usable after the document (and its writer) is closed.
        pdfWriter.CloseStream = false;
        pdfDocument.Open();

        // HTML pipeline: custom image provider plus the default HTML tag processors.
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(new CssAppliersImpl());
        htmlContext.SetImageProvider(new MyImageProvider());
        htmlContext.SetTagFactory(Tags.GetHtmlTagProcessorFactory());

        // CSS resolution -> HTML processing -> PDF writing.
        ICSSResolver cssResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(true);
        CssResolverPipeline pipeline = new CssResolverPipeline(
            cssResolver,
            new HtmlPipeline(htmlContext, new PdfWriterPipeline(pdfDocument, pdfWriter)));
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser(worker);

        foreach (var htmlText in list)
        {
            using (var htmlViewReader = new StringReader(htmlText))
            {
                p.Parse(htmlViewReader);
            }
        }
    }

    // ToArray copies everything written to the stream. The previous manual
    // Position/Read copy ignored Read's return value and depended on the
    // stream position still pointing at the end of the data.
    renderedBuffer = outputMemoryStream.ToArray();
}
Thanks in advance.
Using a custom image provider for this doesn't seem to be supported. The only thing it really supports is changing root paths.
However, here's one solution to the problem:
Create a new html tag, called <resimg src="{resource name}"/>, and write a custom tag processor for it.
Here's the implementation:
/// <summary>
/// Tag processor for the custom <c>resimg</c> tag. It looks up the bitmap named by
/// the tag's <c>src</c> attribute in <c>Resources.ResourceManager</c> and emits it
/// as an image chunk with the tag's CSS applied.
/// </summary>
public class ResourceImageHtmlTagProcessor : AbstractTagProcessor
{
    /// <summary>
    /// Builds the image element when the closing of the tag is reached.
    /// </summary>
    /// <param name="ctx">Worker context used to obtain the HTML pipeline context.</param>
    /// <param name="tag">The tag being processed; must carry a <c>src</c> attribute.</param>
    /// <param name="currentContent">Content collected inside the tag (not used).</param>
    /// <returns>A single-element list holding the styled image chunk.</returns>
    /// <exception cref="RuntimeWorkerException">
    /// The <c>src</c> attribute is missing or empty, no resource has that name,
    /// or the resource is not a bitmap.
    /// </exception>
    public override IList<IElement> End(IWorkerContext ctx, Tag tag, IList<IElement> currentContent)
    {
        // The indexer would throw KeyNotFoundException for a tag without src;
        // report that as a worker error like the other failure modes.
        string src;
        if (!tag.Attributes.TryGetValue("src", out src) || string.IsNullOrEmpty(src))
        {
            throw new RuntimeWorkerException("The resimg tag requires a non-empty src attribute.");
        }

        object resource = Resources.ResourceManager.GetObject(src);
        if (resource == null)
        {
            throw new RuntimeWorkerException("No resource with the name: " + src);
        }

        var bitmap = resource as Bitmap;
        if (bitmap == null)
        {
            throw new RuntimeWorkerException("The resource is not a bitmap: " + src);
        }

        // Convert the GDI+ bitmap to raw bytes, which is what iTextSharp's Image accepts.
        var converter = new ImageConverter();
        var image = Image.GetInstance((byte[])converter.ConvertTo(bitmap, typeof(byte[])));

        HtmlPipelineContext htmlPipelineContext = this.GetHtmlPipelineContext(ctx);

        // CSS is applied twice on purpose: once to the image, once to the chunk wrapping it.
        var styledImage = (Image)this.GetCssAppliers().Apply(image, tag, htmlPipelineContext);
        return new List<IElement>(1)
        {
            this.GetCssAppliers().Apply(
                new Chunk(styledImage, 0f, 0f, true),
                tag,
                htmlPipelineContext)
        };
    }
}
To configure your new processor replace the line where you specify the TagFactory with the following:
// Start from the default HTML tag processors, register the custom processor
// for the "resimg" tag, then hand the factory to the pipeline context.
var tagProcessorFactory = Tags.GetHtmlTagProcessorFactory();
var resourceImageTagNames = new[] { "resimg" };
tagProcessorFactory.AddProcessor(new ResourceImageHtmlTagProcessor(), resourceImageTagNames);
htmlContext.SetTagFactory(tagProcessorFactory);
Related
I'm trying to test this method, which checks for a specific footer tag to show.
here is the function:
/// <summary>
/// Repositions the document's auto-tagging pointer so that footer content is
/// tagged at the right place in the tag tree for the current page.
/// </summary>
/// <param name="pdfDoc">Document whose tag structure context supplies the auto-tagging pointer.</param>
/// <param name="autoP">Receives the auto-tagging pointer (moved when a footer tag is recorded for the page).</param>
/// <param name="posBackup">Receives a new pointer constructed from <paramref name="autoP"/> before it is moved.</param>
/// <param name="p">Receives the <c>PagesTag</c> entry for <c>PageNumber</c>, or the default value when there is none.</param>
public void CheckForOrderOfFooterTags(PdfDocument pdfDoc, out TagTreePointer autoP, out TagTreePointer posBackup, out TagTreeInfo p)
{
    autoP = pdfDoc.GetTagStructureContext().GetAutoTaggingPointer();
    posBackup = new TagTreePointer(autoP);
    PagesTag.TryGetValue(PageNumber, out p);

    // Nothing recorded for this page: leave the auto-tagging pointer where it is.
    if (p?.TagTreePointer == null)
    {
        return;
    }

    // Read the recorded tag's index first, then move to its parent so that new
    // content is placed in the tag structure next to it.
    int recordedIndex = p.TagTreePointer.GetIndexInParentKidsList();
    autoP.MoveToPointer(p.TagTreePointer).MoveToParent();

    // Setting the next kid index only works on taggable items.
    if (!p.Tagged)
    {
        return;
    }

    autoP.SetNextNewKidIndex(recordedIndex + 1);
}
and this is the test:
/// <summary>
/// Builds a small tagged PDF with a header and a footer block, then verifies that
/// <c>CheckForOrderOfFooterTags</c> can be called and hands back its pointers.
/// </summary>
public void Check_For_Specific_Tag_Footer_To_Show_Successfully_Called()
{
    // Arrange: footer section and the handler under test
    var ir = new TestIllustrationReport().Report;
    var reportProperties = new TestDocument().ReportProperties;
    var sec = new FooterSection(new ProductContent(ir));
    sec.Build();
    Dictionary<int, TagTreeInfo> dict = new Dictionary<int, TagTreeInfo>();
    reportProperties.FooterTag = dict;
    FooterEventHandler footerEvent = new FooterEventHandler(sec, reportProperties);

    // Must have write permissions to the path folder
    var path = System.IO.Path.GetTempPath();
    var fileName = System.IO.Path.ChangeExtension("Test", ".pdf");
    var com = System.IO.Path.Combine(path, fileName);

    // Arrange: PDF document
    PdfWriter writer = new PdfWriter(com);
    PdfDocument pdf = new PdfDocument(writer);
    // Enable tagging before any content is added so the header and footer are
    // both part of the tag structure (it used to be switched on after the header).
    pdf.SetTagged();
    iText.Layout.Document document = new iText.Layout.Document(pdf);
    pdf.GetCatalog().SetLang(new PdfString("en-US"));
    pdf.GetCatalog().SetViewerPreferences(new PdfViewerPreferences().SetDisplayDocTitle(true));
    PdfDocumentInfo info = pdf.GetDocumentInfo();
    info.SetTitle("Testing tags");

    // Header; adding it makes the layout engine create the first page.
    Paragraph header = new Paragraph(TestData.Header)
        .SetTextAlignment(TextAlignment.CENTER)
        .SetFontSize(20);
    document.Add(header);

    // Take the page from the document itself. A PdfDocumentEvent built from
    // (type, document) carries no page, so its GetPage() returned null here.
    PdfPage page = pdf.GetFirstPage();

    // Rectangle and fixed-position container for the footer paragraph
    Rectangle rectangle = new Rectangle(
        pdf.GetDefaultPageSize().GetX() + document.GetLeftMargin(),
        pdf.GetDefaultPageSize().GetTop() - 80,
        page.GetPageSize().GetWidth() - document.GetLeftMargin() - document.GetRightMargin(),
        50);
    Div canvas = new Div().SetFixedPosition(pdf.GetPageNumber(page), rectangle.GetLeft(), rectangle.GetBottom(), rectangle.GetWidth());
    Paragraph pFooter = new Paragraph(TestData.Paragraph);
    pFooter.GetAccessibilityProperties().SetRole("H");
    canvas.Add(pFooter);
    document.Add(canvas);

    // Don't close document itself! It would close the PdfDocument!
    document.GetRenderer().Close();

    // Act: each out parameter gets its own variable so the results can be inspected.
    TagTreePointer autoPointer;
    TagTreePointer backupPointer;
    TagTreeInfo footerInfo;
    footerEvent.CheckForOrderOfFooterTags(pdf, out autoPointer, out backupPointer, out footerInfo);
    document.Close();

    // Assert
    Assert.NotNull(pdf);
    Assert.NotNull(page);
    Assert.NotNull(autoPointer);
    Assert.NotNull(backupPointer);
}
For this test, I needed to create the pdf, set and build the footer section. I added the header to the pdf and the "footer test", I also set the document to be tagged.
I don't know why the page is always null, even though I'm adding a header and a paragraph to the document.
I have the following code:
// Wrap the parameter's string form in a text run and insert that run
// immediately after the content control.
string replaceValueString = parameterValue.ToString();
Run replaceRun = new Run(new Text(replaceValueString));
contentControl.InsertAfterSelf(replaceRun);
I need to add a similar logic, but instead of Text, I need to add Picture (or some other type of image instance), like that:
// Desired shape only (illustrative): append an image element to the run instead of a Text element.
replaceRun.Append(new Picture(data));
I have a Bitmap instance, and I can also pass a byte[] or a Stream of the image. My only problem is that I have found virtually no examples of how to construct this Picture class, which inherits from OpenXmlCompositeElement.
Can someone provide me with some examples and guides for converting a Bitmap/byte[]/Stream image into a DocumentFormat.OpenXml.Wordprocessing.Picture (or any other OpenXmlElement)?
So far I found only this example:
https://learn.microsoft.com/en-us/office/open-xml/how-to-insert-a-picture-into-a-word-processing-document
...it creates a Drawing class instead of a picture, and it uses some DW SDK, which I don't have access to.
The example you have provided can seem daunting at first but it is very useable if you work through it...
it uses some DW SDK, which I don't have access to.
The DW is simply just an alias to some namespaces that you will no doubt have access to already. Make sure you include the following at the top of your class file:
using A = DocumentFormat.OpenXml.Drawing;
using DW = DocumentFormat.OpenXml.Drawing.Wordprocessing;
using PIC = DocumentFormat.OpenXml.Drawing.Pictures;
With that in mind. Let's start with your Bitmap. First, you want to use that to create an ImagePart. The following code may seem like it is adding an image straight to the main document, but it doesn't quite work like that. Don't worry, we will move it to your run later.
// Register a BMP image part on the main document part, then stream the
// bitmap's bytes into it.
var mainDocumentPart = wordDoc.MainDocumentPart;
var imagePart = mainDocumentPart.AddImagePart(ImagePartType.Bmp);
using (var stream = new System.IO.MemoryStream())
{
    image.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
    // Rewind so FeedData reads from the start of what was just written.
    stream.Seek(0, System.IO.SeekOrigin.Begin);
    imagePart.FeedData(stream);
}
We need to get the ID of the image part, so that we can use that when creating the Drawing object.
// Relationship ID of the image part within the main document part; the Drawing refers to the image by this ID.
string imagePartId = mainDocumentPart.GetIdOfPart(imagePart);
Next, we need to use that code that you didn't like from here.
Let's just take everything we have so far and stick it into a function that will create a Drawing from a Bitmap.
/// <summary>
/// Embeds <paramref name="image"/> in the document as a BMP image part and returns
/// an inline <c>Drawing</c> that references it. The drawing is not attached to the
/// document body; the caller places it (for example inside a <c>Run</c>).
/// </summary>
/// <param name="wordDoc">Open document that receives the image part.</param>
/// <param name="image">Bitmap to embed.</param>
/// <returns>An inline drawing sized from the bitmap's pixel dimensions and resolution.</returns>
/// <exception cref="System.ArgumentNullException">An argument is null.</exception>
/// <exception cref="System.InvalidOperationException">The document has no main document part.</exception>
static Drawing ConvertBitmapToDrawing(WordprocessingDocument wordDoc, System.Drawing.Bitmap image)
{
    if (wordDoc == null)
    {
        throw new System.ArgumentNullException(nameof(wordDoc));
    }

    if (image == null)
    {
        throw new System.ArgumentNullException(nameof(image));
    }

    MainDocumentPart mainDocumentPart = wordDoc.MainDocumentPart;
    if (mainDocumentPart == null)
    {
        throw new System.InvalidOperationException("The document has no main document part.");
    }

    ImagePart imagePart = mainDocumentPart.AddImagePart(ImagePartType.Bmp);
    using (System.IO.MemoryStream stream = new System.IO.MemoryStream())
    {
        image.Save(stream, System.Drawing.Imaging.ImageFormat.Bmp);
        stream.Position = 0;
        imagePart.FeedData(stream);
    }

    string imagePartId = mainDocumentPart.GetIdOfPart(imagePart);

    // Drawing extents are expressed in EMUs (914400 per inch). Deriving them from
    // the bitmap keeps its aspect ratio; the previous fixed 990000 x 792000 extent
    // stretched every image to the same box.
    const double emusPerInch = 914400d;
    const float fallbackDpi = 96f;
    float dpiX = image.HorizontalResolution > 0f ? image.HorizontalResolution : fallbackDpi;
    float dpiY = image.VerticalResolution > 0f ? image.VerticalResolution : fallbackDpi;
    long widthEmu = (long)(image.Width * emusPerInch / dpiX);
    long heightEmu = (long)(image.Height * emusPerInch / dpiY);

    var element =
        new Drawing(
            new DW.Inline(
                new DW.Extent() { Cx = widthEmu, Cy = heightEmu },
                new DW.EffectExtent()
                {
                    LeftEdge = 0L,
                    TopEdge = 0L,
                    RightEdge = 0L,
                    BottomEdge = 0L
                },
                // NOTE(review): the drawing ID is fixed; presumably it should be unique
                // when several images are inserted into one document - confirm.
                new DW.DocProperties()
                {
                    Id = (UInt32Value)1U,
                    Name = "Picture 1"
                },
                new DW.NonVisualGraphicFrameDrawingProperties(
                    new A.GraphicFrameLocks() { NoChangeAspect = true }),
                new A.Graphic(
                    new A.GraphicData(
                        new PIC.Picture(
                            new PIC.NonVisualPictureProperties(
                                new PIC.NonVisualDrawingProperties()
                                {
                                    Id = (UInt32Value)0U,
                                    Name = "New Bitmap Image.jpg"
                                },
                                new PIC.NonVisualPictureDrawingProperties()),
                            new PIC.BlipFill(
                                new A.Blip(
                                    new A.BlipExtensionList(
                                        new A.BlipExtension()
                                        {
                                            Uri = "{28A0092B-C50C-407E-A947-70E740481C1C}"
                                        }))
                                {
                                    // Links the picture to the image part added above.
                                    Embed = imagePartId,
                                    CompressionState = A.BlipCompressionValues.Print
                                },
                                new A.Stretch(
                                    new A.FillRectangle())),
                            new PIC.ShapeProperties(
                                new A.Transform2D(
                                    new A.Offset() { X = 0L, Y = 0L },
                                    new A.Extents() { Cx = widthEmu, Cy = heightEmu }),
                                new A.PresetGeometry(
                                    new A.AdjustValueList())
                                {
                                    Preset = A.ShapeTypeValues.Rectangle
                                })))
                    {
                        Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture"
                    }))
            {
                DistanceFromTop = (UInt32Value)0U,
                DistanceFromBottom = (UInt32Value)0U,
                DistanceFromLeft = (UInt32Value)0U,
                DistanceFromRight = (UInt32Value)0U,
                EditId = "50D07946"
            });
    return element;
}
With this function, you can finally just do something like this:
// Build the drawing from the bitmap, wrap it in a run, and insert that run
// directly after the content control.
var pictureRun = new Run(ConvertBitmapToDrawing(wordProcessingDocument, myBitmap));
contentControl.InsertAfterSelf(pictureRun);
I'm trying to extract text from PDF using the following method:
/// <summary>
/// Extracts the text located inside a rectangular region of one page of a PDF.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to read.</param>
/// <param name="pageId">Page number passed to <c>PdfDocument.GetPage</c>.</param>
/// <param name="rectangleDimensions">
/// At least four values, passed in order to the <c>Rectangle</c> constructor
/// (x, y, width, height); any further values are ignored.
/// </param>
/// <returns>The text reported by the location-based extraction strategy for that region.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="rectangleDimensions"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="rectangleDimensions"/> has fewer than four values.</exception>
public static string GetRectangleText(string pdfPath, int pageId, float[] rectangleDimensions)
{
    // Validate before opening the file so bad input fails fast with a clear
    // message rather than a NullReference/IndexOutOfRange after the PDF is open.
    if (rectangleDimensions == null)
    {
        throw new System.ArgumentNullException(nameof(rectangleDimensions));
    }

    if (rectangleDimensions.Length < 4)
    {
        throw new System.ArgumentException(
            "Four values are required: x, y, width, height.",
            nameof(rectangleDimensions));
    }

    using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(pdfPath)))
    {
        var page = pdfDoc.GetPage(pageId);
        var rect = new iText.Kernel.Geom.Rectangle(
            rectangleDimensions[0],
            rectangleDimensions[1],
            rectangleDimensions[2],
            rectangleDimensions[3]);

        // Only text events that fall inside the region reach the extraction strategy.
        var filters = new IEventFilter[] { new TextRegionEventFilter(rect) };
        var filteredTextEventListener = new FilteredTextEventListener(new LocationTextExtractionStrategy(), filters);
        return PdfTextExtractor.GetTextFromPage(page, filteredTextEventListener);
    }
}
While it works fine for most documents, several PDFs which would seem to have their encoding broken, return strings like ǪȃǷǻȁǭǵǶǬdzȇǹǺǸǶǰǺǭdzȄǹǺǪǨ ,668(')25&216758&7,21 what should in fact be ВЫПУЩЕНО ДЛЯ СТРОИТЕЛЬСТВА / ISSUED FOR CONSTRUCTION
I wonder if some kind of specific LocationTextExtractionStrategy would help?
I'd like to create a PDF with a barcode using the iText 7 library.
There are a lot of examples using older versions of iText, or Java, but I can't find a solution for iText 7.
(Generally, the new library has no implementation of the createImageWithBarcode method.)
My solution could look like this:
// Verbatim string literals use the '@' prefix; '#' does not compile.
string outputPdfFile = @"c:\DEV\pdfFromScratchWithBarCode.pdf";
using (iText.Kernel.Pdf.PdfWriter writer = new iText.Kernel.Pdf.PdfWriter(outputPdfFile))
{
    using (iText.Kernel.Pdf.PdfDocument pdf = new iText.Kernel.Pdf.PdfDocument(writer))
    {
        iText.Layout.Document doc = new iText.Layout.Document(pdf);
        doc.Add(new iText.Layout.Element.Paragraph("Title"));

        iText.Barcodes.BarcodeInter25 bar = new iText.Barcodes.BarcodeInter25(pdf);
        // NOTE(review): this code has 11 digits; Interleaved 2 of 5 presumably needs an
        // even number of digits (the later snippets use 10 and 12) - confirm.
        bar.SetCode("00600123456");
        //HOW TO ADD barcode TO PDF ??
        // ...
    }
}
There is similar answer but for older version:
iText for .NET barcode
Thanks for advices.
I found the solution (create the PDF, add a barcode {type: Code 25 – Non-interleaved 2 of 5}, and set a valid position):
// Creates the PDF, adds a title paragraph, and draws the barcode at a fixed
// position on the first page through a low-level canvas.
using (var writer = new iText.Kernel.Pdf.PdfWriter(outputPdfFile))
using (var pdf = new iText.Kernel.Pdf.PdfDocument(writer))
{
    var doc = new iText.Layout.Document(pdf);
    doc.Add(new iText.Layout.Element.Paragraph("Title"));

    // Barcode definition
    var bar = new iText.Barcodes.BarcodeInter25(pdf);
    bar.SetCode("0600123456");

    var canvas = new iText.Kernel.Pdf.Canvas.PdfCanvas(pdf.GetFirstPage());

    // Alternative left disabled by the author: draw straight onto the canvas, e.g.
    // bar.PlaceBarcode(canvas, ColorConstants.BLUE, ColorConstants.GREEN).
    // Black is used for both colour arguments of the form XObject.
    var black = iText.Kernel.Colors.ColorConstants.BLACK;
    var barcodeFormXObject = bar.CreateFormXObject(black, black, pdf);

    // Transformation matrix: uniform scale, no rotation/skew, translated to (x, y).
    float scale = 1;
    float x = 450;
    float y = 700;
    canvas.AddXObject(barcodeFormXObject, scale, 0, 0, scale, x, y);
}
You can create an image from a PdfFormXObject by doing this:
// Wrap the barcode's form XObject in a layout Image so it can be added to a Document like any other element.
var barcodeImg = new Image(bar.CreateFormXObject(pdf));
Here is your code, including the changes that do the trick:
// Verbatim string literals use the '@' prefix; '#' does not compile.
string outputPdfFile = @"c:\DEV\pdfFromScratchWithBarCode.pdf";
using (var writer = new iText.Kernel.Pdf.PdfWriter(outputPdfFile))
{
    using (var pdf = new iText.Kernel.Pdf.PdfDocument(writer))
    {
        var doc = new Document(pdf);
        doc.Add(new Paragraph("Title"));

        var bar = new BarcodeInter25(pdf);
        bar.SetCode("000600123456");

        // Here's how to add a barcode to a PDF with iText 7: turn the barcode into
        // a form XObject, wrap it in a layout Image, and add that to the document.
        var barcodeImg = new Image(bar.CreateFormXObject(pdf));
        doc.Add(barcodeImg);
    }
}
I am trying to use HTMLWorker using the following:
/// <summary>
/// Parses an HTML fragment with HTMLWorker and returns the text of the parsed elements.
/// </summary>
/// <param name="s">HTML markup to parse.</param>
/// <returns>The concatenated content of every chunk in the parsed elements.</returns>
/// <remarks>
/// NOTE(review): the original looped over the parsed list while adding each element
/// back into it, which throws InvalidOperationException for any non-empty result,
/// and then returned <c>list.ToString()</c>, i.e. the list's type name. Returning the
/// chunk text is assumed to be the intent - confirm against the caller.
/// </remarks>
public static string toWorks(string s)
{
    string fontpath = System.Web.HttpContext.Current.Server.MapPath("~/Content/");

    // NOTE(review): the font is loaded but never passed to the parser; the call is
    // kept in case callers rely on its loading/validation side effect - confirm.
    BaseFont.CreateFont(fontpath + "ARIALUNI.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);

    var styles = new StyleSheet();
    styles.LoadTagStyle(HtmlTags.SPAN, HtmlTags.FONTSIZE, "10");
    styles.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);

    using (var sr = new StringReader(s))
    {
        List<IElement> list = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles);

        // Read the elements without modifying the list that is being enumerated.
        var text = new System.Text.StringBuilder();
        foreach (var e in list)
        {
            foreach (var chunk in e.Chunks)
            {
                text.Append(chunk.Content);
            }
        }

        return text.ToString();
    }
}
It converts:
src="/Content/UserFiles/635380078478327671/Images/test.png
To:
C:\Content\UserFiles\635380078478327671\Images\test.png
Any suggestions?
Please compare the following two examples:
HtmlMovies1
HtmlMovies2
If you use the first example to render an HTML file with images, you probably won't succeed. The second example introduces an ImageProvider implementation.
In the getImage() method of the ImageProvider interface, you get information about the path to an image. It is up to you to interpret this path. For instance: if the path is /Content/UserFiles/635380078478327671/Images/test.png, you can create an Image object by loading the bytes from that path, possibly after applying some minor changes to the path.
If you don't create an ImageProvider class, iText will do a single guess to find the path. In your case, that guess is wrong.
You can find the C# equivalent of the examples here: http://tinyurl.com/itextsharpIIA2C09