I send the following string
string text = "<img alt=\"\" src=\"http://localhost:6666/content/userfiles/admin/images/q4.png\" /><br/>";
to:
/// <summary>
/// Parses an HTML fragment with HTMLWorker and collects the resulting elements
/// in a left-aligned paragraph whose font is the embedded ARIALUNI.ttf at size 10.
/// </summary>
/// <param name="text">HTML markup to parse.</param>
/// <returns>A paragraph containing every element produced by the parser, in order.</returns>
public static Paragraph CreateSimpleHtmlParagraph (String text)
{
    // The font file is resolved relative to the web application's ~/Content/ folder.
    string contentDir = System.Web.HttpContext.Current.Server.MapPath("~/Content/");
    BaseFont unicodeBase = BaseFont.CreateFont(contentDir + "ARIALUNI.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);

    var paragraph = new Paragraph();
    paragraph.Alignment = Element.ALIGN_LEFT;
    paragraph.Font = new Font(unicodeBase, 10, Font.NORMAL);

    // Tag-level styles handed to the HTML parser.
    var tagStyles = new StyleSheet();
    tagStyles.LoadTagStyle(HtmlTags.SPAN, HtmlTags.FONTSIZE, "10");
    tagStyles.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);

    using (var reader = new StringReader(text))
    {
        var parsed = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(reader, tagStyles);
        foreach (var element in parsed)
        {
            paragraph.Add(element);
        }
    }

    return paragraph;
}
using:
// Wrap the supplied markup in a span that starts with a bold, underlined "Notes" label,
// then add the parsed paragraph to the document.
string notesHtml = "<span style='font-size:10;'><b><u>Notes</u></b>: <br/><br/>" + text + "</span>";
document.Add(CreateSimpleHtmlParagraph(notesHtml));
to generate a PDF using iTextSharp. It works very well, except the image is too large! Is there a way to check whether the string includes width and height and, if not, add them to scale the image?
As Bruno said, please upgrade to XMLWorker.
What you need to do is implement the no longer supported IHTMLTagProcessor interface for the HTML tag that you are interested in. You are interested in the img tag so you'll want to just use basically what iText is already doing but with your own logic. Unfortunately their class is private so you can't just subclass it but you can see its contents here. You'll basically end up with a class like this:
/// <summary>
/// Tag processor for image tags: fills in a default width and/or height when the
/// tag does not declare one, then asks the worker to create and process the image.
/// </summary>
public class MyImageTagProcessor : IHTMLTagProcessor {
    // Fallback value used for whichever dimension attribute is absent.
    private const string DefaultDimension = "400px";

    /// <summary>Adds missing dimension attributes and processes the image.</summary>
    void IHTMLTagProcessor.StartElement(HTMLWorker worker, string tag, IDictionary<string, string> attrs) {
        foreach (string dimension in new[] { HtmlTags.WIDTH, HtmlTags.HEIGHT }) {
            if (attrs.ContainsKey(dimension)) {
                continue;
            }
            // Do something special here
            attrs[dimension] = DefaultDimension;
        }

        worker.UpdateChain(tag, attrs);
        worker.ProcessImage(worker.CreateImage(attrs), attrs);
        worker.UpdateChain(tag);
    }

    /// <summary>Nothing to do when the tag closes.</summary>
    void IHTMLTagProcessor.EndElement(HTMLWorker worker, string tag) {
        // Not used
    }
}
Then in your code create a Dictionary holding the tag that you are targeting and an instance of that class:
// Route the img tag to the custom processor.
var processors = new Dictionary<string, IHTMLTagProcessor>
{
    { HtmlTags.IMG, new MyImageTagProcessor() }
};
Finally, change your parsing call to use one of the overloads. We don't need the fourth parameter (providers), so we're passing null for it.
// Overload that also takes the custom tag processors; the fourth argument (providers) is not needed, so null is passed.
var elements = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles, processors, null);
Related
I have an input string which is html. It contains images and I want to change the src property on the img
My code so far is as below:
// Rewrites the src attribute of every <img> element in htmlStr and returns the
// updated markup. Input without any images is returned unchanged.
// Cheap pre-check so markup that cannot contain an image is never parsed.
if (htmlStr.Contains("img"))
{
    var html = new HtmlDocument();
    html.LoadHtml(htmlStr);

    // The null check guards the case where the query matches no nodes.
    var images = html.DocumentNode.SelectNodes("//img");
    if (images != null && images.Count > 0)
    {
        foreach (HtmlNode image in images)
        {
            string newSrc = "MyNewValue";
            // SetAttributeValue is called unconditionally, so an <img> with no src
            // attribute no longer has to be read first.
            image.SetAttributeValue("src", newSrc);
        }

        // Serialize the modified DOM back to a string; without this the changes
        // made above never reach the returned value.
        htmlStr = html.DocumentNode.OuterHtml;
    }
}
return htmlStr;
What I am missing is how to update the htmlStr I am returning with the newSrc value for each image.
As far as I can tell, you have two options:
// Option 1: read the serialized markup directly off the root node.
// This yields a raw string, which is not ideal if you are planning to
// send it over the network or save it as a file.
string updatedStr = html.DocumentNode.OuterHtml;

// Option 2: have the document write itself to a writer, which lets you target any stream.
// A StringBuilder-backed writer is used here purely as an example.
StringBuilder sb = new StringBuilder();
using (StringWriter writer = new StringWriter(sb))
{
    html.Save(writer);
}

// Both options generate the same result, though.
Debug.Assert(updatedStr == sb.ToString());
I'm using PDFBox in a C# project to create PDF/A and PDF/A-3 files. Almost everything is working, except that when I use PDFBox to convert a normal PDF file to PDF/A, not all fonts are embedded. If I use Word to save as PDF/A, the font in question is embedded. How do I embed ArialMT too?
/// <summary>
/// Attaches PDF/A-1 identification metadata and an sRGB output intent to the
/// document held in <c>doc</c>, saves the document, and returns its catalog.
/// </summary>
/// <param name="producer">Not used by the current implementation.</param>
/// <param name="creator">Not used by the current implementation.</param>
/// <returns>The document catalog that was updated.</returns>
private PDDocumentCatalog makeDocPDFAcompliant(String producer, String creator)
{
    PDDocumentCatalog cat = doc.getDocumentCatalog();
    PDMetadata metadata = new PDMetadata(doc);
    cat.setMetadata(metadata);

    // Collect each distinct font referenced by any page's resources.
    // NOTE(review): the collected list is not consumed below; nothing here embeds fonts yet.
    List<PDFont> lstPDFonts = new List<PDFont>();
    List pages = cat.getAllPages();
    Iterator it = pages.iterator();
    while (it.hasNext())
    {
        PDPage page = (PDPage)it.next();
        var pageFont = page.getResources().getFonts();
        // NOTE(review): ToDicitonary is defined outside this block; the spelling is kept as-is.
        foreach (KeyValuePair<string, PDFont> entry in pageFont.ToDicitonary<string, PDFont>())
        {
            PDFont font = entry.Value;
            if (!lstPDFonts.Contains(font))
            {
                lstPDFonts.Add(font);
            }
        }
    }

    // Declare PDF/A part 1, conformance level A, in the XMP packet.
    XMPMetadata xmp = new XMPMetadata();
    XMPSchemaPDFAId pdfaid = new XMPSchemaPDFAId(xmp);
    xmp.addSchema(pdfaid);
    pdfaid.setConformance("A");
    pdfaid.setPart(java.lang.Integer.valueOf(1));
    pdfaid.setAbout("");
    metadata.importXMPMetadata(xmp);

    // Wrap the embedded sRGB profile resource so the Java API can read it.
    System.IO.Stream stream = new System.IO.MemoryStream(Zaumzeug.Properties.Resources.sRGB_Color_Space_Profile);
    InputStream colorProfile = new ikvm.io.InputStreamWrapper(stream);
    PDOutputIntent oi = new PDOutputIntent(doc, colorProfile);
    oi.setInfo("sRGB IEC61966-2.1");
    oi.setOutputCondition("sRGB IEC61966-2.1");
    oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
    oi.setRegistryName("http://www.color.org");
    cat.addOutputIntent(oi);

    // Verbatim string literal (@"...") so the backslashes in the path are taken literally.
    doc.save(@"D:\Examples .Net\Data\FontsNormalA.pdf");
    return cat;
}
I think this might be a bug, but if anyone can help I'd appreciate it. I currently have another question open that deals with a similar issue, but I think this question better exemplifies the problem, and more simply too. That being said I don't want to delete the old one in case it increases my wait time. I yield to the mods to decide which question is better.
Here's a sample application that creates a pdf, then a table. It adds a cell to the table and then ties a fieldpositioningevent to the cell event.
using System;
using System.Diagnostics;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace iTextSharpTextBoxInTableCell
{
    /// <summary>
    /// Sample program: writes a one-cell table to a PDF, attaches a text field to
    /// the cell through FieldPositioningEvents, and opens the resulting file.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Single definition of the output file so the create and open steps cannot drift apart.
            const string outputPath = "TextBoxInTableCell.pdf";

            // Create a PDF with a TextBox in a table cell
            BaseFont bfHelvetica = BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1250, false);
            Font helvetica12 = new Font(bfHelvetica, 12, Font.NORMAL, BaseColor.BLACK);
            Document doc = new Document(PageSize.LETTER, 18f, 18f, 18f, 18f);

            // The using block releases the file handle even if PDF generation throws.
            using (FileStream fs = new FileStream(outputPath, FileMode.Create))
            {
                PdfWriter writer = PdfWriter.GetInstance(doc, fs);
                doc.Open();

                PdfPTable myTable = new PdfPTable(1);
                myTable.TotalWidth = 568f;
                myTable.LockedWidth = true;
                myTable.HorizontalAlignment = Element.ALIGN_LEFT;

                TextField tf = new TextField(writer, new iTextSharp.text.Rectangle(67, 585, 140, 800), "cellTextBox");
                tf.Text = "test";

                PdfPCell tbCell = new PdfPCell(new Phrase(" ", helvetica12));
                iTextSharp.text.pdf.events.FieldPositioningEvents events =
                    new iTextSharp.text.pdf.events.FieldPositioningEvents(writer, tf.GetTextField());
                tbCell.CellEvent = events;
                myTable.AddCell(tbCell);

                doc.Add(myTable);
                doc.Close();
            }

            Process.Start(outputPath);
            Console.WriteLine("End Of Program Execution");
            Console.ReadLine();
        }
    }
}
Here's what this field looks like when it's generated:
As you can see, the text is squashed. I've published the generated pdf here.
I'm definitely seeing what you're seeing and, as @mkl said in your other post, the problem comes down to the appearance's BBOX entry not being set to the same size as the field. I can't really find too many examples of FieldPositioningEvents in the wild, and the ones that do exist appear to be copy-and-pastes of each other for the most part.
Anyway, if you read the actual code for FieldPositioningEvents you'll see that it can be used for both page events as well as cell events which makes me think it was intended for broader purposes possibly, but that's just a guess on my part.
One solution is to just write your own implementation of IPdfPCellEvent. Below is an example that follows the pattern of FieldPositioningEvents; however, it is specific to TextFields since we're interested in setting the /BBOX entry. It has two constructors: one that works very similarly to FieldPositioningEvents and takes a PdfWriter and a TextField, and one that just takes the most commonly set properties of a TextField and actually creates it for you. The CellLayout method is part of the interface contract and actually figures out where the annotation should be drawn.
/// <summary>
/// Cell event that resizes a text field to the cell it is attached to (less an
/// optional padding) and adds the field to the writer as an annotation.
/// </summary>
public class SingleCellFieldPositioningEvent : IPdfPCellEvent {
    /// <summary>The text field positioned by this event.</summary>
    public TextField Field { get; set; }

    /// <summary>The writer that receives the field annotation.</summary>
    public PdfWriter Writer { get; set; }

    /// <summary>Inset applied to each edge of the cell rectangle.</summary>
    public float Padding { get; set; }

    /// <summary>Uses an already configured text field.</summary>
    public SingleCellFieldPositioningEvent(PdfWriter writer, TextField field) {
        Writer = writer;
        Field = field;
    }

    /// <summary>
    /// Creates the text field from the supplied properties; Helvetica is used
    /// when no font is given.
    /// </summary>
    public SingleCellFieldPositioningEvent(PdfWriter writer, string fieldName, string text = "", BaseFont font = null, float fontSize = 14 ) {
        // Placeholder only: CellLayout replaces the rectangle once the cell position is known.
        var placeholder = new iTextSharp.text.Rectangle(1, 1);

        Field = new TextField(writer, placeholder, fieldName) {
            Text = text,
            Font = font ?? BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.WINANSI, BaseFont.EMBEDDED),
            FontSize = fontSize
        };
        Writer = writer;
    }

    /// <summary>Sizes the field to the padded cell rectangle and registers it with the writer.</summary>
    public void CellLayout(PdfPCell cell, iTextSharp.text.Rectangle rect, PdfContentByte[] canvases) {
        // Cell bounds inset by the requested padding.
        float left = rect.GetLeft(Padding);
        float bottom = rect.GetBottom(Padding);
        float right = rect.GetRight(Padding);
        float top = rect.GetTop(Padding);
        var paddedBounds = new PdfRectangle(left, bottom, right, top);

        // The field's box must match the annotation rectangle before the field is generated.
        Field.Box = paddedBounds.Rectangle;

        var formField = Field.GetTextField();
        formField.Put(PdfName.RECT, paddedBounds);
        Writer.AddAnnotation(formField);
    }
}
You can use this in two different ways. Either manually create a field and set various properties:
//The rectangle is actually changed in the cell event so it doesn't matter what we use
// The rectangle passed here is only a placeholder; the cell event replaces it.
var tf = new TextField(writer, new iTextSharp.text.Rectangle(1, 1), "cellTextBox")
{
    Text = "test",
    Font = bfHelvetica,
    FontSize = 14
};
var tbCell = new PdfPCell(new Phrase(" ", helvetica12))
{
    CellEvent = new SingleCellFieldPositioningEvent(writer, tf)
};
Or just pass the properties in:
// Let the cell event build the text field from the supplied name, text, font and size.
var tbCell = new PdfPCell(new Phrase(" ", helvetica12))
{
    CellEvent = new SingleCellFieldPositioningEvent(writer, "cellTextBox", "test", bfHelvetica, 14)
};
myTable.AddCell(tbCell);
I am trying to use HTMLWorker using the following:
/// <summary>
/// Parses an HTML fragment with HTMLWorker and returns the string form of the
/// resulting element list.
/// </summary>
/// <param name="s">HTML markup to parse.</param>
/// <returns>The result of calling ToString() on the parsed element list.</returns>
public static string toWorks(string s)
{
    // NOTE(review): the font is loaded but never handed to the parser; kept so a
    // missing font file still surfaces here as it did before.
    string fontpath = System.Web.HttpContext.Current.Server.MapPath("~/Content/");
    BaseFont bf = BaseFont.CreateFont(fontpath + "ARIALUNI.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);

    var styles = new StyleSheet();
    styles.LoadTagStyle(HtmlTags.SPAN, HtmlTags.FONTSIZE, "10");
    styles.LoadTagStyle(HtmlTags.BODY, HtmlTags.ENCODING, BaseFont.IDENTITY_H);

    using (var sr = new StringReader(s))
    {
        // The earlier version re-added every element to this same list while
        // enumerating it, which throws InvalidOperationException; the list
        // returned by the parser is already complete.
        List<IElement> list = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, styles);

        // NOTE(review): List<T> does not override ToString(), so this is the
        // list's type name rather than the parsed content - confirm what callers expect.
        return list.ToString();
    }
}
It converts:
src="/Content/UserFiles/635380078478327671/Images/test.png"
To:
C:\Content\UserFiles\635380078478327671\Images\test.png
Any suggestion.
Please compare the following two examples:
HtmlMovies1
HtmlMovies2
If you use the first example to render an HTML file with images, you probably won't succeed. The second example introduces an ImageProvider implementation.
In the getImage() method of the ImageProvider interface, you get information about the path to an image. It is up to you to interpret this path. For instance: if the path is /Content/UserFiles/635380078478327671/Images/test.png, you can create an Image object by loading the bytes from that path, possibly after applying some minor changes to the path.
If you don't create an ImageProvider class, iText will do a single guess to find the path. In your case, that guess is wrong.
You can find the C# equivalent of the examples here: http://tinyurl.com/itextsharpIIA2C09
I need to hide some text by adding a new layer over the text I need to hide.
/// <summary>
/// Copies the PDF at <paramref name="strSource"/> to <paramref name="strDest"/>,
/// drawing a filled black rectangle over every location where
/// <paramref name="strSearch"/> is found. Does nothing when the source file does not exist.
/// </summary>
/// <param name="strSearch">Text to locate on each page.</param>
/// <param name="scCase">Comparison rule used when matching the text.</param>
/// <param name="strSource">Path of the PDF to read.</param>
/// <param name="strDest">Path of the PDF to create.</param>
/// <remarks>
/// The rectangles only cover the text visually; this method does not remove the
/// text from the page content.
/// </remarks>
public void ReplacePDFText(string strSearch, StringComparison scCase, string strSource, string strDest)
{
    if (!File.Exists(strSource)) {
        return;
    }

    PdfReader pdfFileReader = new PdfReader(strSource);
    try {
        using (PdfStamper psStamp = new PdfStamper(pdfFileReader, new FileStream(strDest, FileMode.Create))) {
            for (int intCurrPage = 1; intCurrPage <= pdfFileReader.NumberOfPages; intCurrPage++) {
                LocTextExtractionStrategy Strategy = new LocTextExtractionStrategy();

                // The strategy is still seeded from the under-content, as before.
                PdfContentByte underContent = psStamp.GetUnderContent(intCurrPage);
                Strategy.UndercontentCharacterSpacing = underContent.CharacterSpacing;
                Strategy.UndercontentHorizontalScaling = underContent.HorizontalScaling;

                // The returned text is not needed; presumably this call is what feeds
                // the strategy the page content it is queried for below.
                PdfTextExtractor.GetTextFromPage(pdfFileReader, intCurrPage, Strategy);
                List<iTextSharp.text.Rectangle> lstMatches = Strategy.GetTextLocations(strSearch, scCase);

                // NOTE(review): the layer is created but nothing is drawn into it.
                PdfLayer pdLayer = new PdfLayer("over", psStamp.Writer);

                // Draw on the over-content so the rectangles sit on top of the page's
                // text; drawn on the under-content they end up behind it.
                PdfContentByte pCont = psStamp.GetOverContent(intCurrPage);
                pCont.SetColorFill(BaseColor.BLACK);
                foreach (iTextSharp.text.Rectangle rctRect in lstMatches) {
                    pCont.Rectangle(rctRect.Left, rctRect.Bottom, rctRect.Width, rctRect.Height);
                    pCont.Fill();
                }
            }
        }
    }
    finally {
        // Close the reader even when stamping fails part-way through.
        pdfFileReader.Close();
    }
}
The problem with the approach above is that the layer is added successfully with a black color, so instead of the text I have a beautiful black line over the text.
But if i set the pCont.SetColorFill(BaseColor.BLACK) to WHITE, the text is still displayed.
How can i overcome this issue?
Instead of:
// Under-content: drawing goes beneath the existing page content, so the text stays visible.
pCont = psStamp.GetUnderContent(intCurrPage);
Use:
// Over-content: drawing goes on top of the existing page content, so the rectangle covers the text.
pCont = psStamp.GetOverContent(intCurrPage);