I have an excel workbook vsto solution that needs to generate a pdf copy of one of its sheets as output.
I have a license for ABCpdf .NET and tried exporting the sheet to HTML and then using ABCpdf to convert the HTML to PDF, but the HTML that Excel generates tries to emulate Excel itself: it includes all 4 worksheets and the markup is horrible. It also messes up the colors (a silver background across the entire workbook).
Any suggestions?
Here is the code I'm currently using to generate the html file:
// Export Sheet2 as an HTML file placed next to the workbook.
// Path.Combine supplies the directory separator, so no hand-written "\" literal
// (and no verbatim-string prefix) is needed.
FileInfo excelDoc = new FileInfo(Path.Combine(Globals.ThisWorkbook.Path, "Document.html"));
Globals.Sheet2.SaveAs(excelDoc.FullName,
    Excel.XlFileFormat.xlHtml, missing, missing, false, false,
    Excel.XlSaveAsAccessMode.xlNoChange,
    missing, missing, missing);
If I manually hack away some of the HTML header tags, I can get ABCpdf to accept the file, but the formatting is a bit off and this solution seems suboptimal.
Thanks in advance.
Solution found: save the Excel sheet as an XPS printout, then import the XPS printout into a PDF document.
The MyImportOperation code is adapted from the ABCpdf XPS sample source code.
/// <summary>
/// Writes Sheet2 to a PDF file by printing it to a temporary XPS file
/// ("tempDoc.xps" next to the workbook) and importing that XPS file into an ABCpdf document.
/// </summary>
/// <param name="outputPDF">Destination PDF file; its <c>FullName</c> is passed to <c>Doc.Save</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="outputPDF"/> is null.</exception>
/// <exception cref="Exception">The XPS import failed; the original error is the inner exception.</exception>
public void SaveSheetToPdf(FileInfo outputPDF)
{
    if (outputPDF == null)
    {
        throw new ArgumentNullException("outputPDF");
    }

    FileInfo documentFile = new FileInfo(Path.Combine(Globals.ThisWorkbook.Path, "tempDoc.xps"));

    // Remove a stale printout from a previous run before printing a fresh one.
    if (documentFile.Exists)
    {
        documentFile.Delete();
    }

    Globals.Sheet2.PrintOut(1, missing, 1, false, "Microsoft XPS Document Writer", true, false, documentFile.FullName);

    // Doc holds native resources; dispose it even when the import or the save fails.
    using (Doc theDoc = new Doc())
    {
        try
        {
            MyImportOperation importOp = new MyImportOperation(theDoc);
            // Import takes a path string, not a FileInfo.
            importOp.Import(documentFile.FullName);
        }
        catch (Exception ex)
        {
            // Report the XPS file that was actually used as the import source.
            throw new Exception("Error rendering pdf. PDF Source XPS Path: " + documentFile.FullName, ex);
        }

        theDoc.Save(outputPDF.FullName);
    }
}
/// <summary>
/// Imports an XPS file into an ABCpdf <c>Doc</c>, adding one PDF page per XPS page
/// content object and compressing every successfully imported pixmap.
/// </summary>
public class MyImportOperation
{
    private Doc _doc = null;
    private double _margin = 10;
    private int _pagesAdded = 0;

    /// <summary>Creates an importer that adds pages to <paramref name="doc"/>.</summary>
    public MyImportOperation(Doc doc)
    {
        _doc = doc;
    }

    /// <summary>Runs the XPS import for the file at <paramref name="inPath"/>.</summary>
    public void Import(string inPath)
    {
        using (XpsImportOperation importer = new XpsImportOperation())
        {
            importer.ProcessingObject += Processing;
            importer.ProcessedObject += Processed;
            importer.Import(_doc, inPath);
        }
    }

    /// <summary>
    /// Called before each object is imported; for page content it adds a new page,
    /// makes it the current page and marks the object as handled.
    /// </summary>
    public void Processing(object sender, ProcessingObjectEventArgs e)
    {
        if (e.Info.SourceType != ProcessingSourceType.PageContent)
        {
            return;
        }

        _doc.Page = _doc.AddPage();
        e.Info.Handled = true;
        _pagesAdded++;
    }

    /// <summary>
    /// Called after each object is imported; compresses the object when the import
    /// succeeded and the object is a <c>PixMap</c>.
    /// </summary>
    public void Processed(object sender, ProcessedObjectEventArgs e)
    {
        if (!e.Successful)
        {
            return;
        }

        PixMap image = e.Object as PixMap;
        if (image != null)
        {
            image.Compress();
        }
    }
}
Related
How to convert from DOCX to DOCM?
The following document shows how to convert from DOCM to DOCX:
https://learn.microsoft.com/en-us/office/open-xml/how-to-convert-a-word-processing-document-from-the-docm-to-the-docx-file-format
Can we do the opposite (DOCX to DOCM)?
/// <summary>
/// Changes the document type of the given file to macro-enabled and renames the
/// file so that it carries the .docm extension.
/// </summary>
/// <param name="fileName">Path of the word-processing document to convert.</param>
/// <remarks>
/// Any exception raised while opening, converting, deleting or renaming the file
/// propagates to the caller. A different file that already exists at the target
/// .docm path is deleted before the rename.
/// </remarks>
public void ConvertDOCXtoDOCM(string fileName)
{
    using (WordprocessingDocument document =
        WordprocessingDocument.Open(fileName, true))
    {
        document.ChangeDocumentType(
            WordprocessingDocumentType.MacroEnabledDocument);
    }

    // Create the new .docm filename.
    var newFileName = Path.ChangeExtension(fileName, ".docm");

    // If the input already has the .docm extension there is nothing to rename.
    // Without this guard the delete below would remove the file that was just
    // converted and the move would then fail. The comparison ignores case because
    // "X.DOCM" and "X.docm" name the same file on case-insensitive file systems.
    if (string.Equals(fileName, newFileName, System.StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // If the target already exists, it will be deleted!
    if (File.Exists(newFileName))
    {
        File.Delete(newFileName);
    }

    // Rename the file.
    File.Move(fileName, newFileName);
}
I'm new to VSTO and OpenXML and I would like to develop a Word add-in. The add-in should use OpenXML, so is it possible to edit a document that is currently open in Word?
For example, I have a Word document open and I would like to replace some text using OpenXML on a button click.
So far I have this code:
// Remember the path of the active document, then save and close it so that the
// file can be edited on disk before Word opens it again.
var activeDocument = Globals.ThisAddIn.Application.ActiveDocument;
var fileFullName = activeDocument.FullName;
activeDocument.Close(
    WdSaveOptions.wdSaveChanges,
    WdOriginalFormat.wdOriginalDocumentFormat,
    true);
//edit document using OpenXml here
Globals.ThisAddIn.Application.Documents.Open(fileFullName);
And I found this article on adding text to a Word document using OpenXML:
How to: Open and add text to a word processing document (Open XML SDK)
But I can't figure out how to make the two work together.
Can anyone help me with this? Thanks.
This is how I solved it:
/// <summary>
/// Ribbon button handler: saves and closes the active document, appends the
/// "[USER_NAME]" placeholder text to the file via Open XML, then reopens it in Word.
/// </summary>
private void button1_Click(object sender, RibbonControlEventArgs e)
{
    var wordApplication = Globals.ThisAddIn.Application;
    var documentPath = wordApplication.ActiveDocument.FullName;

    // The file has to be closed in Word before it is edited on disk.
    wordApplication.ActiveDocument.Close(
        WdSaveOptions.wdSaveChanges,
        WdOriginalFormat.wdOriginalDocumentFormat,
        true);

    OpenAndAddTextToWordDocument(documentPath, "[USER_NAME]");

    wordApplication.Documents.Open(documentPath);
}
/// <summary>
/// Opens the word-processing document at <paramref name="filepath"/> for editing and
/// appends a new paragraph containing <paramref name="txt"/> to the end of its body.
/// </summary>
/// <param name="filepath">Path of the document to modify.</param>
/// <param name="txt">Text placed in the new paragraph.</param>
public static void OpenAndAddTextToWordDocument(string filepath, string txt)
{
    // The using block releases the file handle even when one of the statements
    // below throws; an explicit Close() at the end would be skipped in that case
    // and leave the file locked.
    using (WordprocessingDocument wordprocessingDocument =
        WordprocessingDocument.Open(filepath, true))
    {
        // Assign a reference to the existing document body.
        Body body = wordprocessingDocument.MainDocumentPart.Document.Body;

        // Add new text: paragraph -> run -> text.
        DocumentFormat.OpenXml.Wordprocessing.Paragraph para =
            body.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
        Run run = para.AppendChild(new Run());
        run.AppendChild(new Text(txt));
    }
}
}
You can do something like this:
/// <summary>
/// Replaces every occurrence of "Hello world!" with "Hi Everyone!" in the main
/// document part of the word-processing file at <paramref name="document"/>.
/// </summary>
/// <param name="document">Path of the document to modify.</param>
public static void SearchAndReplace(string document)
{
    using (WordprocessingDocument package = WordprocessingDocument.Open(document, true))
    {
        var mainPart = package.MainDocumentPart;

        // Read the whole main part as text.
        string markup;
        using (var reader = new StreamReader(mainPart.GetStream()))
        {
            markup = reader.ReadToEnd();
        }

        string updatedMarkup = new Regex("Hello world!").Replace(markup, "Hi Everyone!");

        // FileMode.Create is passed so the part's stream is recreated for the new text.
        using (var writer = new StreamWriter(mainPart.GetStream(FileMode.Create)))
        {
            writer.Write(updatedMarkup);
        }
    }
}
Please read this post for more details.
https://msdn.microsoft.com/en-us/library/office/bb508261.aspx
I need to convert a Word document to HTML. I am able to do it with a .doc file,
but when I use a .docx file as input, I receive an error.
Here is my code:
/// <summary>
/// Opens the Word document at <paramref name="path"/> through Word interop and saves
/// it as filtered HTML to "_tempConvertedToHtml.html" in the temp folder.
/// </summary>
/// <param name="path">Path of the Word document to convert.</param>
/// <returns>Path of the generated HTML file.</returns>
/// <remarks>
/// Failures are logged to the console and rethrown. The document and the Word
/// instance are closed in every case so no hidden Word process is left behind.
/// </remarks>
public static string DocToHtml(string path)
{
    Microsoft.Office.Interop.Word.Application _App = null;
    Microsoft.Office.Interop.Word.Document _Doc = null;
    try
    {
        //I used Microsoft Interop v12 because this doesn't give me the Access Violation Error.
        _App = new Microsoft.Office.Interop.Word.Application();
        _Doc = _App.Documents.Open(path);

        //Let's save the converted document to the temp folder
        string tempDocx = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "_tempConvertedToHtml.html");
        object _DocxFileName = tempDocx;
        object FileFormat = Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML;

        _Doc.Convert();
        _Doc.SaveAs(ref _DocxFileName, ref FileFormat);

        return tempDocx;
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.StackTrace);
        // "throw;" keeps the original stack trace; "throw ex;" would reset it.
        throw;
    }
    finally
    {
        // Close without saving: on success the HTML copy is already written, and on
        // failure a modified document must not trigger a save prompt in the hidden
        // Word instance.
        object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
        try
        {
            if (_Doc != null)
            {
                _Doc.Close(ref doNotSave);
            }
        }
        finally
        {
            //Close the Word interface
            if (_App != null)
            {
                _App.Quit(ref doNotSave);
            }
        }
    }
}
I am developing a resume archive where people upload their resumes, and each resume is saved in a specific location. The most important point is that people may use any version of MS Word to prepare their resume, so the file extension could be .doc or .docx. I would like to know whether there is a free library that I can use to extract text from .doc or .docx files, that works for all MS Word versions, and that also works if MS Word is not installed on the PC. I searched Google and found some articles on extracting text from .doc files, but I am not sure whether they work for all MS Word versions. Please tell me which library I should use to extract data from Word files irrespective of the MS Word version, and also give me links to some good articles on this issue.
Also, please tell me whether there is a viewer I can use to show .doc file content from my C# apps irrespective of the MS Word version.
Thanks.
I got the answer:
**Need to add this reference Microsoft.Office.Interop.Word**
using System.Runtime.InteropServices.ComTypes;
using System.IO;
/// <summary>
/// Reads the text file at <paramref name="strfilename"/> line by line, deletes the
/// file afterwards, and returns the text that was read.
/// </summary>
/// <param name="strfilename">Path of the text file to read and then delete.</param>
/// <returns>
/// The file content with each line terminated by a line break, or an empty string
/// when the file does not exist, could not be read, or contains only whitespace.
/// </returns>
public static string GetText(string strfilename)
{
    if (!File.Exists(strfilename))
    {
        return "";
    }

    System.Text.StringBuilder contents = new System.Text.StringBuilder();
    try
    {
        using (StreamReader reader = File.OpenText(strfilename))
        {
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                contents.AppendLine(line);
            }
        }
    }
    catch (Exception ex)
    {
        // Read errors are reported by mail and otherwise ignored.
        SendErrorMail(ex);
    }
    finally
    {
        // The file is removed whether or not reading succeeded.
        if (System.IO.File.Exists(strfilename))
        {
            System.IO.File.Delete(strfilename);
        }
    }

    string text = contents.ToString();
    return text.Trim() != "" ? text : "";
}
/// <summary>
/// Opens the Word document at <paramref name="strfilename"/> through Word interop and
/// saves a DOS-text copy next to it, using the same file name with a ".txt" extension.
/// </summary>
/// <param name="strfilename">Path of the Word document to convert.</param>
/// <returns>
/// Path of the text file. Errors are reported through <c>SendErrorMail</c> and not
/// rethrown, so the returned path may name a file that was never written (or be
/// empty if Word could not be started).
/// </returns>
public static string SaveAsText(string strfilename)
{
    string fileName = "";
    object miss = System.Reflection.Missing.Value;
    Microsoft.Office.Interop.Word.Application wordApp = null;
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        wordApp = new Microsoft.Office.Interop.Word.Application();

        // Same directory and base name, ".txt" extension. ChangeExtension avoids the
        // hand-built "\" separator, which produced a drive-root path for bare file names.
        fileName = Path.ChangeExtension(strfilename, ".txt");

        doc = wordApp.Documents.Open(strfilename, false);
        doc.SaveAs(fileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatDOSText);
    }
    catch (Exception ex)
    {
        SendErrorMail(ex);
    }
    finally
    {
        try
        {
            if (doc != null)
            {
                doc.Close(ref miss, ref miss, ref miss);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                doc = null;
            }
        }
        finally
        {
            // Quit Word as well; otherwise every call leaves a hidden WINWORD.EXE running.
            if (wordApp != null)
            {
                wordApp.Quit(ref miss, ref miss, ref miss);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(wordApp);
                wordApp = null;
            }

            // Kept from the original: collects COM wrappers that were not released
            // explicitly (for example the Documents collection).
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
    return fileName;
}
See the following:
http://msdn.microsoft.com/en-us/library/cc974107%28office.12%29.aspx
How can i read .docx file?
Microsoft Interop Word Nuget
// Read the full text of a Word document through Word interop.
// The path is a verbatim string so the backslashes are not treated as escapes.
string docPath = @"C:\whereEverTheFileIs.doc";
Application app = new Application();
string words;
try
{
    Document doc = app.Documents.Open(docPath);
    words = doc.Content.Text;
    doc.Close();
}
finally
{
    // Quit Word even if opening or reading the document fails.
    app.Quit();
}
iTextSharp - How to convert PdfPTable to JPEG or other image format?
I believe iTextSharp does not currently support rendering PDF into image files. Ghostscript supports converting PDF files to images. There is a good tutorial here available to convert PDF files to images. Also you can use rendering object like this one.
iTextSharp is only for creating PDF Documents.
There are many other DLLs that can be used to convert PDF to JPG. The most commonly preferred one is Ghostscript (GS). You can use the following C# code with the GS DLL:
/// <summary>
/// Converts the first page of the PDF at <paramref name="input"/> to a JPEG file at
/// <paramref name="output"/> at 300 dpi and quality 100.
/// </summary>
/// <param name="input">Path of the source PDF.</param>
/// <param name="output">Path of the JPEG file to write.</param>
public static void PdfToJpg(string input, string output)
{
    PdfToImage.PDFConvert converter = new PDFConvert
    {
        OutputFormat = "jpeg",      // format
        JPEGQuality = 100,          // 100% quality
        ResolutionX = 300,          // dpi
        ResolutionY = 300,
        FirstPageToConvert = 1,     // pages you want
        LastPageToConvert = 1
    };

    converter.Convert(input, output);
}
namespace PdfToJpeg
{
    // NOTE(review): the class header and the handler signature were missing from the
    // original text. "Form1 : Form" is inferred from the Form1 constructor and the
    // InitializeComponent call; the handler name below is a placeholder — confirm
    // both against the designer file.
    public partial class Form1 : Form
    {
        PDFConvert converter = new PDFConvert();

        public Form1()
        {
            InitializeComponent();
        }

        // Converts c:\abc.pdf to c:\output.jpg and reports the outcome in a message box.
        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                // Verbatim strings: in a regular literal "\a" is the alert escape and
                // "c:\" does not terminate, so the original paths were wrong or invalid.
                PdfToJpg(@"c:\abc.pdf", @"c:\" + "output.jpg");
                MessageBox.Show("Files Converted");
            }
            catch (Exception ex)
            {
                MessageBox.Show("Exception Error Occured... " + ex.Message.ToString());
            }
        }
    }
}