Programmatically convert Word (docx) to PDF

Programmatically convert Word (docx) to PDF - c#

Ok before you think "Not another question like this" please read this first.
I have an application (web application in ASP.NET MVC 3) the generates Word files in DocX using the DocX library.
The application takes a template and fills it in with all the data from a database.
Now I want to create a PDF version of that created docx-file.
I know apose.word is an option, but not for me since I have little budget. Other libs where I have to spend some money on are also out of the question.
I don't have a sharepoint server so Word Automation Services isn't an option either.
So I have 2 options (that I know) left and they both use iTextSharp. Don't know which is better.
I could use the generated XML from the docx file and transform it to a version that is usable by iTextSharp.
I could create the PDF like I create the docx with a template.
Anybody has any idea on how much work it is, which of those 2 has better performance and if it is even possible to do.
I know that the second option has the downside that when I change a template I have to change it for both versions.
If you have a better solution (free that is), you are welcome to share it.

Another option, even if it needs some work: install OpenOffice on server and, using UNO libraries (including them as assemblies in your app), you can open docx document and save it in PDF directly.
In a few minutes I post an example...
PARTIAL EXAMPLE:
This is a class I created a long time ago and used to convert files to pdf
using unoidl.com.sun.star.lang;
using unoidl.com.sun.star.uno;
using unoidl.com.sun.star.container;
using unoidl.com.sun.star.frame;
using unoidl.com.sun.star.beans;
using unoidl.com.sun.star.view;
using System.Collections.Generic;
using System.IO;
namespace QOpenOffice
{
public enum AppType
{
Writer,
Calc,
Impress,
Draw,
Math
}
public enum ExportFilter{
Word97,
WriterPDF,
CalcPDF,
DrawPDF,
ImpressPDF,
MathPDF
}
class OpenOffice
{
private XComponentContext context;
private XMultiServiceFactory service;
private XComponentLoader component;
private XComponent doc;
private List<string> filters = new List<string>();
#region Constructors
public OpenOffice()
{
/// This will start a new instance of OpenOffice.org if it is not running,
/// or it will obtain an existing instance if it is already open.
context = uno.util.Bootstrap.bootstrap();
/// The next step is to create a new OpenOffice.org service manager
service = (XMultiServiceFactory)context.getServiceManager();
/// Create a new Desktop instance using our service manager
component = (XComponentLoader)service.createInstance("com.sun.star.frame.Desktop");
// Getting filters
XNameContainer filters = (XNameContainer)service.createInstance("com.sun.star.document.FilterFactory");
foreach (string filter in filters.getElementNames())
this.filters.Add(filter);
}
~OpenOffice()
{
if (doc != null)
doc.dispose();
doc = null;
}
#endregion
#region Private methods
private string FilterToString(ExportFilter filter)
{
switch (filter)
{
case ExportFilter.Word97: return "MS Word 97";
case ExportFilter.WriterPDF: return "writer_pdf_Export";
case ExportFilter.CalcPDF: return "calc_pdf_Export";
case ExportFilter.DrawPDF: return "draw_pdf_Export";
case ExportFilter.ImpressPDF: return "impress_pdf_Export";
case ExportFilter.MathPDF: return "math_pdf_Export";
}
return "";
}
#endregion
#region Public methods
public bool Load(string filename, bool hidden)
{
return Load(filename, hidden, "", "");
}
public bool Load(string filename, bool hidden, int filter_index, string filter_options)
{
return Load(filename, hidden, filters[filter_index], filter_options);
}
public bool Load(string filename, bool hidden, string filter_name, string filter_options)
{
List<PropertyValue> pv = new List<PropertyValue>();
pv.Add(new PropertyValue("Hidden", 0, new uno.Any(hidden), PropertyState.DIRECT_VALUE));
if (filter_name != "")
{
pv.Add(new PropertyValue("FilterName", 0, new uno.Any(filter_name), PropertyState.DIRECT_VALUE));
pv.Add(new PropertyValue("FilterOptions", 0, new uno.Any(filter_options), PropertyState.DIRECT_VALUE));
}
try
{
doc = component.loadComponentFromURL(
"file:///" + filename.Replace('\\', '/'), "_blank",
0, pv.ToArray());
return true;
}
catch
{
doc = null;
return false;
}
}
public bool Print()
{
return Print(1, "");
}
public bool Print(int copies, string pages)
{
List<PropertyValue> pv = new List<PropertyValue>();
pv.Add(new PropertyValue("CopyCount", 0, new uno.Any(copies), PropertyState.DIRECT_VALUE));
if (pages != "")
pv.Add(new PropertyValue("Pages", 0, new uno.Any(pages), PropertyState.DIRECT_VALUE));
//if (doc is XPrintable)
try
{
((XPrintable)doc).print(pv.ToArray());
return true;
}
catch { return false; }
}
public bool Save(string filename, ExportFilter filter)
{
return Save(filename, FilterToString(filter));
}
public bool Save(string filename, string filter)
{
List<PropertyValue> pv = new List<PropertyValue>();
pv.Add(new PropertyValue("FilterName", 0, new uno.Any(filter), PropertyState.DIRECT_VALUE));
pv.Add(new PropertyValue("Overwrite", 0, new uno.Any(true), PropertyState.DIRECT_VALUE));
try
{
filename = filename.Replace("\\", "/");
((XStorable)doc).storeToURL("file:///" + filename, pv.ToArray());
return true;
}
catch { return false; }
}
public bool ExportToPdf(string filename)
{
filename = Path.ChangeExtension(filename, ".pdf");
bool ret = Save(filename, "writer_pdf_Export");
if (!ret) ret = Save(filename, "impress_pdf_Export");
if (!ret) ret = Save(filename, "calc_pdf_Export");
if (!ret) ret = Save(filename, "draw_pdf_Export");
if (!ret) ret = Save(filename, "impress_pdf_Export");
if (!ret) ret = Save(filename, "math_pdf_Export");
return ret;
}
public void Close()
{
doc.dispose();
doc = null;
}
public bool New(AppType app, bool hidden)
{
try
{
string sapp = "private:factory/";
switch (app)
{
case AppType.Writer:
sapp += "swriter";
break;
case AppType.Calc:
sapp += "scalc";
break;
case AppType.Impress:
sapp += "simpress";
break;
case AppType.Draw:
sapp += "sdraw";
break;
case AppType.Math:
sapp += "smath";
break;
}
PropertyValue pv = new PropertyValue("Hidden", 0, new uno.Any(hidden), PropertyState.DIRECT_VALUE);
doc = component.loadComponentFromURL(sapp, "_blank", 0, new PropertyValue[1] { pv });
return true;
}
catch
{
doc = null;
return false;
}
}
#endregion
#region Properties
public List<string> Filters
{
get { return filters; }
}
#endregion
}
}

Related

StreamWriter keeps writing to the same file after instantiating it again for another file

I'm trying to implement a simple file-logger object, with the possibility to truncate the file when its size reaches a threshold.
I am using a StreamWriter, which gets written to at each call of the method Log(). In order to decide when to truncate, I am checking the StreamWriter.BaseStream.Length property before each write and, if it is bigger than the threshold, I close the StreamWriter, create a new file and open the StreamWriter on that file.
For example, if I set the threshold to 10Mb files, it will create a new file each 10Mb of written data.
Under normal load (let's say 3-4 seconds between calls to Log()), everything works as it should. However, the product which is going to use this logger will work with lots of data and required logging every 1 second, even less.
The problem is that the logger seems to completely ignore the creation of the new file(and opening the new stream), failing to truncate it and keeps writing to the existing stream.
I also tried to manually compute the stream's length, hoping it would be a problem with the stream, but it does not work.
I have found out that going step by step with the debugger makes it work correctly, but it does not solve my problem. Logging each second seems to make the program skip the UpdateFile() method entirely.
public class Logger
{
private static Logger _logger;
private const string LogsDirectory = "Logs";
private StreamWriter _streamWriter;
private string _path;
private readonly bool _truncate;
private readonly int _maxSizeMb;
private long _currentSize;
//===========================================================//
public static void Set(string filename, bool truncate = false, int maxSizeMb = 10)
{
if (_logger == null)
{
if (filename.Contains('_'))
{
throw new Exception("Filename cannot contain the _ character!");
}
if (filename.Contains('.'))
{
throw new Exception("The filename must not include the extension");
}
_logger = new Logger(filename, truncate, maxSizeMb);
}
}
//===========================================================//
public static void Log(string message, LogType logType = LogType.Info)
{
_logger?.InternalLog(message, logType);
}
//===========================================================//
public static void LogException(Exception ex)
{
_logger?.InternalLogException(ex);
}
//===========================================================//
private Logger(string filename, bool truncate = false, int maxSizeMb = 10)
{
_path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, LogsDirectory, $"{filename}_{DateTimeToPrefix(DateTime.Now)}.log");
if (CheckForExistingLogs())
{
_path = GetLatestLogFilename();
}
_truncate = truncate;
_maxSizeMb = maxSizeMb;
_streamWriter = new StreamWriter(File.Open(_path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite));
_currentSize = _streamWriter.BaseStream.Length;
}
//===========================================================//
private bool CheckForExistingLogs()
{
var directory = Path.GetDirectoryName(_path);
var filename = Path.GetFileNameWithoutExtension(_path);
if (filename.Contains('_'))
{
filename = filename.Split('_').First();
}
return new DirectoryInfo(directory).GetFiles().Any(x => x.Name.ToLower().Contains(filename.ToLower()));
}
//===========================================================//
private string GetLatestLogFilename()
{
var directory = Path.GetDirectoryName(_path);
var filename = Path.GetFileNameWithoutExtension(_path);
if (filename.Contains('_'))
{
filename = filename.Split('_').First();
}
var files = new DirectoryInfo(directory).GetFiles().Where(x => x.Name.ToLower().Contains(filename.ToLower()));
files = files.OrderBy(x => PrefixToDateTime(x.Name.Split('_').Last()));
return files.Last().FullName;
}
//===========================================================//
private void UpdateFile()
{
_streamWriter.Flush();
_streamWriter.Close();
_streamWriter.Dispose();
_streamWriter = StreamWriter.Null;
_path = GenerateNewFilename();
_streamWriter = new StreamWriter(File.Open(_path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite));
_currentSize = _streamWriter.BaseStream.Length;
}
//===========================================================//
private string GenerateNewFilename()
{
var directory = Path.GetDirectoryName(_path);
var oldFilename = Path.GetFileNameWithoutExtension(_path);
if (oldFilename.Contains('_'))
{
oldFilename = oldFilename.Split('_').First();
}
var newFilename = $"{oldFilename}_{DateTimeToPrefix(DateTime.Now)}.log";
return Path.Combine(directory, newFilename);
}
//===========================================================//
private static string DateTimeToPrefix(DateTime dateTime)
{
return dateTime.ToString("yyyyMMddHHmm");
}
//===========================================================//
private static DateTime PrefixToDateTime(string prefix)
{
var year = Convert.ToInt32(string.Join("", prefix.Take(4)));
var month = Convert.ToInt32(string.Join("", prefix.Skip(4).Take(2)));
var day = Convert.ToInt32(string.Join("", prefix.Skip(6).Take(2)));
var hour = Convert.ToInt32(string.Join("", prefix.Skip(8).Take(2)));
var minute = Convert.ToInt32(string.Join("", prefix.Skip(10).Take(2)));
return new DateTime(year, month, day, hour, minute, 0);
}
//===========================================================//
private int ConvertSizeToMb()
{
return Convert.ToInt32(Math.Truncate(_currentSize / 1024f / 1024f));
}
//===========================================================//
public void InternalLog(string message, LogType logType = LogType.Info)
{
if (_truncate && ConvertSizeToMb() >= _maxSizeMb)
{
UpdateFile();
}
var sendMessage = string.Empty;
switch (logType)
{
case LogType.Error:
{
sendMessage += "( E ) ";
break;
}
case LogType.Warning:
{
sendMessage += "( W ) ";
break;
}
case LogType.Info:
{
sendMessage += "( I ) ";
break;
}
}
sendMessage += $"{DateTime.Now:dd.MM.yyyy HH:mm:ss}: {message}";
_streamWriter.WriteLine(sendMessage);
_streamWriter.Flush();
_currentSize += Encoding.ASCII.GetByteCount(sendMessage);
Console.WriteLine(_currentSize);
}
//===========================================================//
public void InternalLogException(Exception ex)
{
if (_truncate && ConvertSizeToMb() >= _maxSizeMb)
{
UpdateFile();
}
var sendMessage = $"( E ) {DateTime.Now:dd.MM.yyyy HH:mm:ss}: {ex.Message}{Environment.NewLine}{ex.StackTrace}";
_streamWriter.WriteLine(sendMessage);
_streamWriter.Flush();
_currentSize += Encoding.ASCII.GetByteCount(sendMessage);
}
}
Usage example:
private static void Main(string[] args)
{
Logger.Set("Log", true, 10);
while (true)
{
Logger.Log("anything");
}
}
Have you ever encountered such a problem before? How can it be solved? Thanks :)

I don't know how much data your application writes to the log each minute. But if the amount is more than 10MB then the method DateTimeToPrefix will return the same name for a second call inside a minute interval. (Well at least for me this is what happens with the code included in the Main method).
I changed the ToString() to include also the seconds and this gives correct amount of data written in the expected files.
private static string DateTimeToPrefix(DateTime dateTime)
{
return dateTime.ToString("yyyyMMddHHmmss");
}

Your code looks "fine", so I'm guessing the issue is being caused by multiple threads accessing the InternalLog method at once. Your code is not thread safe as it does not use any locking mechanisms. The easiest and probably totally sufficient solution for your project is to add a locking object at class level:
private readonly object _lock = new object();
then wrap your whole InternalLog method in a lock(_lock) statement:
public void InternalLog(string message, LogType logType = LogType.Info)
{
lock(_lock)
{
// your existing code
}
}
This is not a perfect solution and could cause a bottleneck especially as you are flushing the StreamWriter in every call to InternalLog. But for now it should be perfectly fine!

OpenXml Excel: throw error in any word after mail address

I read Excel files using OpenXml. all work fine but if the spreadsheet contains one cell that has an address mail and after it a space and another word, such as:
abc#abc.com abc
It throws an exception immediately at the opening of the spreadsheet:
var _doc = SpreadsheetDocument.Open(_filePath, false);
exception:
DocumentFormat.OpenXml.Packaging.OpenXmlPackageException
Additional information:
Invalid Hyperlink: Malformed URI is embedded as a
hyperlink in the document.

There is an open issue on the OpenXml forum related to this problem: Malformed Hyperlink causes exception
In the post they talk about encountering this issue with a malformed "mailto:" hyperlink within a Word document.
They propose a work-around here: Workaround for malformed hyperlink exception
The workaround is essentially a small console application which locates the invalid URL and replaces it with a hard-coded value; here is the code snippet from their sample that does the replacement; you could augment this code to attempt to correct the passed brokenUri:
private static Uri FixUri(string brokenUri)
{
return new Uri("http://broken-link/");
}
The problem I had was actually with an Excel document (like you) and it had to do with a malformed http URL; I was pleasantly surprised to find that their code worked just fine with my Excel file.
Here is the entire work-around source code, just in case one of these links goes away in the future:
void Main(string[] args)
{
var fileName = #"C:\temp\corrupt.xlsx";
var newFileName = #"c:\temp\Fixed.xlsx";
var newFileInfo = new FileInfo(newFileName);
if (newFileInfo.Exists)
newFileInfo.Delete();
File.Copy(fileName, newFileName);
WordprocessingDocument wDoc;
try
{
using (wDoc = WordprocessingDocument.Open(newFileName, true))
{
ProcessDocument(wDoc);
}
}
catch (OpenXmlPackageException e)
{
e.Dump();
if (e.ToString().Contains("The specified package is not valid."))
{
using (FileStream fs = new FileStream(newFileName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
{
UriFixer.FixInvalidUri(fs, brokenUri => FixUri(brokenUri));
}
}
}
}
private static Uri FixUri(string brokenUri)
{
brokenUri.Dump();
return new Uri("http://broken-link/");
}
private static void ProcessDocument(WordprocessingDocument wDoc)
{
var elementCount = wDoc.MainDocumentPart.Document.Descendants().Count();
Console.WriteLine(elementCount);
}
}
public static class UriFixer
{
public static void FixInvalidUri(Stream fs, Func<string, Uri> invalidUriHandler)
{
XNamespace relNs = "http://schemas.openxmlformats.org/package/2006/relationships";
using (ZipArchive za = new ZipArchive(fs, ZipArchiveMode.Update))
{
foreach (var entry in za.Entries.ToList())
{
if (!entry.Name.EndsWith(".rels"))
continue;
bool replaceEntry = false;
XDocument entryXDoc = null;
using (var entryStream = entry.Open())
{
try
{
entryXDoc = XDocument.Load(entryStream);
if (entryXDoc.Root != null && entryXDoc.Root.Name.Namespace == relNs)
{
var urisToCheck = entryXDoc
.Descendants(relNs + "Relationship")
.Where(r => r.Attribute("TargetMode") != null && (string)r.Attribute("TargetMode") == "External");
foreach (var rel in urisToCheck)
{
var target = (string)rel.Attribute("Target");
if (target != null)
{
try
{
Uri uri = new Uri(target);
}
catch (UriFormatException)
{
Uri newUri = invalidUriHandler(target);
rel.Attribute("Target").Value = newUri.ToString();
replaceEntry = true;
}
}
}
}
}
catch (XmlException)
{
continue;
}
}
if (replaceEntry)
{
var fullName = entry.FullName;
entry.Delete();
var newEntry = za.CreateEntry(fullName);
using (StreamWriter writer = new StreamWriter(newEntry.Open()))
using (XmlWriter xmlWriter = XmlWriter.Create(writer))
{
entryXDoc.WriteTo(xmlWriter);
}
}
}
}
}

The fix by #RMD works great. I've been using it for years. But there is a new fix.
You can see the fix here in the changelog for issue #793
Upgrade OpenXML to 2.12.0.
Right click solution and select Manage NuGet Packages.
Implement the fix
It is helpful to have a unit test. Create an excel file with a bad email address like test#gmail,com. (Note the comma instead of the dot).
Make sure the stream you open and the call to SpreadsheetDocument.Open allows Read AND Write.
You need to implement a RelationshipErrorHandlerFactory and use it in the options when you open. Here is the code I used:
public class UriRelationshipErrorHandler : RelationshipErrorHandler
{
public override string Rewrite(Uri partUri, string id, string uri)
{
return "https://broken-link";
}
}
Then you need to use it when you open the document like this:
var openSettings = new OpenSettings
{
RelationshipErrorHandlerFactory = package =>
{
return new UriRelationshipErrorHandler();
}
};
using var document = SpreadsheetDocument.Open(stream, true, openSettings);
One of the nice things about this solution is that it does not require you to create a temporary "fixed" version of your file and it is far less code.

Unfortunately solution where you have to open file as zip and replace broken hyperlink would not help me.
I just was wondering how it is posible that it works fine when your target framework is 4.0 even if your only installed .Net Framework has version 4.7.2.
I have found out that there is private static field inside System.UriParser that selects version of URI's RFC specification. So it is possible to set it to V2 as it is set for .net 4.0 and lower versions of .Net Framework. Only problem that it is private static readonly.
Maybe someone will want to set it globally for whole application. But I wrote UriQuirksVersionPatcher that will update this version and restore it back in Dispose method. It is obviously not thread-safe but it is acceptable for my purpose.
using System;
using System.Diagnostics;
using System.Reflection;
namespace BarCap.RiskServices.RateSubmissions.Utility
{
#if (NET20 || NET35 || NET40)
public class UriQuirksVersionPatcher : IDisposable
{
public void Dispose()
{
}
}
#else
public class UriQuirksVersionPatcher : IDisposable
{
private const string _quirksVersionFieldName = "s_QuirksVersion"; //See Source\ndp\fx\src\net\System\_UriSyntax.cs in NexFX sources
private const string _uriQuirksVersionEnumName = "UriQuirksVersion";
/// <code>
/// private enum UriQuirksVersion
/// {
/// V1 = 1, // RFC 1738 - Not supported
/// V2 = 2, // RFC 2396
/// V3 = 3, // RFC 3986, 3987
/// }
/// </code>
private const string _oldQuirksVersion = "V2";
private static readonly Lazy<FieldInfo> _targetFieldInfo;
private static readonly Lazy<int?> _patchValue;
private readonly int _oldValue;
private readonly bool _isEnabled;
static UriQuirksVersionPatcher()
{
var targetType = typeof(UriParser);
_targetFieldInfo = new Lazy<FieldInfo>(() => targetType.GetField(_quirksVersionFieldName, BindingFlags.Static | BindingFlags.NonPublic));
_patchValue = new Lazy<int?>(() => GetUriQuirksVersion(targetType));
}
public UriQuirksVersionPatcher()
{
int? patchValue = _patchValue.Value;
_isEnabled = patchValue.HasValue;
if (!_isEnabled) //Disabled if it failed to get enum value
{
return;
}
int originalValue = QuirksVersion;
_isEnabled = originalValue != patchValue;
if (!_isEnabled) //Disabled if value is proper
{
return;
}
_oldValue = originalValue;
QuirksVersion = patchValue.Value;
}
private int QuirksVersion
{
get
{
return (int)_targetFieldInfo.Value.GetValue(null);
}
set
{
_targetFieldInfo.Value.SetValue(null, value);
}
}
private static int? GetUriQuirksVersion(Type targetType)
{
int? result = null;
try
{
result = (int)targetType.GetNestedType(_uriQuirksVersionEnumName, BindingFlags.Static | BindingFlags.NonPublic)
.GetField(_oldQuirksVersion, BindingFlags.Static | BindingFlags.Public)
.GetValue(null);
}
catch
{
#if DEBUG
Debug.WriteLine("ERROR: Failed to find UriQuirksVersion.V2 enum member.");
throw;
#endif
}
return result;
}
public void Dispose()
{
if (_isEnabled)
{
QuirksVersion = _oldValue;
}
}
}
#endif
}
Usage:
using(new UriQuirksVersionPatcher())
{
using(var document = SpreadsheetDocument.Open(fullPath, false))
{
//.....
}
}
P.S. Later I found that someone already implemented this pathcher: https://github.com/google/google-api-dotnet-client/blob/master/Src/Support/Google.Apis.Core/Util/UriPatcher.cs

I haven't use OpenXml but if there's no specific reason for using it then I highly recommend LinqToExcel from LinqToExcel. Example of code is here:
var sheet = new ExcelQueryFactory("filePath");
var allRows = from r in sheet.Worksheet() select r;
foreach (var r in allRows) {
var cella = r["Header"].ToString();
}

Extract powerpoint titles with C#

I have powerponint 97-2003 files(.ppt extension) and I need to extract slide titles programatically using C#.
I have tried using Microsoft.Office.Interop but without success.
I have search with google and as a maximum I have found how to obtain reference to PowerPoint.Slide:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.Office.Core;
using PowerPoint = Microsoft.Office.Interop.PowerPoint;
namespace Tester
{
class Program
{
static void Main(string[] args)
{
Microsoft.Office.Interop.PowerPoint.Application presentationApp = new Microsoft.Office.Interop.PowerPoint.Application();
try
{
string pptPath = #"D:\somefile.ppt";
TestReadingTitles(presentationApp, pptPath);
}
finally
{
presentationApp.Quit();
}
}
private static void TestReadingTitles(Microsoft.Office.Interop.PowerPoint.Application presentationApp, string pptPath)
{
presentationApp.Visible = Microsoft.Office.Core.MsoTriState.msoTrue;
Microsoft.Office.Interop.PowerPoint.Presentations presentations = presentationApp.Presentations;
Microsoft.Office.Core.MsoTriState readOnly = Microsoft.Office.Core.MsoTriState.msoTrue;
Microsoft.Office.Core.MsoTriState untitled = Microsoft.Office.Core.MsoTriState.msoTrue;
Microsoft.Office.Core.MsoTriState withWindow = Microsoft.Office.Core.MsoTriState.msoFalse;
Microsoft.Office.Interop.PowerPoint.Presentation presentation = presentations.Open(pptPath, readOnly, untitled, withWindow);
for (int i = 0; i < presentation.Slides.Count; i++)
{
foreach (PowerPoint.Slide slide in presentation.Slides)
{
string slidetitle = ??????????????????;
}
}
}
}
}

You can extract the titles without looping through the shapes.
private static string ExtractSlideTitlefromSlide(Microsoft.Office.Interop.PowerPoint.Slide slide, string defaultValue)
{
if (slide.Shapes.HasTitle == Office.MsoTriState.msoTrue)
{
if (slide.Shapes.Title.TextFrame.HasText == Office.MsoTriState.msoTrue)
{
return slide.Shapes.Title.TextFrame.TextRange.Text;
}
}
return defaultValue;
}

I have no solution for direct extract slide titles from ppt. This is a workarround - first temproaly convert it into pptx and then extract titles using openxml.
For conversion from ppt to pptx I have used Microsoft Interop which I do not like but I have no better solution.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using PowerPoint = Microsoft.Office.Interop.PowerPoint;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Presentation;
using D = DocumentFormat.OpenXml.Drawing;
using Shape = DocumentFormat.OpenXml.Presentation.Shape;
namespace Tester
{
class Program
{
static void Main(string[] args)
{
string pptPath = #"D:\mypresentation.ppt";
ReadTitles(pptPath);
}
private static void ReadTitles(string pptPath)
{
IList<string> slideTitles = GetSlidesTitles(pptPath);
Debug.Print("SLIDES TITLES FOR {0}:", pptPath);
foreach (string slideTitle in slideTitles)
{
Debug.Print("\t {0}", slideTitle);
}
}
private static IList<string> GetSlidesTitles(string pptPath)
{
string pptxPath = SaveAsPptx(pptPath);
IList<string> titles = GetSlideTitles(pptxPath);
try
{
File.Delete(pptxPath);
Debug.Print("Temporary pptx file {0} deleted.", pptxPath);
}
catch (Exception e)
{
Debug.Print("Error deleting file {0}. ERROR: {1}", pptxPath, e.Message);
}
return titles;
}
private static string SaveAsPptx(string pptPathIn)
{
Microsoft.Office.Interop.PowerPoint.Application presentationApp = new Microsoft.Office.Interop.PowerPoint.Application();
string pptxPathOut = null;
try
{
string pptDir = Path.GetDirectoryName(pptPathIn);
string pptFileNameOnly = Path.GetFileNameWithoutExtension(pptPathIn);
pptxPathOut = Path.Combine(pptDir, pptFileNameOnly + ".pptx");
presentationApp.Visible = Microsoft.Office.Core.MsoTriState.msoTrue;
Microsoft.Office.Interop.PowerPoint.Presentations presentations = presentationApp.Presentations;
Microsoft.Office.Core.MsoTriState readOnly = Microsoft.Office.Core.MsoTriState.msoFalse;
Microsoft.Office.Core.MsoTriState untitled = Microsoft.Office.Core.MsoTriState.msoFalse;
Microsoft.Office.Core.MsoTriState withWindow = Microsoft.Office.Core.MsoTriState.msoFalse;
Debug.Print("Opening ppt file {0} ...", pptPathIn);
Microsoft.Office.Interop.PowerPoint.Presentation presentation = presentations.Open(pptPathIn, readOnly, untitled, withWindow);
Debug.Print("Starting creation of pptx from ppt {0}", pptPathIn);
presentation.SaveCopyAs(pptxPathOut, PowerPoint.PpSaveAsFileType.ppSaveAsOpenXMLPresentation, Microsoft.Office.Core.MsoTriState.msoFalse);
Debug.Print("Successfully created pptx {0} from ppt {1}", pptxPathOut, pptPathIn);
}
catch (Exception e)
{
Debug.Print("Error during creating pptx from ppt " + pptPathIn, e);
}
finally
{
presentationApp.Quit();
}
return pptxPathOut;
}
// Get a list of the titles of all the slides in the presentation.
public static IList<string> GetSlideTitles(string presentationFile)
{
// Open the presentation as read-only.
using (PresentationDocument presentationDocument = PresentationDocument.Open(presentationFile, false))
{
return GetSlideTitles(presentationDocument);
}
}
// Get a list of the titles of all the slides in the presentation.
public static IList<string> GetSlideTitles(PresentationDocument presentationDocument)
{
if (presentationDocument == null)
{
throw new ArgumentNullException("presentationDocument");
}
// Get a PresentationPart object from the PresentationDocument object.
PresentationPart presentationPart = presentationDocument.PresentationPart;
if (presentationPart != null &&
presentationPart.Presentation != null)
{
// Get a Presentation object from the PresentationPart object.
Presentation presentation = presentationPart.Presentation;
if (presentation.SlideIdList != null)
{
List<string> titlesList = new List<string>();
// Get the title of each slide in the slide order.
foreach (var slideId in presentation.SlideIdList.Elements<SlideId>())
{
SlidePart slidePart = presentationPart.GetPartById(slideId.RelationshipId) as SlidePart;
// Get the slide title.
string title = GetSlideTitle(slidePart);
// An empty title can also be added.
titlesList.Add(title);
}
return titlesList;
}
}
return null;
}
// Get the title string of the slide.
public static string GetSlideTitle(SlidePart slidePart)
{
if (slidePart == null)
{
throw new ArgumentNullException("slidePart");
}
// Declare a paragraph separator.
string paragraphSeparator = null;
if (slidePart.Slide != null)
{
// Find all the title shapes.
var shapes = from shape in slidePart.Slide.Descendants<Shape>()
where IsTitleShape(shape)
select shape;
StringBuilder paragraphText = new StringBuilder();
foreach (var shape in shapes)
{
// Get the text in each paragraph in this shape.
foreach (var paragraph in shape.TextBody.Descendants<D.Paragraph>())
{
// Add a line break.
paragraphText.Append(paragraphSeparator);
foreach (var text in paragraph.Descendants<D.Text>())
{
paragraphText.Append(text.Text);
}
paragraphSeparator = "\n";
}
}
return paragraphText.ToString();
}
return string.Empty;
}
// Determines whether the shape is a title shape.
private static bool IsTitleShape(Shape shape)
{
var placeholderShape = shape.NonVisualShapeProperties.ApplicationNonVisualDrawingProperties.GetFirstChild<PlaceholderShape>();
if (placeholderShape != null && placeholderShape.Type != null && placeholderShape.Type.HasValue)
{
switch ((PlaceholderValues)placeholderShape.Type)
{
// Any title shape.
case PlaceholderValues.Title:
// A centered title.
case PlaceholderValues.CenteredTitle:
return true;
default:
return false;
}
}
return false;
}
}
}

Finally I have found a way to get out powerpoint presentation titles from .ppt file without converting it to .pptx. Here is a solution:
using System;
using System.Collections.Generic;
using Microsoft.Office.Core;
using PowerPoint = Microsoft.Office.Interop.PowerPoint;
namespace Mintra.Publisher.DocumentConverter.Core.Utils
{
class InteropUtility
{
public static IList<string> GetPresentationTitles(string pptPath)
{
IList<string> result = new List<string>();
var presentationApp = new Microsoft.Office.Interop.PowerPoint.Application();
try
{
presentationApp.Visible = Microsoft.Office.Core.MsoTriState.msoTrue;
Microsoft.Office.Interop.PowerPoint.Presentations presentations = presentationApp.Presentations;
var readOnly = Microsoft.Office.Core.MsoTriState.msoTrue;
var untitled = Microsoft.Office.Core.MsoTriState.msoTrue;
var withWindow = Microsoft.Office.Core.MsoTriState.msoFalse;
Microsoft.Office.Interop.PowerPoint.Presentation presentation = presentations.Open(pptPath, readOnly, untitled, withWindow);
int i = 0;
foreach (PowerPoint.Slide slide in presentation.Slides)
{
string defaultTitle = String.Format("Slide {0}", i);
String shapeTitle = ExtractSlideTitlefromShape(slide, defaultTitle);
result.Add(shapeTitle);
}
}
finally
{
presentationApp.Quit();
}
return result;
}
private static string ExtractSlideTitlefromShape(PowerPoint.Slide slide, string defaultValue)
{
PowerPoint.HeadersFooters headersFooters = slide.HeadersFooters;
PowerPoint.Shapes mastershapes = slide.Master.Shapes;
for (int i = 1; i <= slide.Shapes.Count; i++)
{
PowerPoint.Shape shape = slide.Shapes[i];
bool hasTextFrame = shape.HasTextFrame == MsoTriState.msoTrue;
bool isTypePlaceholder = shape.Type.Equals(MsoShapeType.msoPlaceholder);
bool hasTextInTextFrame = shape.TextFrame.HasText == MsoTriState.msoTrue;
bool isTitleShape = shape.Name.ToLower().Contains("title");
if (isTypePlaceholder && hasTextFrame && hasTextInTextFrame && isTitleShape)
{
return shape.TextFrame.TextRange.Text;
}
}
return defaultValue;
}
}
}

Microsoft.Office.Interop.PowerPoint.Application pptApplication = new Microsoft.Office.Interop.PowerPoint.Application();
Microsoft.Office.Interop.PowerPoint.Slides slides;
Microsoft.Office.Interop.PowerPoint._Slide slide;
// Create the Presentation File
Presentation pptPresentation = pptApplication.Presentations.Add(MsoTriState.msoTrue);
for (int i = 0; i < 2; i++)
{
Microsoft.Office.Interop.PowerPoint.CustomLayout customLayout = pptPresentation.SlideMaster.CustomLayouts[Microsoft.Office.Interop.PowerPoint.PpSlideLayout.ppLayoutChartAndText];
// customLayout.t
// Create new Slide
slides = pptPresentation.Slides;
slide = slides.AddSlide(1, customLayout);
slide.Shapes.Title.Top = 0;
slide.Shapes.Title.TextFrame.TextRange.Text = "Welcome!";
All you need is change Welcome text.

Dropped zip file causes e.Data.GetData("FileContents") to throw an exception

I'm trying to implement a handler in my WPF application for files dragged from a zip archive. The handler should get the file content for further processing.
My environment: Windows7, 7-zip installed, Visual Studio 2012 Express, .Net 4.5
Here is the code of a simple MainWindow app to demonstrate the problem:
public partial class MainWindow : Window
{
public MainWindow()
{
InitializeComponent();
AllowDrop= true;
Drop += onDrop;
}
private void onDrop(object sender, DragEventArgs e)
{
if (e.Data.GetDataPresent("FileContents"))
{
var fileContents = e.Data.GetData("FileContents");
//get file contents...
}
}
}
When I drag a file contained in a zip archive to my Window the call to e.Data.GetData("FileContents") throws a System.ArgumentException ("Argument out of Range") with the following callstack:
System.Windows.DataObject.OleConverter.GetDataInner(formatetc, medium)
System.Windows.DataObject.OleConverter.GetDataFromOleHGLOBAL(format, aspect, index)
System.Windows.DataObject.OleConverter.GetDataFromBoundOleDataObject(format, aspect, index)
System.Windows.DataObject.OleConverter.GetData(format, autoConvert, aspect, index)
System.Windows.DataObject.OleConverter.GetData(format, autoConvert)
System.Windows.DataObject.GetData(format, autoConvert)
System.Windows.DataObject.GetData(format)
TestZip.MainWindow.onDrop(sender, e) Zeile 34 C#
I've looked up the source code of this OleConverter (http://reflector.webtropy.com/default.aspx/Dotnetfx_Win7_3#5#1/Dotnetfx_Win7_3#5#1/3#5#1/DEVDIV/depot/DevDiv/releases/Orcas/NetFXw7/wpf/src/Core/CSharp/System/Windows/dataobject#cs/1/dataobject#cs) but the GetDataInner() method is implemented like
private void GetDataInner(ref FORMATETC formatetc, out STGMEDIUM medium)
{
new SecurityPermission(SecurityPermissionFlag.UnmanagedCode).Assert(); // BlessedAssert
try
{
_innerData.GetData(ref formatetc, out medium);
}
finally
{
SecurityPermission.RevertAssert();
}
}
So this does also not provide further info of what's wrong here.
I also tried with uninstalled 7-zip and with different zip archives, but no change.
My question: Does any one have a clue what's going wrong here? What do I need to do in order to get the content of a file from a zip-archive dropped onto my window?

Old question but I needed to do this today so....
Using statements:
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;
using System.Text;
Reference PresentationCore
https://msdn.microsoft.com/en-us/library/system.windows.idataobject(v=vs.110).aspx
First you need to get the "FileDescriptor" contents, here's a class which reads them.
/// <summary>
/// Specifies which fields are valid in a FileDescriptor Structure
/// </summary>
[Flags]
enum FileDescriptorFlags : uint
{
ClsId = 0x00000001,
SizePoint = 0x00000002,
Attributes = 0x00000004,
CreateTime = 0x00000008,
AccessTime = 0x00000010,
WritesTime = 0x00000020,
FileSize = 0x00000040,
ProgressUI = 0x00004000,
LinkUI = 0x00008000,
Unicode = 0x80000000,
}
internal static class FileDescriptorReader
{
internal sealed class FileDescriptor
{
public FileDescriptorFlags Flags{get;set;}
public Guid ClassId{get;set;}
public Size Size{get;set;}
public Point Point{get;set;}
public FileAttributes FileAttributes{get;set;}
public DateTime CreationTime{get;set;}
public DateTime LastAccessTime{get;set;}
public DateTime LastWriteTime{get;set;}
public Int64 FileSize{get;set;}
public string FileName{get;set;}
public FileDescriptor(BinaryReader reader)
{
//Flags
Flags = (FileDescriptorFlags)reader.ReadUInt32();
//ClassID
ClassId = new Guid(reader.ReadBytes(16));
//Size
Size = new Size(reader.ReadInt32(), reader.ReadInt32());
//Point
Point = new Point(reader.ReadInt32(), reader.ReadInt32());
//FileAttributes
FileAttributes = (FileAttributes)reader.ReadUInt32();
//CreationTime
CreationTime = new DateTime(1601,1,1).AddTicks(reader.ReadInt64());
//LastAccessTime
LastAccessTime = new DateTime(1601,1,1).AddTicks(reader.ReadInt64());
//LastWriteTime
LastWriteTime = new DateTime(1601, 1, 1).AddTicks(reader.ReadInt64());
//FileSize
FileSize = reader.ReadInt64();
//FileName
byte[] nameBytes = reader.ReadBytes(520);
int i = 0;
while(i < nameBytes.Length)
{
if (nameBytes[i] == 0 && nameBytes[i + 1] == 0)
break;
i++;
i++;
}
FileName = UnicodeEncoding.Unicode.GetString(nameBytes, 0, i);
}
}
public static IEnumerable<FileDescriptor> Read(Stream fileDescriptorStream)
{
BinaryReader reader = new BinaryReader(fileDescriptorStream);
var count = reader.ReadUInt32();
while (count > 0)
{
FileDescriptor descriptor = new FileDescriptor(reader);
yield return descriptor;
count--;
}
}
public static IEnumerable<string> ReadFileNames(Stream fileDescriptorStream)
{
BinaryReader reader = new BinaryReader(fileDescriptorStream);
var count = reader.ReadUInt32();
while(count > 0)
{
FileDescriptor descriptor = new FileDescriptor(reader);
yield return descriptor.FileName;
count--;
}
}
}
Now using that you can get the matching file content for each file:
static class ClipboardHelper
{
internal static MemoryStream GetFileContents(System.Windows.IDataObject dataObject, int index)
{
//cast the default IDataObject to a com IDataObject
IDataObject comDataObject;
comDataObject = (IDataObject)dataObject;
System.Windows.DataFormat Format = System.Windows.DataFormats.GetDataFormat("FileContents");
if (Format == null)
return null;
FORMATETC formatetc = new FORMATETC();
formatetc.cfFormat = (short)Format.Id;
formatetc.dwAspect = DVASPECT.DVASPECT_CONTENT;
formatetc.lindex = index;
formatetc.tymed = TYMED.TYMED_ISTREAM | TYMED.TYMED_HGLOBAL;
//create STGMEDIUM to output request results into
STGMEDIUM medium = new STGMEDIUM();
//using the com IDataObject interface get the data using the defined FORMATETC
comDataObject.GetData(ref formatetc, out medium);
switch (medium.tymed)
{
case TYMED.TYMED_ISTREAM: return GetIStream(medium);
default: throw new NotSupportedException();
}
}
private static MemoryStream GetIStream(STGMEDIUM medium)
{
//marshal the returned pointer to a IStream object
IStream iStream = (IStream)Marshal.GetObjectForIUnknown(medium.unionmember);
Marshal.Release(medium.unionmember);
//get the STATSTG of the IStream to determine how many bytes are in it
var iStreamStat = new System.Runtime.InteropServices.ComTypes.STATSTG();
iStream.Stat(out iStreamStat, 0);
int iStreamSize = (int)iStreamStat.cbSize;
//read the data from the IStream into a managed byte array
byte[] iStreamContent = new byte[iStreamSize];
iStream.Read(iStreamContent, iStreamContent.Length, IntPtr.Zero);
//wrapped the managed byte array into a memory stream
return new MemoryStream(iStreamContent);
}
}
Now you can enumerate the streams in the file contents:
var fileDescriptor = (MemoryStream)Clipboard.GetDataObject().GetData("FileGroupDescriptorW");
var files = FileDescriptorReader.Read(fileDescriptor);
var fileIndex = 0;
foreach (var fileContentFile in files)
{
if ((fileContentFile.FileAttributes & FileAttributes.Directory) != 0)
{
//Do something with directories?
//Note that directories do not have FileContents
//And will throw if we try to read them
}
else
{
var fileData = ClipboardHelper.GetFileContents(Clipboard.GetDataObject(), FileIndex);
fileData.Position = 0;
//Do something with the fileContent Stream
}
fileIndex++;
}

Display RDL in C#

I have an RDL file designed in ReportBuilder 2. I need to show it in my C# application. I know there is an RdlViewer example at gotreportviewer.com but it is not what I really want (For example it can not handle parameters with multiple values). Instead, I'm curious about the way ReportBuilder itself shows report previews. Looking at its installation directory, I found several interesting DLLs. Using ILSpy, I saw that inside MSReportBuilder.exe, there is a method called "StartPreview()" which is called when you the "Run" button in ReportBuilder. Here is the code:
private void StartPreview()
{
try
{
if (!this.m_layoutEditor.AtBookmark("ReportPreview"))
{
using (MemoryStream memoryStream = new MemoryStream())
{
this.m_layoutEditor.CommitPendingChanges(false);
this.m_preview.Reset();
Report report;
if (this.ActiveServer != null)
{
this.m_preview.ProcessingMode = ProcessingMode.Remote;
report = this.m_preview.ServerReport;
string text = this.ActiveServer.ServerUrl;
if (this.ActiveServer.IsInSharePointMode)
{
text = Util.RemoveTrailingSlash(text) + "/_vti_bin/reportserver";
}
AuthenticationInfo authenticationInfo = this.ActiveServer.GetAuthenticationInfo();
if (authenticationInfo.IsFormsAuth)
{
this.m_preview.ServerReport.ReportServerCredentials.SetFormsCredentials(authenticationInfo.AuthCookie, authenticationInfo.UserName, authenticationInfo.Password, authenticationInfo.Domain);
}
this.m_preview.ServerReport.ReportServerUrl = new Uri(text);
using (this.m_layoutEditor.UndoManager.BeginTempGroup())
{
Report rdlObject = this.m_layoutEditor.Report.GetRdlObject();
if (this.CurrentReportInfo.IsServerReport)
{
this.m_reportProject.ResolveServerReferences(rdlObject);
}
RdlSerializer rdlSerializer = LayoutEditor.CreateSerializer();
rdlSerializer.Serialize(memoryStream, rdlObject);
}
memoryStream.Position = 0L;
this.m_preview.ServerReport.LoadReportDefinition(memoryStream);
DataSourceCredentials[] storedCredentialsForCurrentReport = this.GetStoredCredentialsForCurrentReport();
if (storedCredentialsForCurrentReport.Length > 0)
{
this.m_preview.ServerReport.SetDataSourceCredentials(storedCredentialsForCurrentReport);
}
}
else
{
this.m_preview.ProcessingMode = ProcessingMode.Local;
report = this.m_preview.LocalReport;
using (this.m_layoutEditor.UndoManager.BeginTempGroup())
{
Report rdlObject2 = this.m_layoutEditor.Report.GetRdlObject();
this.m_reportProject.FixUpDataSourceCredentialsForLocalPreview(rdlObject2);
RdlSerializer rdlSerializer2 = LayoutEditor.CreateSerializer();
rdlSerializer2.Serialize(memoryStream, rdlObject2);
}
memoryStream.Position = 0L;
this.m_preview.LocalReport.LoadReportDefinition(memoryStream);
DataSourceCredentials[] storedCredentialsForCurrentReport2 = this.GetStoredCredentialsForCurrentReport();
if (storedCredentialsForCurrentReport2.Length > 0)
{
bool flag;
this.m_preview.LocalReport.GetDataSources(out flag);
if (!flag)
{
this.m_preview.LocalReport.SetDataSourceCredentials(storedCredentialsForCurrentReport2);
}
}
}
report.DisplayName = this.CurrentReportInfo.ReportName;
this.m_preview.RefreshReport();
this.m_layoutEditor.SetBookmark("ReportPreview");
}
}
}
catch (Exception ex)
{
string message;
this.m_reportProject.ParseException(ex, out message);
ErrorDialog.Show(this, Strings.Error_Preview, message, ex);
this.SetDisplayMode(DisplayMode.Design);
}
}
Interesting point is that the code for server-side processing is very clear. But I can't understand how the client-side processing (my need) works! Can you help me do the same thing in my application?
I'm using VS 2010 / .Net 4.0

Develop Reference

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

Programmatically convert Word (docx) to PDF - c#

Related

StreamWriter keeps writing to the same file after instantiating it again for another file

OpenXml Excel: throw error in any word after mail address

Extract powerpoint titles with C#

Dropped zip file causes e.Data.GetData("FileContents") to throw an exception

Display RDL in C#

Categories

Resources