Transform XML document changing encoding - c#

I have following code:
XslCompiledTransform transform = new XslCompiledTransform();
XsltArgumentList arguments = new XsltArgumentList();
string txId = SPOZUS_T2S_COMMON.Spozus.GetUniqueInputId;
arguments.AddParam("Id", string.Empty, txId);
foreach (KeyValuePair<string, string> kvp in args)
{
arguments.AddParam(kvp.Key, string.Empty, kvp.Value);
}
using (StringReader sr = new StringReader(xslt))
{
using (XmlReader reader = XmlReader.Create(sr))
{
transform.Load(reader); // load transformation
}
}
XDocument inDoc = new XDocument(_ecpIn.Message); // input document
XDocument outDoc = new XDocument(); // output document
using (XmlReader reader = inDoc.CreateReader())
{
XmlWriter XWriter = outDoc.CreateWriter();
using (XmlWriter writer = XmlWriter.Create(XWriter, transform.OutputSettings))
{
transform.Transform(reader, arguments, writer); // do the transformation
}
}
I have the source document inDoc in utf-8 and I need to replace accentuated chars in the values of text value elements and get rid of those that does not match this pattern ([0-9a-zA-Z-\?:().,&apos;+ ]([0-9a-zA-Z-\?:().,&apos;+ ](/[0-9a-zA-Z-\?:().,&apos;+ ])?)).
Any idea?

Related

Pass xml and xsl as string for processing in xslt 3

I am converting xml to html using xslt 3.0 saxon-HE 9.8 library. Using it in c# code.
I am passing xml and xslt file path in input to get it transformed and get output.
Can anyone please let me know how can I pass xml as string and xslt as string as input in c# code for processing it.
Below is my code.
public static string Transform_XML(string param, string inputfile, string xsltfilename)
{
var xslt = new FileInfo(xsltfilename);
var input = new FileInfo(inputfile);
// Compile stylesheet
var processor = new Processor();
var compiler = processor.NewXsltCompiler();
var executable = compiler.Compile(new Uri(xslt.FullName));
XPathDocument doc = new XPathDocument(new StringReader(param));
DocumentBuilder db = processor.NewDocumentBuilder();
XdmNode xml;
using (XmlReader xr = XmlReader.Create(new StringReader(param)))
{
xml = db.Build(xr);
}
// Do transformation to a destination
var destination = new DomDestination();
using (var inputStream = input.OpenRead())
{
var transformer = executable.Load();
transformer.SetParameter(new QName("", "", "user_entry"), xml);
transformer.SetInputStream(inputStream, new Uri(input.DirectoryName));
transformer.Run(destination);
}
return destination.XmlDocument.InnerXml.ToString();
}
Want to pass xml and xslt as string instead of file path.
UPDATE 1
Got the solution for passing xml and xsl as string in c#. Below is the updated code.
private string Transform_XML(string param, string param_name, string inputfile, string xsltfilename)
{
string xslt_input = System.IO.File.ReadAllText(xsltfilename + ".xslt");
string xml_input = System.IO.File.ReadAllText(inputfile + ".xml");
// Compile stylesheet
var processor = new Processor();
var compiler = processor.NewXsltCompiler();
compiler.BaseUri=new Uri(Server.MapPath("/"));
var executable = compiler.Compile(new XmlTextReader(new StringReader(xslt_input)));
XPathDocument doc = new XPathDocument(new StringReader(param));
DocumentBuilder db = processor.NewDocumentBuilder();
XdmNode xml;
using (XmlReader xr = XmlReader.Create(new StringReader(param)))
{
xml = db.Build(xr);
}
//xml input
DocumentBuilder builder = processor.NewDocumentBuilder();
builder.BaseUri= new Uri(Server.MapPath("/"));
MemoryStream ms = new MemoryStream();
StreamWriter tw = new StreamWriter(ms);
tw.Write(xml_input);
tw.Flush();
Stream instr = new MemoryStream(ms.GetBuffer(), 0, (int)ms.Length);
XdmNode input = builder.Build(instr);
// Do transformation to a destination
var destination = new DomDestination();
var transformer = executable.Load();
//Set the parameter with xml value
transformer.SetParameter(new QName("", "", param_name), xml);
// Set the root node of the source document to be the initial context node
transformer.InitialContextNode = input;
transformer.Run(destination);
// Get result
return destination.XmlDocument.InnerXml.ToString();
}
The XsltTransformer has a method SetInputStream() that allows you to supply the input as a stream (which indeed you appear to be using).
This post How do I generate a stream from a string? tells you how to create a stream from a string.

XDocument Save String parameter doesn't exists in net.core

In my old projects when I save a XDocument, the Save function has like 7 overloads including a "string fileName"
Now in my new project using Net Core there's no overload accepting a string where the document should be saved.
I have this:
XDocument file = new XDocument();
XElement email = new XElement("Email");
XElement recipientsXml = new XElement("Recipients");
foreach (var r in recipients)
{
var rec = new XElement("Recipient",
new XAttribute("To", r));
recipientsXml.Add(rec);
}
email.Add(recipientsXml);
file.Add(email);
file.Save(#"C:\1\email.xml");
How I save the XDocument in my disk?
Thanks.
You can save the XDocument like this, but you need to add some SaveOptions (implementation). Have a look at the Implementation of XDocument:
public void Save(string fileName, SaveOptions options)
{
XmlWriterSettings ws = GetXmlWriterSettings(options);
if (_declaration != null && !string.IsNullOrEmpty(_declaration.Encoding))
{
try
{
ws.Encoding = Encoding.GetEncoding(_declaration.Encoding);
}
catch (ArgumentException)
{
}
}
using (XmlWriter w = XmlWriter.Create(fileName, ws))
{
Save(w);
}
}
You can implement your own solution using a writer, or simply call the existing method like
file.Save(#"C:\1\email.xml", SaveOptions.None);
Ok I found how to do that.
FileStream fileStream = new FileStream(#"C:\1\emails.xml");
XmlWriterSettings settings = new XmlWriterSettings() { Indent = true };
XmlWriter writer = XmlWriter.Create(fileStream, settings);
XDocument file = new XDocument();
XElement email = new XElement("Email");
XElement recipientsXml = new XElement("Recipients");
foreach (var r in recipients)
{
var rec = new XElement("Recipient",
new XAttribute("To", r));
recipientsXml.Add(rec);
}
email.Add(recipientsXml);
file.Add(email);
file.Save(writer);
writer.Flush();
fileStream.Flush();

How to set XmlWriter.XmlWriterSettings when created from XPathNavigator.AppendChild()?

I need to set the OmitXmlDeclaration property of the XmlWriterSettings for a XmlWriter to false to not have XML declarations created. The issue is that I have created the XmlWriter from a call of a XPathNavigator.AppendChild() method. Code below:
public String GetEntityXml<T>(List<T> entities)
{
XmlDocument xmlDoc = new XmlDocument();
XPathNavigator nav = xmlDoc.CreateNavigator();
using (XmlWriter writer = nav.AppendChild())
{
XmlSerializer ser = new XmlSerializer(typeof(List<T>), new XmlRootAttribute(typeof(T).Name + "_LIST"));
ser.Serialize(writer, entities);
}
StringWriter stringWriter = new StringWriter();
XmlTextWriter xmlTextWriter = new XmlTextWriter(stringWriter);
xmlDoc.WriteTo(xmlTextWriter);
string resultString = stringWriter.ToString();
stringWriter.Close();
xmlTextWriter.Close();
return resultString;
}
Any idea how to serialize the List and not have XML declarations?
I’m not getting the XML declaration when I execute your code. Serializing a List<int> gives me:
<Int32_LIST xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<int>5</int>
<int>7</int>
<int>2</int>
</Int32_LIST>
Note that the “XML declaration” that OmitXmlDeclaration refers to is typically something similar to:
<?xml version="1.0" encoding="UTF-8" ?>
If you’re instead referring to the xmlns parts, then those are called “XML namespace declarations”, and may be eliminated by initializing an XmlSerializerNamespaces instance with a default empty namespace, and passing it to your Serialize method:
XmlSerializer ser = new XmlSerializer(typeof(List<T>), new XmlRootAttribute(typeof(T).Name + "_LIST"));
var namespaces = new XmlSerializerNamespaces(new[] { new XmlQualifiedName("", "") });
ser.Serialize(writer, entities, namespaces);
The below is a shortened implementation which achieves the same result as your code:
public String GetEntityXml<T>(List<T> entities)
{
var sb = new StringBuilder();
var settings = new XmlWriterSettings { OmitXmlDeclaration = true };
using (XmlWriter writer = XmlWriter.Create(sb, settings))
{
XmlSerializer ser = new XmlSerializer(typeof(List<T>), new XmlRootAttribute(typeof(T).Name + "_LIST"));
var namespaces = new XmlSerializerNamespaces(new[] { new XmlQualifiedName("", "") });
ser.Serialize(writer, entities, namespaces);
}
return sb.ToString();
}
Try this approach (switched to var for readability):
public String GetEntityXml<T>(List<T> entities)
{
var xmlDoc = new XmlDocument();
var nav = xmlDoc.CreateNavigator();
using (var sw = new StringWriter())
{
//Create an XmlWriter that will omit xml declarations
var s = new XmlWriterSettings{ OmitXmlDeclaration = true };
using (var xmlWriter = XmlWriter.Create(sw, s))
{
//Use the following to serialize without namespaces
var ns = new XmlSerializerNamespaces();
ns.Add("", "");
var ser = new XmlSerializer(typeof(List<T>),
new XmlRootAttribute(typeof(T).Name + "_LIST"));
ser.Serialize(xmlWriter, entities, ns);
}
//Pass xml string to nav.AppendChild()
nav.AppendChild(sw.ToString());
}
using (var stringWriter = new StringWriter())
{
using (var xmlTextWriter = XmlWriter.Create(stringWriter))
{
xmlDoc.WriteTo(xmlTextWriter);
}
return stringWriter.ToString();
}
}
Rather than using nav.AppendChild() to create the XmlWriter, you can create the XmlWriter separately and then just use nav.AppendChild(string) to write the XML into xmlDoc. When you create the XmlWriter yourself, you can omit the XML declaration. Also, when you serialize, you'll probably want to omit the xmlns:xsi and xmlns:xsd namespaces using the XmlSerializerNamespaces class.

xml serialization and encoding

i want the xml encoding to be:
<?xml version="1.0" encoding="windows-1252"?>
To generate encoding like encoding="windows-1252" I wrote this code.
var myns = OS.xmlns;
using (var stringWriter = new StringWriter())
{
var settings = new XmlWriterSettings
{
Encoding = Encoding.GetEncoding(1252),
OmitXmlDeclaration = false
};
using (var writer = XmlWriter.Create(stringWriter, settings))
{
var ns = new XmlSerializerNamespaces();
ns.Add(string.Empty, myns);
var xmlSerializer = new XmlSerializer(OS.GetType(), myns);
xmlSerializer.Serialize(writer, OS,ns);
}
xmlString= stringWriter.ToString();
}
But I am still not getting my expected encoding what am I missing? Please guide me to generate encoding like encoding="windows-1252"?. What do I need to change in my code?
As long as you output the XML directly to a String (through a StringBuilder or a StringWriter) you'll always get UTF-8 or UTF-16 encondings. This is because strings in .NET are internally represented as Unicode characters.
In order to get the proper encoding you'll have to switch to a binary output, such as a Stream.
Here's a quick example:
var settings = new XmlWriterSettings
{
Encoding = Encoding.GetEncoding(1252)
};
using (var buffer = new MemoryStream())
{
using (var writer = XmlWriter.Create(buffer, settings))
{
writer.WriteRaw("<sample></sample>");
}
buffer.Position = 0;
using (var reader = new StreamReader(buffer))
{
Console.WriteLine(reader.ReadToEnd());
Console.Read();
}
}
Related resources:
C# in Depth: Strings in C# and .NET

What is the simplest way to get indented XML with line breaks from XmlDocument?

When I build XML up from scratch with XmlDocument, the OuterXml property already has everything nicely indented with line breaks. However, if I call LoadXml on some very "compressed" XML (no line breaks or indention) then the output of OuterXml stays that way. So ...
What is the simplest way to get beautified XML output from an instance of XmlDocument?
Based on the other answers, I looked into XmlTextWriter and came up with the following helper method:
static public string Beautify(this XmlDocument doc)
{
StringBuilder sb = new StringBuilder();
XmlWriterSettings settings = new XmlWriterSettings
{
Indent = true,
IndentChars = " ",
NewLineChars = "\r\n",
NewLineHandling = NewLineHandling.Replace
};
using (XmlWriter writer = XmlWriter.Create(sb, settings)) {
doc.Save(writer);
}
return sb.ToString();
}
It's a bit more code than I hoped for, but it works just peachy.
As adapted from Erika Ehrli's blog, this should do it:
XmlDocument doc = new XmlDocument();
doc.LoadXml("<item><name>wrench</name></item>");
// Save the document to a file and auto-indent the output.
using (XmlTextWriter writer = new XmlTextWriter("data.xml", null)) {
writer.Formatting = Formatting.Indented;
doc.Save(writer);
}
Or even easier if you have access to Linq
try
{
RequestPane.Text = System.Xml.Linq.XElement.Parse(RequestPane.Text).ToString();
}
catch (System.Xml.XmlException xex)
{
displayException("Problem with formating text in Request Pane: ", xex);
}
A shorter extension method version
public static string ToIndentedString( this XmlDocument doc )
{
var stringWriter = new StringWriter(new StringBuilder());
var xmlTextWriter = new XmlTextWriter(stringWriter) {Formatting = Formatting.Indented};
doc.Save( xmlTextWriter );
return stringWriter.ToString();
}
If the above Beautify method is being called for an XmlDocument that already contains an XmlProcessingInstruction child node the following exception is thrown:
Cannot write XML declaration.
WriteStartDocument method has already
written it.
This is my modified version of the original one to get rid of the exception:
private static string beautify(
XmlDocument doc)
{
var sb = new StringBuilder();
var settings =
new XmlWriterSettings
{
Indent = true,
IndentChars = #" ",
NewLineChars = Environment.NewLine,
NewLineHandling = NewLineHandling.Replace,
};
using (var writer = XmlWriter.Create(sb, settings))
{
if (doc.ChildNodes[0] is XmlProcessingInstruction)
{
doc.RemoveChild(doc.ChildNodes[0]);
}
doc.Save(writer);
return sb.ToString();
}
}
It works for me now, probably you would need to scan all child nodes for the XmlProcessingInstruction node, not just the first one?
Update April 2015:
Since I had another case where the encoding was wrong, I searched for how to enforce UTF-8 without BOM. I found this blog post and created a function based on it:
private static string beautify(string xml)
{
var doc = new XmlDocument();
doc.LoadXml(xml);
var settings = new XmlWriterSettings
{
Indent = true,
IndentChars = "\t",
NewLineChars = Environment.NewLine,
NewLineHandling = NewLineHandling.Replace,
Encoding = new UTF8Encoding(false)
};
using (var ms = new MemoryStream())
using (var writer = XmlWriter.Create(ms, settings))
{
doc.Save(writer);
var xmlString = Encoding.UTF8.GetString(ms.ToArray());
return xmlString;
}
}
XmlTextWriter xw = new XmlTextWriter(writer);
xw.Formatting = Formatting.Indented;
public static string FormatXml(string xml)
{
try
{
var doc = XDocument.Parse(xml);
return doc.ToString();
}
catch (Exception)
{
return xml;
}
}
A simple way is to use:
writer.WriteRaw(space_char);
Like this sample code, this code is what I used to create a tree view like structure using XMLWriter :
private void generateXML(string filename)
{
using (XmlWriter writer = XmlWriter.Create(filename))
{
writer.WriteStartDocument();
//new line
writer.WriteRaw("\n");
writer.WriteStartElement("treeitems");
//new line
writer.WriteRaw("\n");
foreach (RootItem root in roots)
{
//indent
writer.WriteRaw("\t");
writer.WriteStartElement("treeitem");
writer.WriteAttributeString("name", root.name);
writer.WriteAttributeString("uri", root.uri);
writer.WriteAttributeString("fontsize", root.fontsize);
writer.WriteAttributeString("icon", root.icon);
if (root.children.Count != 0)
{
foreach (ChildItem child in children)
{
//indent
writer.WriteRaw("\t");
writer.WriteStartElement("treeitem");
writer.WriteAttributeString("name", child.name);
writer.WriteAttributeString("uri", child.uri);
writer.WriteAttributeString("fontsize", child.fontsize);
writer.WriteAttributeString("icon", child.icon);
writer.WriteEndElement();
//new line
writer.WriteRaw("\n");
}
}
writer.WriteEndElement();
//new line
writer.WriteRaw("\n");
}
writer.WriteEndElement();
writer.WriteEndDocument();
}
}
This way you can add tab or line breaks in the way you are normally used to, i.e. \t or \n
When implementing the suggestions posted here, I had trouble with the text encoding. It seems the encoding of the XmlWriterSettings is ignored, and always overridden by the encoding of the stream. When using a StringBuilder, this is always the text encoding used internally in C#, namely UTF-16.
So here's a version which supports other encodings as well.
IMPORTANT NOTE: The formatting is completely ignored if your XMLDocument object has its preserveWhitespace property enabled when loading the document. This had me stumped for a while, so make sure not to enable that.
My final code:
public static void SaveFormattedXml(XmlDocument doc, String outputPath, Encoding encoding)
{
XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true;
settings.IndentChars = "\t";
settings.NewLineChars = "\r\n";
settings.NewLineHandling = NewLineHandling.Replace;
using (MemoryStream memstream = new MemoryStream())
using (StreamWriter sr = new StreamWriter(memstream, encoding))
using (XmlWriter writer = XmlWriter.Create(sr, settings))
using (FileStream fileWriter = new FileStream(outputPath, FileMode.Create))
{
if (doc.ChildNodes.Count > 0 && doc.ChildNodes[0] is XmlProcessingInstruction)
doc.RemoveChild(doc.ChildNodes[0]);
// save xml to XmlWriter made on encoding-specified text writer
doc.Save(writer);
// Flush the streams (not sure if this is really needed for pure mem operations)
writer.Flush();
// Write the underlying stream of the XmlWriter to file.
fileWriter.Write(memstream.GetBuffer(), 0, (Int32)memstream.Length);
}
}
This will save the formatted xml to disk, with the given text encoding.
If you have a string of XML, rather than a doc ready for use, you can do it this way:
var xmlString = "<xml>...</xml>"; // Your original XML string that needs indenting.
xmlString = this.PrettifyXml(xmlString);
private string PrettifyXml(string xmlString)
{
var prettyXmlString = new StringBuilder();
var xmlDoc = new XmlDocument();
xmlDoc.LoadXml(xmlString);
var xmlSettings = new XmlWriterSettings()
{
Indent = true,
IndentChars = " ",
NewLineChars = "\r\n",
NewLineHandling = NewLineHandling.Replace
};
using (XmlWriter writer = XmlWriter.Create(prettyXmlString, xmlSettings))
{
xmlDoc.Save(writer);
}
return prettyXmlString.ToString();
}
A more simplified approach based on the accepted answer:
static public string Beautify(this XmlDocument doc) {
StringBuilder sb = new StringBuilder();
XmlWriterSettings settings = new XmlWriterSettings
{
Indent = true
};
using (XmlWriter writer = XmlWriter.Create(sb, settings)) {
doc.Save(writer);
}
return sb.ToString();
}
Setting the new line is not necessary. Indent characters also has the default two spaces so I preferred not to set it as well.
Set PreserveWhitespace to true before Load.
var document = new XmlDocument();
document.PreserveWhitespace = true;
document.Load(filename);

Categories

Resources