I have some html.
I try to clean it with library: http://sourceforge.net/projects/tidynet/
Here is my code:
// Clean up the HTML with Tidy so that it comes out as XHTML/XML.
Tidy tidy = new Tidy();
tidy.Options.DocType = DocType.Omit;
tidy.Options.DropFontTags = true;
tidy.Options.LogicalEmphasis = true;
tidy.Options.Xhtml = true;
tidy.Options.XmlOut = true;
tidy.Options.MakeClean = true;
tidy.Options.TidyMark = false;
tidy.Options.CharEncoding = CharEncoding.UTF8;

// Passed to Parse below to collect the messages Tidy reports.
TidyMessageCollection tmc = new TidyMessageCollection();

string cleanHtml;
// A MemoryStream built over a byte array already starts at position 0, so no
// separate Write + rewind is needed; both streams are released by `using`.
using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(report)))
using (MemoryStream output = new MemoryStream())
{
    tidy.Parse(input, output, tmc);
    cleanHtml = Encoding.UTF8.GetString(output.ToArray());
}
then I try to use xslt:
// Apply the XSLT stylesheet to the cleaned-up markup.
try
{
    XslCompiledTransform xslt = new XslCompiledTransform();
    // The stylesheet reader is only needed while loading, so release it right after.
    using (XmlReader stylesheetReader = XmlReader.Create(new StringReader(stylesheet.Content)))
    {
        xslt.Load(stylesheetReader);
    }

    StringBuilder res = new StringBuilder();
    using (StringWriter resultWriter = new StringWriter(res))
    {
        xslt.Transform(StringExtensions.ToXmlReader(cleanHtml), null, resultWriter);
    }
    var resultReport = res.ToString();
}
catch (Exception e)
{
    // Still best-effort (nothing is rethrown), but the failure is no longer
    // silently discarded: the full exception goes to the trace listeners.
    System.Diagnostics.Trace.TraceError(e.ToString());
}
and I get an exception:
The '=' character, hexadecimal value 0x3D, cannot be included in a name
Update
How can I automatically remove the '=' character from names?
HTML is not XML (unless it's XHTML). Trying to apply an XSLT to a generic HTML document is almost certainly going to give you issues. You'll have to find a different way to do whatever transformation it is that you're looking for.
If you want to process the HTML programmatically, then I would suggest the HTML Agility Pack.
Related
I am fairly new to coding in C#. I would like to calculate the IRmark of an XML file saved on my C: drive.
The IRmark calculation is based on the HMRC's specification on https://www.gov.uk/government/publications/hmrc-irmark-generic-irmark-specification
I have found some code online that I could use to do this, but I do not know how to make it read and convert the XML file on the C: drive. I would appreciate your help. Thank you.
Here is the code
/// <summary>
/// Calculates an IRmark for an XML message: the Base64-encoded SHA-1 hash of the
/// canonicalised (C14N) Body element, with any existing IRmark element removed first.
/// </summary>
/// <param name="Xml">The bytes of the complete XML message (UTF-8; a leading byte order mark is tolerated).</param>
/// <returns>The SHA-1 hash of the normalised Body, as a Base64 string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="Xml"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// No Body element exists in the root element's namespace, or the Body has no second child node.
/// </exception>
public static string GetIRMark(byte[] Xml)
{
    if (Xml == null)
    {
        throw new ArgumentNullException("Xml");
    }

    // Load through a StreamReader rather than Encoding.GetString + LoadXml: the reader
    // strips a byte order mark, which LoadXml would reject as invalid leading content.
    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    using (StreamReader reader = new StreamReader(new MemoryStream(Xml), Encoding.UTF8))
    {
        doc.Load(reader);
    }

    XmlNamespaceManager ns = new XmlNamespaceManager(doc.NameTable);
    ns.AddNamespace("env", doc.DocumentElement.NamespaceURI);
    XmlNode Body = doc.SelectSingleNode("//env:Body", ns);
    if (Body == null)
    {
        throw new InvalidOperationException("The message has no Body element in the root element's namespace.");
    }

    // The namespace used for the IRmark lookup is taken from the Body's SECOND child node.
    // NOTE(review): with PreserveWhitespace on, this presumably relies on the first child
    // being the whitespace that follows <Body>; confirm against real submissions.
    XmlNode taxNode = Body.FirstChild != null ? Body.FirstChild.NextSibling : null;
    if (taxNode == null)
    {
        throw new InvalidOperationException("The Body element has no second child node to take the tax namespace from.");
    }
    ns.AddNamespace("tax", taxNode.NamespaceURI);

    // Create an XML document of just the body section
    XmlDocument xmlBody = new XmlDocument();
    xmlBody.PreserveWhitespace = true;
    xmlBody.LoadXml(Body.OuterXml);

    // Remove any existing IRMark
    XmlNode nodeIr = xmlBody.SelectSingleNode("//tax:IRmark", ns);
    if (nodeIr != null)
    {
        nodeIr.ParentNode.RemoveChild(nodeIr);
    }

    // Normalise the document using C14N (Canonicalisation)
    XmlDsigC14NTransform c14n = new XmlDsigC14NTransform();
    c14n.LoadInput(xmlBody);

    string text;
    using (Stream canonical = (Stream)c14n.GetOutput())
    using (MemoryStream copy = new MemoryStream())
    {
        // A single Stream.Read call may return fewer bytes than requested,
        // so copy the whole stream instead of trusting one read.
        canonical.CopyTo(copy);
        text = Encoding.UTF8.GetString(copy.ToArray());
    }

    // Canonical XML writes a carriage return as the character reference "&#xD;".
    // Remove those, then normalise any remaining CRLF pairs to LF.
    text = text.Replace("&#xD;", "");
    text = text.Replace("\r\n", "\n");

    // Create the SHA-1 hash from the final document
    using (SHA1 sha = SHA1.Create())
    {
        byte[] hash = sha.ComputeHash(Encoding.UTF8.GetBytes(text));
        return Convert.ToBase64String(hash);
    }
}
I have tried this code
// NOTE(review): this cannot compile. Encoding.GetString expects a byte[], not a file
// path, and the '#' before the literal looks like a mangled '@' (verbatim string prefix).
// Read the file's bytes first, e.g. File.ReadAllBytes(path), and pass them to GetIRMark.
string text = Encoding.UTF8.GetString(#"c:\myfile.xml");
but I get an error message
I am converting xml to html using xslt 3.0 saxon-HE 9.8 library. Using it in c# code.
I am passing xml and xslt file path in input to get it transformed and get output.
Can anyone please let me know how can I pass xml as string and xslt as string as input in c# code for processing it.
Below is my code.
/// <summary>
/// Transforms the XML file at <paramref name="inputfile"/> with the stylesheet file at
/// <paramref name="xsltfilename"/>, passing the XML in <paramref name="param"/> to the
/// stylesheet as the parameter "user_entry".
/// </summary>
/// <param name="param">XML text that is parsed and handed to the stylesheet as a node parameter.</param>
/// <param name="inputfile">Path of the source XML file.</param>
/// <param name="xsltfilename">Path of the XSLT stylesheet file.</param>
/// <returns>The inner XML of the resulting DOM document.</returns>
public static string Transform_XML(string param, string inputfile, string xsltfilename)
{
    var xslt = new FileInfo(xsltfilename);
    var input = new FileInfo(inputfile);

    // Compile stylesheet
    var processor = new Processor();
    var compiler = processor.NewXsltCompiler();
    var executable = compiler.Compile(new Uri(xslt.FullName));

    // Parse the parameter XML once. (An extra XPathDocument built from the same
    // text was never used and has been dropped.)
    DocumentBuilder db = processor.NewDocumentBuilder();
    XdmNode xml;
    using (XmlReader xr = XmlReader.Create(new StringReader(param)))
    {
        xml = db.Build(xr);
    }

    // Do transformation to a destination
    var destination = new DomDestination();
    using (var inputStream = input.OpenRead())
    {
        var transformer = executable.Load();
        transformer.SetParameter(new QName("", "", "user_entry"), xml);
        // The directory of the input file is supplied as the base URI of the source stream.
        transformer.SetInputStream(inputStream, new Uri(input.DirectoryName));
        transformer.Run(destination);
    }

    // InnerXml is already a string; no ToString() needed.
    return destination.XmlDocument.InnerXml;
}
Want to pass xml and xslt as string instead of file path.
UPDATE 1
Got the solution for passing xml and xsl as string in c#. Below is the updated code.
/// <summary>
/// Reads "<paramref name="inputfile"/>.xml" and "<paramref name="xsltfilename"/>.xslt" into
/// strings, then transforms the XML with the stylesheet, passing the XML in
/// <paramref name="param"/> as the stylesheet parameter named <paramref name="param_name"/>.
/// </summary>
/// <param name="param">XML text that is parsed and handed to the stylesheet as a node parameter.</param>
/// <param name="param_name">Name of the stylesheet parameter that receives <paramref name="param"/>.</param>
/// <param name="inputfile">Path of the source XML file, without the ".xml" extension.</param>
/// <param name="xsltfilename">Path of the stylesheet file, without the ".xslt" extension.</param>
/// <returns>The inner XML of the resulting DOM document.</returns>
private string Transform_XML(string param, string param_name, string inputfile, string xsltfilename)
{
    string xslt_input = System.IO.File.ReadAllText(xsltfilename + ".xslt");
    string xml_input = System.IO.File.ReadAllText(inputfile + ".xml");

    // Both the stylesheet and the source document are given the site root as base URI.
    Uri baseUri = new Uri(Server.MapPath("/"));

    // Compile stylesheet
    var processor = new Processor();
    var compiler = processor.NewXsltCompiler();
    compiler.BaseUri = baseUri;
    var executable = compiler.Compile(new XmlTextReader(new StringReader(xslt_input)));

    // Parse the parameter XML once. (An extra XPathDocument built from the same
    // text was never used and has been dropped.)
    DocumentBuilder paramBuilder = processor.NewDocumentBuilder();
    XdmNode xml;
    using (XmlReader xr = XmlReader.Create(new StringReader(param)))
    {
        xml = paramBuilder.Build(xr);
    }

    // Build the source document from the XML string. Encoding the string directly yields
    // the same BOM-less UTF-8 bytes the former StreamWriter/GetBuffer detour produced.
    DocumentBuilder builder = processor.NewDocumentBuilder();
    builder.BaseUri = baseUri;
    XdmNode input;
    using (var instr = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(xml_input)))
    {
        input = builder.Build(instr);
    }

    // Do transformation to a destination
    var destination = new DomDestination();
    var transformer = executable.Load();
    // Set the parameter with xml value
    transformer.SetParameter(new QName("", "", param_name), xml);
    // Set the root node of the source document to be the initial context node
    transformer.InitialContextNode = input;
    transformer.Run(destination);

    // Get result
    return destination.XmlDocument.InnerXml;
}
The XsltTransformer has a method SetInputStream() that allows you to supply the input as a stream (which indeed you appear to be using).
This post How do I generate a stream from a string? tells you how to create a stream from a string.
This is a very strange question.
I am using C# to create a pass.json and save it to a MemoryStream, and that works normally. After that I create manifest.json with the SHA-1 hashes, including the one for pass.json. The content of manifest.json looks like this, and it is completely correct.
{"icon.png": "9423bd00e2b01c59a3265c38b5062fac7da0752d",
"icon@2x.png": "4d1db55bdaca70b685c013529a1c0dcbd7046524",
"logo.png": "ee5b053e63dbfe3b78378c15d163331d68a0ede8",
"logo@2x.png": "2f9e3a55bded1163620719a4d6c1ad496ed40c17",
"pass.json": "fd68bf77757d3057263a9aca0e5110ddd933934a"}
After generating the pkpass, my phone cannot open it. If I hard-code the pass.json SHA-1 value as "fd68bf77757d3057263a9aca0e5110ddd933934a" instead of assigning it from a variable, it works.
The coding like following:
// This version runs successfully
var strPass = JavascriptSerialize(details);
// Write pass.json to the assets folder; `using` closes the file even if the write throws.
using (var sw = new StreamWriter(assetsFolder + @"pass.json"))
{
    sw.Write(strPass);
}
// Hash the file that was just written and record it in the manifest.
manifest.passjson = GetSha1Hash(assetsFolder + manifest.GetAssetBoardingPass(libPkPass_object_boardingPass.JsonObjects.AssetTypes.passjson));
//manifest.passjson = "2f9e3a55bded1163620719a4d6c1ad496ed40c17"
// end
// In-memory version (the one reported as failing)
var strPass = JavascriptSerialize(details);
MemoryStream stream = new MemoryStream();
// The writer has to wrap the in-memory stream: StreamWriter(string) would treat
// the JSON text as a file path instead of writing it into `stream`.
StreamWriter writer = new StreamWriter(stream);
writer.Write(strPass);
// Push the buffered text into the stream before it is read back.
writer.Flush();
// Rewind so the hash covers everything that was just written.
stream.Position = 0;
var a = GetSha1HashMemory(stream);
/// <summary>
/// Computes the SHA-1 hash of <paramref name="passStream"/> and returns it as
/// lower-case hexadecimal text without separators.
/// </summary>
/// <param name="passStream">The stream handed to ComputeHash.</param>
/// <returns>Two lower-case hex digits per hash byte.</returns>
private static string GetSha1HashMemory(Stream passStream)
{
    //var bs = new BufferedStream(passStream);
    using (SHA1Managed hasher = new SHA1Managed())
    {
        byte[] digest = hasher.ComputeHash(passStream);
        // Format each byte as two lower-case hex digits and join them.
        string[] hexPairs = Array.ConvertAll(digest, octet => octet.ToString("x2"));
        return string.Concat(hexPairs);
    }
}
// Record the hash computed from the in-memory pass.json as the manifest's pass.json entry.
manifest.passjson = a;
//manifest.passjson = "2f9e3a55bded1163620719a4d6c1ad496ed40c17" (same data )
//end
What is going on? I cannot find anything wrong with the string.
The pkpass provide in here (sendspace).
Can anybody tell me what is wrong?
Big Thank!
Two mistakes :
ComputeHash(Stream) and using Stream
ComputeHash(Stream) : ComputeHash stream only using System.IO.Stream, but not MemoryStream, change to ComputeHash(bytes[]) can handle it
using Stream: I try to pass the stream to other function, it is not a good example, the stream need to create a new one and it may replace some bytes at your computer stream. In this case, I just need to call this function will out open new one, it will fix
// Hash the bytes of the in-memory pass.json and render the digest as hex text.
StringBuilder formatted;
using (var hasher = new SHA1Managed())
{
    //var bytePass = ReadFully(passStream);
    byte[] digest = hasher.ComputeHash(passStream.ToArray());
    // Two hex digits per hash byte.
    formatted = new StringBuilder(digest.Length * 2);
    for (int i = 0; i < digest.Length; i++)
    {
        formatted.Append(digest[i].ToString("X2"));
    }
}
// The manifest entry is stored in lower case.
manifest.passjson = formatted.ToString().ToLower();
Sending XML from a C# server and receiving it in an Android Java client
This is what the received XML look likes:
<?xml version="1.0" encoding="utf-8"?>.....
This is the c# send code
// Serialise the WorkItem instance `p` to XML in memory.
// Because the writer is created with Encoding.UTF8, the bytes begin with the UTF-8
// byte order mark, so the receiver sees three extra bytes ahead of "<?xml".
XmlSerializer serializer = new XmlSerializer(typeof(WorkItem));
MemoryStream buffer = new MemoryStream();
XmlTextWriter xmlWriter = new XmlTextWriter(buffer, Encoding.UTF8);
serializer.Serialize(xmlWriter, p);

// Send the serialised bytes to the client, log, and close the connection.
byte[] data = buffer.ToArray();
clientStream.Write(data, 0, data.Length);
Console.WriteLine(" send.." + data);
clientStream.Close();
In Java i just do:
// Wrap the socket's input stream and read the server's reply as one line.
// NOTE(review): DataInputStream.readLine() is deprecated because it does not convert
// bytes to characters properly; BufferedReader.readLine() is the documented replacement.
in = new DataInputStream(skt.getInputStream());
String XMlString = in.readLine();
Everything works if I remove the first 3 characters from XMlString every time.
I would really like to do this in a better way if it's possible
*UPDATE adding the Android java client
#Override
protected String doInBackground(Long... params) {
textTopInfo.setText("Loading workitems..");
DataOutputStream out = null;
DataInputStream in = null;
try {
Socket skt = new Socket(Consts.SERVER_URL_1, Consts.SERVER_PORT_1);
skt.setSoTimeout(10000); //10 sec timout
out = new DataOutputStream(skt.getOutputStream());
in = new DataInputStream(skt.getInputStream());
// check valid user id
String id = prefs.getString("id", "");
if(id.equals(""))
return "Open menu and enter User Id";
String theString = Consts.PUSH_GET_WORKITEM + ":" + id ;
out.write(theString.getBytes());
BufferedReader d = new BufferedReader
(new InputStreamReader(skt.getInputStream()));
String XMlString = d.readLine();
// here I remove the BOM
XMlString = XMlString.substring(3);
Log.d(TAG, "GF");
XStream xstream = new XStream();
xstream.alias("WorkItem", WorkItem.class);
xstream.alias("OneItem", OneItem.class);
pl = (WorkItem)xstream.fromXML(XMlString);
} catch (Exception e) {
return "cannot connect to server " + e.toString();
}finally{
//kill out/in
try {
if(out != null)
out.close();
if(in!=null)
in.close();
} catch (IOException e) {
}
}
return "here is the list";
}
the method readLine is deprecated in Java 1.7; from the javadocs:
readLine()
Deprecated.
This method does not properly convert bytes to characters. As of JDK 1.1, the preferred way to read lines of text is via the BufferedReader.readLine() method. Programs that use the DataInputStream class to read lines can be converted to use the BufferedReader class by replacing code of the form:
DataInputStream d = new DataInputStream(in);
with:
BufferedReader d
= new BufferedReader(new InputStreamReader(in));
The three initial bytes are the UTF-8 BOM (byte order mark). You will need to tell your Java code to use the same encoding.
EDIT:
Doing too much.... this works for me with national chars
// Serialise straight into a StringWriter (text, no byte-level encoding step),
// then parse the resulting XML text back into an XDocument.
var xs = new XmlSerializer(typeof(ToDoItem));
var stringWriter = new StringWriter();
xs.Serialize(stringWriter, item);
var test = XDocument.Parse(stringWriter.ToString());
...where The item is the object containing strings with national chars
/EDIT
I did a project with serialization of some objects.
I copied some code from examples on this site and everything worked great, until I changed the ASP.NET framework from 3.5 to 4.0... (and changed the IIS7 .NET setting from v2.0 to v4.0)
I am 99% sure this is the cause of the following error:
Before this change something like this:
// Serialise a string containing non-ASCII characters, parse the XML text,
// and save the document as test.xml under the path mapped by the server.
var test = XDocument.Parse(SerializeObject("æøåAØÅ", typeof(string)));
test.Save(HttpContext.Current.Server.MapPath("test.xml"));
Would save the xml with the exact chars used.
Now it saves this:
���A��
I would like: Information on settings I might have to make in IIS7
OR
A comment on how to change the serializing methods to handle the national chars better.
This is the serialization code used.
/// <summary>Decodes a byte array as UTF-8 text.</summary>
/// <param name="characters">The UTF-8 encoded bytes.</param>
/// <returns>The decoded string.</returns>
private static String UTF8ByteArrayToString(Byte[] characters)
{
    return new UTF8Encoding().GetString(characters);
}
/// <summary>
/// Serialises <paramref name="pObject"/> to an XML string with <see cref="XmlSerializer"/>.
/// </summary>
/// <param name="pObject">The object to serialise.</param>
/// <param name="type">The type the serializer is created for.</param>
/// <returns>The trimmed XML text, or null when serialisation fails for any reason.</returns>
public static String SerializeObject(Object pObject, Type type)
{
    try
    {
        // Use ONE encoding for writing and for decoding. Writing as ASCII and decoding as
        // UTF-8 destroyed every non-ASCII character. `false` suppresses the byte order
        // mark so the returned string starts directly with the XML declaration.
        var utf8 = new UTF8Encoding(false);
        using (var memoryStream = new MemoryStream())
        {
            var xs = new XmlSerializer(type);
            var xmlTextWriter = new XmlTextWriter(memoryStream, utf8);
            xs.Serialize(xmlTextWriter, pObject);
            // Make sure everything has reached the stream before it is read back.
            xmlTextWriter.Flush();
            return utf8.GetString(memoryStream.ToArray()).Trim();
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: callers receive null when serialisation fails.
        return null;
    }
}
You encode the text as ASCII and then decode it as UTF-8, and expect that to work? It won't. This code could never work properly for non-ASCII characters, regardless of any updates or settings.
There is no need to write the XML to a MemoryStream and then decode that. Just use StringWriter:
// Serialise into a StringWriter so the XML is produced as text directly,
// with no byte encode/decode round trip in between.
var xs = new XmlSerializer(type);
var stringWriter = new StringWriter();
xs.Serialize(stringWriter, pObject);
return stringWriter.ToString();