I'm looking for an easy way to convert my XML to JSON, with the additional option of adding the full XPath as an attribute. Currently I do it this way:
/// <summary>
/// Converts an XML string to JSON: the XML is parsed into an element tree,
/// turned into nested dictionaries by GetXmlValues and then serialized.
/// </summary>
/// <param name="xmlString">The XML text to convert.</param>
/// <returns>The JSON text produced by JavaScriptSerializer.</returns>
private static string XmlToJson(string xmlString)
{
    XElement root = XElement.Parse(xmlString);
    Dictionary<string, object> tree = GetXmlValues(root);
    return new JavaScriptSerializer().Serialize(tree);
}
/// <summary>
/// Builds the dictionary representation of one element: its attributes keyed by
/// local name, plus "_children" (for elements with child elements) or "_value"
/// (for non-empty leaf elements) and, in both of those cases, "_path".
/// </summary>
/// <param name="xml">The element to convert.</param>
/// <returns>A single-entry dictionary mapping the element's local name to its members.</returns>
private static Dictionary<string, object> GetXmlValues(XElement xml)
{
    var members = new Dictionary<string, object>();
    foreach (XAttribute attribute in xml.Attributes())
    {
        members.Add(attribute.Name.LocalName, attribute.Value);
    }

    if (xml.HasElements)
    {
        // Left as a lazy sequence; it is enumerated when the result is serialized.
        members.Add("_children", xml.Elements().Select(e => GetXmlValues(e)));
    }
    else if (!xml.IsEmpty)
    {
        members.Add("_value", xml.Value);
    }

    // Self-closing elements without children get neither a value nor a path.
    if (xml.HasElements || !xml.IsEmpty)
    {
        members.Add("_path", xml.GetPath());
    }

    var wrapper = new Dictionary<string, object>();
    wrapper.Add(xml.Name.LocalName, members);
    return wrapper;
}
/// <summary>
/// Returns the slash-separated chain of local element names from the root
/// element down to <paramref name="node"/>, e.g. "a/b/c".
/// </summary>
/// <param name="node">The element whose path is wanted.</param>
private static string GetPath(this XElement node)
{
    // AncestorsAndSelf yields the node first and the root last, so reverse it.
    var namesFromRoot = node.AncestorsAndSelf()
                            .Reverse()
                            .Select(element => element.Name.LocalName);
    return string.Join("/", namesFromRoot);
}
But it looks circuitous compared to:
// Load the XML text into a DOM document and let Json.NET convert the whole tree.
var doc = new XmlDocument();
doc.LoadXml(xml);
var jsonText = JsonConvert.SerializeXmlNode(doc);
But with that approach I have no idea how to add the path during the conversion.
But it looks circuitous compare to
Json.NET uses its own implementation of JsonConverter, named XmlNodeConverter. So if you want it to look less circuitous, you can implement your own JsonConverter and use it:
// Parse the XML and serialize it through a custom converter that also emits the XPath.
XDocument doc = XDocument.Parse(xml);
string json = JsonConvert.SerializeObject(
    doc,
    new MyXmlWithXPathJsonConverter());
This is a nice but quite complex task.
A somewhat simpler way is to add an xpath attribute to each of your XML nodes right before serialization. For example:
/// <summary>
/// Adds an "xpath" attribute, holding a positional path such as "/xml[1]/bar[1]/item[2]",
/// to the document's root element (or to the given element) and to every descendant element.
/// </summary>
/// <param name="container">An XDocument or an XElement. For an XElement the path starts with "//".</param>
/// <exception cref="ArgumentNullException"><paramref name="container"/> is null.</exception>
/// <exception cref="InvalidOperationException">An element already has an "xpath" attribute.</exception>
public void AppendXPath(XContainer container)
{
    if (container == null)
        throw new ArgumentNullException(nameof(container));

    var doc = container as XDocument;
    if (doc != null)
    {
        // An empty document has no root element, so there is nothing to annotate.
        if (doc.Root != null)
            AppendXPath(doc.Root, "", 1);
        return;
    }

    // Anything that is not an XDocument is expected to be an XElement; a direct cast
    // fails loudly instead of passing null further down.
    AppendXPath((XElement)container, "/", 1);
}

/// <summary>
/// Annotates <paramref name="node"/> and, recursively, its child elements.
/// </summary>
/// <param name="node">The element to annotate.</param>
/// <param name="parent">The path of the parent element ("" for the document root).</param>
/// <param name="num">1-based position of the node among its same-named siblings.</param>
private void AppendXPath(XElement node, string parent, int num)
{
    var path = $"{parent}/{node.Name}[{num}]";
    if (node.Attribute("xpath") != null)
        throw new InvalidOperationException($"Node {path} already contains xpath attribute");

    // Running count of children seen so far, per element name.
    var indices = new Dictionary<XName, int>();
    foreach (var child in node.Elements())
    {
        int index;
        indices.TryGetValue(child.Name, out index); // stays 0 for a name not seen yet
        indices[child.Name] = ++index;
        AppendXPath(child, path, index);
    }

    // Added last so the attribute is written only after all children were processed.
    node.Add(new XAttribute("xpath", path));
}
Test:
/// <summary>
/// Demo: parses a sample document, annotates it with AppendXPath and prints
/// the indented JSON produced by Json.NET.
/// </summary>
void Test()
{
    // Verbatim string literal (@"..."): doubled quotes stand for a single quote character.
    var xml =
@"<xml>
<foo>
<one />
<other />
</foo>
<bar data=""abc"">
<item order=""3"" />
<item order=""1"">
<child whatever="""" />
</item>
</bar>
</xml>";
    var doc = XDocument.Parse(xml);
    AppendXPath(doc);
    var json = JsonConvert.SerializeObject(doc, Newtonsoft.Json.Formatting.Indented);
    Console.WriteLine(json);
}
Result:
{
"xml": {
"@xpath": "/xml[1]",
"foo": {
"@xpath": "/xml[1]/foo[1]",
"one": {
"@xpath": "/xml[1]/foo[1]/one[1]"
},
"other": {
"@xpath": "/xml[1]/foo[1]/other[1]"
}
},
"bar": {
"@data": "abc",
"@xpath": "/xml[1]/bar[1]",
"item": [
{
"@order": "3",
"@xpath": "/xml[1]/bar[1]/item[1]"
},
{
"@order": "1",
"@xpath": "/xml[1]/bar[1]/item[2]",
"child": {
"@whatever": "",
"@xpath": "/xml[1]/bar[1]/item[2]/child[1]"
}
}
]
}
}
}
Related
I'm trying to convert a two-dimensional list of strings to an HTML table.
I wrote this, which does the job:
/// <summary>
/// Renders a list of rows as an HTML table. Cells of the first row become
/// th elements, all other cells become td elements.
/// </summary>
/// <param name="list">The rows; each inner list holds the cell texts of one row.</param>
/// <returns>The table markup as produced by XDocument.ToString().</returns>
public string htmlTableFromTwoDimensionList(List<List<string>> list)
{
    var table = new XElement("table");
    var document = new XDocument(table);

    foreach (var row in list)
    {
        var tableRow = new XElement("tr");
        foreach (var cellText in row)
        {
            // The header row is recognised by being the same instance as the first row.
            string cellName = list.First() == row ? "th" : "td";
            tableRow.Add(new XElement(cellName, cellText));
        }
        table.Add(tableRow);
    }

    return document.ToString();
}
But now I have learned that a string can contain some HTML. So I would like to parse it if it is HTML, or use it as a plain string if it is not. I tried something like this, without success:
/// <summary>
/// Renders a list of rows as an HTML table. Cells of the first row become th
/// elements, all other cells become td elements. A cell whose text parses as a
/// single XML element is inserted as markup; any other text is inserted as plain text.
/// </summary>
/// <param name="list">The rows; each inner list holds the cell texts of one row.</param>
/// <returns>The table markup as produced by XDocument.ToString().</returns>
public string htmlTableFromTwoDimensionList(List<List<string>> list)
{
    var table = new XElement("table");
    var document = new XDocument(table);

    foreach (List<string> row in list)
    {
        // The header row is recognised by being the same instance as the first row.
        string cellName = row == list[0] ? "th" : "td";
        var tableRow = new XElement("tr");

        foreach (string col in row)
        {
            // Holds either the parsed element or the raw text, so no second
            // "was it parsed?" flag is needed and an empty cell is not misclassified.
            object content;
            if (string.IsNullOrWhiteSpace(col))
            {
                // Nothing to parse; XElement.Parse would throw on null or blank input.
                content = col;
            }
            else
            {
                try
                {
                    content = XElement.Parse(col);
                }
                catch (System.Xml.XmlException)
                {
                    // Not well-formed markup: fall back to plain text.
                    content = col;
                }
            }

            tableRow.Add(new XElement(cellName, content));
        }

        table.Add(tableRow);
    }

    return document.ToString();
}
But I'm not even sure whether I should use try/catch in this situation. Any idea how to do this properly?
Here is a solution, probably not the best, with some sample input:
/// <summary>
/// Console demo that renders a sample two-dimensional list as an HTML table.
/// </summary>
class Program
{
    /// <summary>Builds the sample data, prints the table and waits for a key press.</summary>
    static void Main(string[] args)
    {
        List<List<string>> table = new List<List<String>>{
            new List<String> { "1d", "Client", "some html", "Date", "col n"},
            new List<String> { "1", "Client 1","google","31/12/2022", "some content ..." },
            new List<String> { "2", "Client 2","google","31/12/2022", "some content ..." },
            new List<String> { "3", "Client 3","google","31/12/2022", "some content ..." },
        };
        Console.Write(htmlTableFromTwoDimensionList(table));
        Console.Read();
    }

    /// <summary>
    /// Renders a list of rows as an HTML table. Cells of the first row become th
    /// elements, all other cells become td elements. A cell whose text parses as a
    /// single XML element is inserted as markup; any other text is inserted as plain text.
    /// </summary>
    /// <param name="list">The rows; each inner list holds the cell texts of one row.</param>
    /// <returns>The table markup as produced by XDocument.ToString().</returns>
    public static string htmlTableFromTwoDimensionList(List<List<string>> list)
    {
        var table = new XElement("table");
        var document = new XDocument(table);

        foreach (List<string> row in list)
        {
            // The header row is recognised by being the same instance as the first row.
            string cellName = row == list[0] ? "th" : "td";
            var tableRow = new XElement("tr");

            foreach (string col in row)
            {
                var cell = new XElement(cellName);
                if (string.IsNullOrWhiteSpace(col))
                {
                    // Nothing to parse; XElement.Parse would throw on null or blank input.
                    cell.Add(col);
                }
                else
                {
                    try
                    {
                        cell.Add(XElement.Parse(col));
                    }
                    catch (System.Xml.XmlException)
                    {
                        // Only "not well-formed XML" means plain text; other failures propagate.
                        cell.Add(col);
                    }
                }
                tableRow.Add(cell);
            }

            table.Add(tableRow);
        }

        return document.ToString();
    }
}
First, I read a lot of topics about JSON to TreeView on Stack Overflow. After that, I created JSON data like this:
{
"Cars": {
"Audi": [{
"A6 2.0 TDI quatro 2018 Red": ["S-Line", "17 inch rim", "Full sport packet"],
"A5 1.6 TFSI 2018 Blue": ["Desing packet", "Sunroof"]
}],
"Mercedes-Benz": [{
"E220d AMG 2018 white": ["Glass ceiling", "Vacuum doors", "Navigation"],
"E220d Exclusive Black 2018 Blue": ["Power seats", "Start & Stop"]
}]
}
}
Here is the C# code content:
/// <summary>
/// Form load handler: reads cars.json, converts it to a tree with Json2Tree and
/// shows it fully expanded in treeView1. Any failure is reported in a message box.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    try
    {
        treeView1.Nodes.Clear();
        // Uz.path is concatenated as-is, so it presumably ends with a directory separator — TODO confirm.
        var json = File.ReadAllText(Uz.path + @"cars.json");
        var obj = JObject.Parse(json);
        var parent = Json2Tree(obj);
        treeView1.Nodes.Add(parent);
        treeView1.ExpandAll();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, @"ERROR");
    }
}
/// <summary>
/// Converts a JSON object into a TreeNode hierarchy.
/// </summary>
/// <remarks>
/// The returned node's text is overwritten for every property, so it ends up
/// carrying the name of the LAST property of <paramref name="obj"/>. Nested
/// objects are added as the node returned by the recursive call, which means
/// the property name of a nested object is not shown as a node of its own.
/// </remarks>
/// <param name="obj">The JSON object to convert.</param>
/// <returns>The node representing <paramref name="obj"/>.</returns>
private static TreeNode Json2Tree(JObject obj)
{
    var parent = new TreeNode();

    // Enumerating a JObject yields key/value pairs.
    foreach (var token in obj)
    {
        parent.Text = token.Key;

        switch (token.Value.Type)
        {
            case JTokenType.Object:
            {
                // The nested object's own node replaces a node for the property itself.
                parent.Nodes.Add(Json2Tree((JObject)token.Value));
                break;
            }
            case JTokenType.Array:
            {
                var arrayNode = new TreeNode { Text = token.Key };
                foreach (var item in token.Value)
                {
                    if (item.Type == JTokenType.Object)
                    {
                        arrayNode.Nodes.Add(Json2Tree((JObject)item));
                    }
                    else if (item.Type == JTokenType.Array)
                    {
                        // A nested array becomes an unlabeled node holding its items as text.
                        var nestedArrayNode = new TreeNode();
                        foreach (var data in item)
                        {
                            nestedArrayNode.Nodes.Add(data.ToString());
                        }
                        arrayNode.Nodes.Add(nestedArrayNode);
                    }
                    else
                    {
                        // Plain value (string, number, ...).
                        arrayNode.Nodes.Add(item.ToString());
                    }
                }
                parent.Nodes.Add(arrayNode);
                break;
            }
            default:
            {
                // Plain value: a node for the key with one child showing the value,
                // or "N/A" when the value renders as an empty string.
                var valueNode = new TreeNode { Text = token.Key };
                var valueText = token.Value.ToString();
                valueNode.Nodes.Add(valueText == "" ? "N/A" : valueText);
                parent.Nodes.Add(valueNode);
                break;
            }
        }
    }

    return parent;
}
When I run the code, the screenshot looks like this:
But the nodes marked as 1, 2 and 3 should not be shown. It should look like this:
Although I worked on it for 3 days, I did not succeed.
In the JsonTreeView project, it is done like this:
using System.Windows.Forms;
using Newtonsoft.Json.Linq;
namespace JsonTreeView
{
    /// <summary>
    /// Helpers that display a JSON document in a WinForms TreeView.
    /// </summary>
    public static class JsonToTreeView
    {
        /// <summary>
        /// Parses <paramref name="json"/> and adds it to the tree view under a node
        /// named <paramref name="group_name"/>. Null, empty or blank JSON is ignored.
        /// </summary>
        public static void Json2Tree(this TreeView treeView, string json, string group_name)
        {
            if (!string.IsNullOrWhiteSpace(json))
            {
                AddObjectNodes(JObject.Parse(json), group_name, treeView.Nodes);
            }
        }

        /// <summary>
        /// Adds a node called <paramref name="name"/> to <paramref name="parent"/> and
        /// fills it with one entry per property of <paramref name="obj"/>.
        /// </summary>
        public static void AddObjectNodes(JObject obj, string name, TreeNodeCollection parent)
        {
            var objectNode = new TreeNode(name);
            parent.Add(objectNode);

            foreach (var member in obj.Properties())
            {
                AddTokenNodes(member.Value, member.Name, objectNode.Nodes);
            }
        }

        /// <summary>
        /// Adds a node called <paramref name="name"/> and fills it with the array's
        /// items; container items are labelled "[0]", "[1]", ...
        /// </summary>
        private static void AddArrayNodes(JArray array, string name, TreeNodeCollection parent)
        {
            var arrayNode = new TreeNode(name);
            parent.Add(arrayNode);

            var position = 0;
            while (position < array.Count)
            {
                AddTokenNodes(array[position], $"[{position}]", arrayNode.Nodes);
                position++;
            }
        }

        /// <summary>
        /// Dispatches on the token kind: values become a leaf showing the value
        /// (the name is not shown), arrays and objects become named sub-trees.
        /// Any other token kind is ignored.
        /// </summary>
        private static void AddTokenNodes(JToken token, string name, TreeNodeCollection parent)
        {
            if (token is JValue leaf)
            {
                parent.Add(new TreeNode($"{leaf.Value}"));
            }
            else if (token is JArray items)
            {
                AddArrayNodes(items, name, parent);
            }
            else if (token is JObject nested)
            {
                AddObjectNodes(nested, name, parent);
            }
        }
    }
}
I'm using HtmlAgilityPack. Does it have a function similar to jQuery closest? (closest parent that matches a CSS selector). I tried google and the website http://html-agility-pack.net - and both don't appear to have an answer.
As there is currently no built-in method, you can write an extension method to achieve this.
I have written a simple extension method that finds ancestor elements by tag name, ID and class names.
It can easily be extended further to match other selectors.
/// <summary>
/// jQuery-like helpers for HtmlAgilityPack nodes.
/// </summary>
public static class HtmlAgilityPackExtensions
{
    /// <summary>
    /// Returns the nearest ancestor of <paramref name="node"/> that matches a simple
    /// selector made of an optional tag name, an optional "#id" and any number of
    /// ".class" parts, e.g. "span", ".parent", "#parent1", "span.parent" or "div#main.wide".
    /// </summary>
    /// <remarks>
    /// Only ancestors are tested, never the node itself. Tag names are compared
    /// ignoring case. Every requested class must be present as a whole
    /// whitespace-separated token of the ancestor's class attribute. A selector
    /// without any criteria matches the immediate parent.
    /// </remarks>
    /// <param name="node">The starting node; null yields null.</param>
    /// <param name="jQuerySelector">The selector to match ancestors against.</param>
    /// <returns>The closest matching ancestor, or null when there is none.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="jQuerySelector"/> is null.</exception>
    public static HtmlNode Closest(this HtmlNode node, string jQuerySelector)
    {
        if (node == null) return null;
        if (jQuerySelector == null) throw new System.ArgumentNullException("jQuerySelector");

        string tagName, id;
        var classes = new List<string>();
        ParseSelector(jQuerySelector, out tagName, out id, classes);

        for (var ancestor = node.ParentNode; ancestor != null; ancestor = ancestor.ParentNode)
        {
            if (Matches(ancestor, tagName, id, classes))
            {
                return ancestor;
            }
        }

        return null;
    }

    /// <summary>Splits a selector into its tag name, id and class parts.</summary>
    private static void ParseSelector(string selector, out string tagName, out string id, List<string> classes)
    {
        id = "";

        // Everything before the first '.' or '#' is the tag name.
        var position = 0;
        while (position < selector.Length && !IsMarker(selector[position])) position++;
        tagName = selector.Substring(0, position).Trim();

        // Each following segment is introduced by its marker character.
        while (position < selector.Length)
        {
            char marker = selector[position++];
            int start = position;
            while (position < selector.Length && !IsMarker(selector[position])) position++;

            string value = selector.Substring(start, position - start).Trim();
            if (value.Length == 0) continue;

            if (marker == '#') id = value;
            else classes.Add(value);
        }
    }

    /// <summary>True for the characters that start an id ('#') or class ('.') segment.</summary>
    private static bool IsMarker(char c)
    {
        return c == '.' || c == '#';
    }

    /// <summary>Checks one candidate node against all parsed criteria.</summary>
    private static bool Matches(HtmlNode candidate, string tagName, string id, List<string> classes)
    {
        if (tagName.Length > 0 &&
            !string.Equals(candidate.Name, tagName, System.StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        if (id.Length > 0 && candidate.Id != id)
        {
            return false;
        }

        if (classes.Count == 0)
        {
            return true;
        }

        // Compare whole class tokens; a missing class attribute yields an empty set
        // and therefore no match.
        var presentClasses = new HashSet<string>(
            candidate.GetAttributeValue("class", "")
                     .Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries));
        return presentClasses.IsSupersetOf(classes);
    }
}
Test Code
// Sample markup: TestNode is nested inside three ancestors that carry class 'parent'.
var html = "<div><div id='parent1' class='parent'><span id='parent2' class='parent'><div id='parent3' class='parent'><div id='TestNode' class='child'>Test node</div></div></span></div></div>";
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(html);
// XPath attribute tests are written with '@'.
var testNode1 = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='TestNode']");
if (testNode1 != null)
{
    var parent1 = testNode1.Closest(".parent");
    var parent2 = testNode1.Closest("#parent1");
    var parent3 = testNode1.Closest("span.parent");
    var nonExistingParent = testNode1.Closest("span.parent1");
}
I needed the same thing, but couldn't find any, so I wrote my own Closest function:
/// <summary>
/// Returns the nearest ancestor of <paramref name="node"/> whose tag name equals
/// <paramref name="search"/>, ignoring case.
/// </summary>
/// <param name="node">The starting node; null yields null.</param>
/// <param name="search">The tag name to look for.</param>
/// <returns>The closest matching ancestor, or null when there is none.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="search"/> is null.</exception>
public static HtmlNode Closest(this HtmlNode node, string search)
{
    if (search == null) throw new System.ArgumentNullException("search");
    if (node == null) return null;

    for (var ancestor = node.ParentNode; ancestor != null; ancestor = ancestor.ParentNode)
    {
        // Ordinal comparison: lower-casing with the current culture can change
        // ASCII tag names (e.g. 'I' under Turkish culture rules).
        if (string.Equals(ancestor.Name, search, System.StringComparison.OrdinalIgnoreCase))
        {
            return ancestor;
        }
    }

    return null;
}
This one only works for tag names (which is all I needed); you can extend it to classes, attributes, and so on.
Consider the following XML which I have to parse.
<root>
<item>
<itemId>001</itemId>
<itemName>test 1</itemName>
<description/>
</item>
</root>
I have to parse each of its tag and store it into a table as follows:
TAG_NAME TAG_VALUE IsContainer
------------ -------------- -----------
root null true
item null true
itemId 001 false
itemName test 1 false
description null false
/item null true
/root null true
Now to get this done, I am using XmlReader as this allows us to parse each & every node.
I am doing it as follows:
I created the following class to contain each tag's data
/// <summary>
/// One row of the flattened tag table: a tag name, its text value and a flag
/// telling whether the tag contains child elements.
/// </summary>
public class XmlTag
{
/// <summary>Tag name; the parsing code in this file stores closing tags with a leading "/".</summary>
public string XML_TAG { get; set; }
/// <summary>Text value of the tag, or null when none was recorded.</summary>
public string XML_VALUE { get; set; }
/// <summary>True when the tag has child elements.</summary>
public bool IsContainer { get; set; }
}
I am trying to get the list of tags(including closing ones) as follows:
/// <summary>
/// Reads the XML file at <paramref name="path"/> and records one XmlTag per
/// opening tag and one (prefixed with "/") per closing tag. Values and the
/// IsContainer flag are not filled in yet — see the open questions below.
/// </summary>
private static List<XmlTag> ParseXml(string path)
{
    var tags = new List<XmlTag>();
    using (var reader = XmlReader.Create(path))
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element)
            {
                //How do I get the VALUE of current reader?
                //How do I determine if the current node contains children nodes to set IsContainer property of XmlTag object?
                tags.Add(new XmlTag { XML_TAG = reader.Name });
            }
            else if (reader.NodeType == XmlNodeType.EndElement)
            {
                //How do I determine if the current closing node belongs to a node which had children.. like ROOT or ITEM in above example?
                tags.Add(new XmlTag
                {
                    XML_TAG = string.Format("/{0}", reader.Name),
                    XML_VALUE = null
                });
            }
        }
    }
    return tags;
}
but I am having difficulty determining the following:
How to determine if current ELEMENT contains children XML nodes? To set IsContainer property.
How to get the value of current node value if it is of type XmlNodeType.Element
Edit:
I have tried to use LINQ to XML as follows:
// One XmlTag per element in document order; elements with children get no value.
var xdoc = XDocument.Load(@"SampleItem.xml");
var tags = (from t in xdoc.Descendants()
            select new XmlTag
            {
                XML_TAG = t.Name.ToString(),
                XML_VALUE = t.HasElements ? null : t.Value,
                IsContainer = t.HasElements
            }).ToList();
This gives me the XML tags and their values but this does not give me ALL the tags including the closing ones. That's why I decided to try XmlReader. But If I have missed anything in LINQ to XML example, please correct me.
First of all, as noted by Jon Skeet in the comments you should probably consider using other tools, like XmlDocument possibly with LINQ to XML (EDIT: an example with XmlDocument follows).
Having said that, here is the simplest solution for what you have currently (note that it's not the cleanest possible code, and it doesn't have much validation):
/// <summary>
/// Consumes the content of the element the reader is currently positioned on and
/// returns the flattened tag list for it: the element itself, everything nested
/// inside it and, when the matching end tag is found, a closing entry.
/// </summary>
/// <param name="reader">Reader positioned on the element's start tag.</param>
/// <param name="element">The tag created for that element; its value and container flag are filled in here.</param>
private static List<XmlTag> ParseElement(XmlReader reader, XmlTag element)
{
    var tags = new List<XmlTag>();
    tags.Add(element);

    while (reader.Read())
    {
        XmlNodeType kind = reader.NodeType;

        if (kind == XmlNodeType.EndElement)
        {
            if (reader.Name == element.XML_TAG)
            {
                var closing = new XmlTag();
                closing.XML_TAG = string.Format("/{0}", reader.Name);
                closing.IsContainer = element.IsContainer;
                tags.Add(closing);
            }
            // Any end tag finishes this element.
            return tags;
        }

        if (kind == XmlNodeType.Text)
        {
            element.XML_VALUE = reader.Value;
            continue;
        }

        if (kind != XmlNodeType.Element)
        {
            continue;
        }

        // A child element makes the current element a container.
        element.IsContainer = true;
        var child = new XmlTag();
        child.XML_TAG = reader.Name;

        if (reader.IsEmptyElement)
        {
            // Self-closing tag: no content and no end tag will follow.
            tags.Add(child);
        }
        else
        {
            tags.AddRange(ParseElement(reader, child));
        }
    }

    return tags;
}
/// <summary>
/// Reads the XML file at <paramref name="path"/> and returns the flattened list of
/// opening and closing tags, delegating each top-level element to ParseElement.
/// </summary>
/// <param name="path">Path or URI of the XML file.</param>
private static List<XmlTag> ParseXml(string path)
{
    var result = new List<XmlTag>();
    using (var reader = XmlReader.Create(path))
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element)
            {
                result.AddRange(ParseElement(
                    reader,
                    new XmlTag() { XML_TAG = reader.Name }));
            }
            else if (reader.NodeType == XmlNodeType.EndElement)
            {
                // The tag name comes from the reader; no other element variable exists in this method.
                result.Add(new XmlTag()
                {
                    XML_TAG = string.Format("/{0}", reader.Name)
                });
            }
        }
    }
    return result;
}
An example using LINQ to XML (XDocument/XElement). This gives a slightly different result for self-closing tags (<description/> in your case): they get a closing entry as well. You can easily change this behaviour, depending on what you want.
/// <summary>
/// Lazily yields the flattened tag list for <paramref name="current"/>: an opening
/// entry, the entries of all descendants (for containers) or the text value
/// (for leaves), and a closing entry whose name is prefixed with "/".
/// </summary>
private static IEnumerable<XmlTag> ProcessElement(XElement current)
{
    if (!current.HasElements)
    {
        // Leaf element: opening entry with the value, then the closing entry.
        var leaf = new XmlTag();
        leaf.XML_TAG = current.Name.ToString();
        leaf.XML_VALUE = current.Value;
        yield return leaf;

        var leafEnd = new XmlTag();
        leafEnd.XML_TAG = string.Format("/{0}", current.Name.ToString());
        yield return leafEnd;
        yield break;
    }

    var opening = new XmlTag();
    opening.XML_TAG = current.Name.ToString();
    opening.IsContainer = true;
    yield return opening;

    foreach (var child in current.Elements())
    {
        foreach (var nested in ProcessElement(child))
        {
            yield return nested;
        }
    }

    var closing = new XmlTag();
    closing.XML_TAG = string.Format("/{0}", current.Name.ToString());
    closing.IsContainer = true;
    yield return closing;
}
And using it:
// Load the file and flatten the whole tree starting at the root element.
var xdoc = XDocument.Load(@"test.xml");
var tags = ProcessElement(xdoc.Root).ToList();
I have the following code:
/// <summary>
/// Loads C:\MR.xml, prints the message id and reads the file name and file id of
/// every attachment.
/// </summary>
static void Main(string[] args)
{
    XmlDocument xml = new XmlDocument();
    xml.Load(@"C:\MR.xml");
    XmlNodeList stations = xml.SelectNodes("//FileDump/Message/Attachment");
    var Message_ID = xml.SelectSingleNode("//FileDump/Message/MsgID").InnerXml;
    Console.WriteLine("Message ID is :{0}", Message_ID);
    foreach (XmlNode station in stations)
    {
        // SelectSingleNode returns null when the child is missing, so these two
        // lines throw NullReferenceException for attachments without FileName/FileID.
        var File_Name = station.SelectSingleNode("FileName").InnerXml;
        var File_ID = station.SelectSingleNode("FileID").InnerXml;
    }
}
FileID and FileName do not always exist in some files. How can I avoid NullReferenceExceptions in this case?
I would try something like this if that check has to happen in a lot of places, to keep the code simple and clear:
/// <summary>
/// Null-tolerant helpers for XmlNode lookups.
/// </summary>
public static class Helpers
{
    /// <summary>
    /// Returns the inner XML of the first node matching <paramref name="innerNodeName"/>
    /// (an XPath expression evaluated from <paramref name="node"/>), or an empty
    /// string when no such node exists.
    /// </summary>
    public static string GetInnerXml(this XmlNode node, string innerNodeName)
    {
        XmlNode match = node.SelectSingleNode(innerNodeName);
        if (match == null)
        {
            return "";
        }
        return match.InnerXml;
    }
}
and use it like this
/// <summary>
/// Same flow as before, but every lookup goes through GetInnerXml, which yields
/// an empty string instead of throwing when a node is missing.
/// </summary>
static void Main(string[] args)
{
    XmlDocument xml = new XmlDocument();
    xml.Load(@"C:\MR.xml");
    XmlNodeList stations = xml.SelectNodes("//FileDump/Message/Attachment");
    var Message_ID = xml.GetInnerXml("//FileDump/Message/MsgID");
    Console.WriteLine("Message ID is :{0}", Message_ID);
    foreach (XmlNode station in stations)
    {
        var File_Name = station.GetInnerXml("FileName");
        var File_ID = station.GetInnerXml("FileID");
    }
}
You could do something like:
// Both values default to "" and are only overwritten when the child node exists.
string File_Name = "";
string File_ID = "";
// Look each node up once and reuse the result for the null check and the read.
var fileNameNode = station.SelectSingleNode("FileName");
if (fileNameNode != null)
    File_Name = fileNameNode.InnerXml;
var fileIdNode = station.SelectSingleNode("FileID");
if (fileIdNode != null)
    File_ID = fileIdNode.InnerXml;
And continue processing if the vars are not the empty string ... ("") ...
/// <summary>
/// Loads C:\MR.xml, prints the message id and reads the file name and file id of
/// every attachment, using null when an attachment lacks the corresponding node.
/// </summary>
static void Main(string[] args)
{
    XmlDocument xml = new XmlDocument();
    xml.Load(@"C:\MR.xml");
    XmlNodeList stations = xml.SelectNodes("//FileDump/Message/Attachment");
    // Unlike the attachment fields below, MsgID is still dereferenced without a null check.
    var Message_ID = xml.SelectSingleNode("//FileDump/Message/MsgID").InnerXml;
    Console.WriteLine("Message ID is :{0}", Message_ID);
    foreach (XmlNode station in stations)
    {
        var fileNameNode = station.SelectSingleNode("FileName");
        var fileIdNode = station.SelectSingleNode("FileID");
        var File_Name = fileNameNode == null ? (string)null : fileNameNode.InnerXml;
        var File_ID = fileIdNode == null ? (string)null : fileIdNode.InnerXml;
    }
}
I usually use extension methods for handling unexpected nulls.
/// <summary>
/// Returns the attribute's value, or null when the attribute itself is null.
/// </summary>
public static string GetValueIfNotNull(this XmlAttribute xmlAttribute)
{
    return xmlAttribute != null ? xmlAttribute.Value : null;
}
Then I can call myElement.Attributes["someAttribute"].GetValueIfNotNull(); (the XmlElement indexer returns null when the attribute is missing).