I am trying to build a data pipeline in .NET. I have been given an XSD and have used the XML Schema Definition Tool to generate C# classes that represent the object model. In order to load this data into my data store I need to transform the data coming out of the XML into a structure that matches my application schema and collect/dedupe elements. To do this I have a parser class that reads a file and loads the contents into local collections, which are then loaded into my database. I see two options for doing this:
Manually loop through the XML with an XmlReader and pull out the data I need, loading it into the local collections as it streams past. This is not desirable because it does not take advantage of the strongly typed/strict XSD that I was given, and it requires a lot of hard-coding: `while (reader.Read())`, checks for specific XML node names, and then `reader.GetAttribute("HardCodedString")`.
Use XmlSerializer to deserialize the whole file at once and then loop through the deserialized collections and insert into my local collections. This is not desirable because the files could be very large and this method forces me to loop through all of the data twice (once to deserialize and once to extract the data to my local collections).
Ideally I would like some way to register a delegate to be executed as each object is deserialized to insert into my local collections. Is there something in the framework that allows me to do this? Requirements are as follows:
Performant - Only loop through the data once.
Functional - Data is inserted into the local collections during deserialization.
Maintainable - Utilize strongly typed classes that were generated via the xsd.
I have created a minimal example to illustrate my point.
Example XML File:
<Hierarchy xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.example.com/example">
<Children>
<Child ChildId="1" ChildName="First">
<Parents>
<Parent ParentId="1" ParentName="First" RelationshipStart="1900-01-01T00:00:00"/>
<Parent ParentId="2" ParentName="Second" RelationshipStart="2000-01-01T00:00:00"/>
</Parents>
</Child>
<Child ChildId="2" ChildName="Second">
<Parents>
<Parent ParentId="2" ParentName="Second" RelationshipStart="1900-01-01T00:00:00"/>
<Parent ParentId="3" ParentName="Third" RelationshipStart="2000-01-01T00:00:00"/>
</Parents>
</Child>
</Children>
</Hierarchy>
Local collections I am trying to load:
// Parent names keyed by ParentId (the parse methods below assign Parents[parentId] = parentName).
public Dictionary<int, string> Parents { get; }
// Child names keyed by ChildId.
public Dictionary<int, string> Children { get; }
// One entry is appended per <Parent> element encountered under a <Child>.
public List<Relationship> Relationships { get; }
Manual version (not maintainable and doesn't use xsd):
/// <summary>
/// Streams through <paramref name="fileName"/> with an <see cref="XmlReader"/> and fills
/// <c>Children</c>, <c>Parents</c> and <c>Relationships</c> in a single pass.
/// </summary>
/// <param name="fileName">Path (or URI) of the XML document to read.</param>
/// <remarks>
/// Nothing is read unless a <c>Hierarchy</c> element is found. Attribute values are parsed with
/// <see cref="XmlConvert"/>, so the result does not depend on the current thread culture.
/// </remarks>
public void ParseFileManually(string fileName)
{
    using (var reader = XmlReader.Create(fileName))
    {
        // Everything before the <Hierarchy> element is ignored.
        if (!reader.ReadToFollowing("Hierarchy"))
        {
            return;
        }

        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element && reader.Name == "Child")
            {
                ReadChild(reader);
            }
        }
    }
}

/// <summary>
/// Reads one <c>Child</c> element (the reader is positioned on its start tag) together with
/// the <c>Parent</c> elements nested inside it.
/// </summary>
private void ReadChild(XmlReader reader)
{
    int childId = XmlConvert.ToInt32(reader.GetAttribute("ChildId"));
    Children[childId] = reader.GetAttribute("ChildName");

    // A self-closing <Child ... /> never produces an EndElement node; without this check
    // the loop below would run on and attribute the NEXT child's parents to this child.
    if (reader.IsEmptyElement)
    {
        return;
    }

    while (reader.Read())
    {
        if (reader.NodeType == XmlNodeType.Element && reader.Name == "Parent")
        {
            int parentId = XmlConvert.ToInt32(reader.GetAttribute("ParentId"));
            DateTime relationshipStart = XmlConvert.ToDateTime(
                reader.GetAttribute("RelationshipStart"),
                XmlDateTimeSerializationMode.RoundtripKind);

            // Indexer assignment de-duplicates parents: the last name seen for an id wins.
            Parents[parentId] = reader.GetAttribute("ParentName");
            Relationships.Add(
                new Relationship
                {
                    ParentId = parentId,
                    ChildId = childId,
                    Start = relationshipStart
                });
        }
        else if (reader.NodeType == XmlNodeType.EndElement && reader.Name == "Child")
        {
            break;
        }
    }
}
Deserialize version (loops through the data twice):
/// <summary>
/// Deserializes the whole file into the xsd-generated <c>Hierarchy</c> graph and then copies
/// it into <c>Children</c>, <c>Parents</c> and <c>Relationships</c>.
/// </summary>
/// <param name="fileName">Path of the XML document to read.</param>
public void ParseFileWithDeserialize(string fileName)
{
    var serializer = new XmlSerializer(typeof(Hierarchy));
    Hierarchy fileData;

    // Ask for read access only: FileStream(path, FileMode.Open) requests read/write access
    // and therefore fails on read-only files.
    using (var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        fileData = (Hierarchy)serializer.Deserialize(fileStream);
    }

    // An absent element can leave the collection null after deserialization.
    if (fileData?.Children == null)
    {
        return;
    }

    foreach (var child in fileData.Children)
    {
        Children[child.ChildId] = child.ChildName;
        if (child.Parents == null)
        {
            continue;
        }

        foreach (var parent in child.Parents)
        {
            Parents[parent.ParentId] = parent.ParentName;
            Relationships.Add(
                new Relationship
                {
                    ParentId = parent.ParentId,
                    ChildId = child.ChildId,
                    Start = parent.RelationshipStart
                });
        }
    }
}
You should use some annotations to get the data from the correct field in the XML, if you use these definitions;
/// <summary>
/// Root object handed to <c>XmlSerializer</c>; it simply owns the list of children.
/// </summary>
public class Hierarchy
{
    /// <summary>Child entries. Starts out as an empty list so items can be added immediately.</summary>
    public List<Child> Children { get; set; } = new List<Child>();
}
/// <summary>
/// A child entry: id and name are carried as XML attributes, parents as a nested list.
/// </summary>
public class Child
{
    /// <summary>Identifier, bound to the <c>ChildId</c> attribute.</summary>
    [XmlAttribute("ChildId")]
    public int ChildId { get; set; }

    /// <summary>Display name, bound to the <c>ChildName</c> attribute.</summary>
    [XmlAttribute("ChildName")]
    public string ChildName { get; set; }

    /// <summary>Parents of this child. Starts out as an empty list.</summary>
    public List<Parent> Parents { get; set; } = new List<Parent>();
}
/// <summary>
/// A parent entry; every property is bound to the XML attribute named in its
/// <c>[XmlAttribute]</c>.
/// </summary>
public class Parent
{
/// <summary>Identifier of the parent.</summary>
[XmlAttribute("ParentId")]
public int ParentId { get; set; }
/// <summary>Display name of the parent.</summary>
[XmlAttribute("ParentName")]
public string ParentName { get; set; }
/// <summary>Start of the relationship (copied into <c>Relationship.Start</c> by the loaders).</summary>
[XmlAttribute("RelationshipStart")]
public DateTime RelationshipStart { get; set; }
}
Then you should be able to simplify your code to;
/// <summary>
/// Reads <paramref name="fileName"/> as UTF-8 text and deserializes it into a <c>Hierarchy</c>.
/// </summary>
/// <param name="fileName">Path of the XML file to load.</param>
/// <returns>The deserialized object graph.</returns>
public static Hierarchy Deserialize(string fileName)
{
    using (var input = new StreamReader(fileName, Encoding.UTF8))
    {
        var serializer = new XmlSerializer(typeof(Hierarchy));
        object graph = serializer.Deserialize(input);
        return (Hierarchy)graph;
    }
}
To test it out you can create a sample data set and serialize it to a file, then use the above code to read it back
/// <summary>
/// Serializes <paramref name="h"/> to <paramref name="fileName"/> as UTF-8 XML, overwriting
/// any existing file.
/// </summary>
/// <param name="h">Object graph to write.</param>
/// <param name="fileName">Destination path.</param>
public static void Serialize(Hierarchy h, string fileName)
{
    var ser = new System.Xml.Serialization.XmlSerializer(typeof(Hierarchy));

    // The writer must be disposed: otherwise the file handle stays open (and write-locked),
    // so reading the file back immediately afterwards can fail.
    using (var sw = new StreamWriter(fileName, false, Encoding.UTF8))
    {
        ser.Serialize(sw, h);
    }
}
Test Code
/// <summary>
/// Round-trip smoke test: builds two children sharing three parents, writes them to
/// <c>Text.xml</c> next to the application and reads the file back.
/// </summary>
static void Test()
{
    var first = new Parent { ParentId = 1, ParentName = "First", RelationshipStart = DateTime.Now };
    var second = new Parent { ParentId = 2, ParentName = "Second", RelationshipStart = DateTime.Now };
    var third = new Parent { ParentId = 3, ParentName = "Third", RelationshipStart = DateTime.Now };

    // Nested collection initializers add to the lists created by the constructors.
    var tree = new Hierarchy
    {
        Children =
        {
            new Child { ChildId = 1, ChildName = "First", Parents = { first, second } },
            new Child { ChildId = 2, ChildName = "Second", Parents = { second, third } }
        }
    };

    string path = AppContext.BaseDirectory + "Text.xml";
    Serialize(tree, path);
    Hierarchy hReadBack = Deserialize(path);
}
Edit : To answer your question
Use these classes
/// <summary>
/// Root of the deserialized graph. Besides the raw <see cref="Children"/> list it exposes
/// three derived, de-duplicated views that are computed lazily on first access.
/// </summary>
/// <remarks>
/// The derived views are cached: changes made to <see cref="Children"/> after the first read
/// of any view are not reflected.
/// </remarks>
public class Hierarchy
{
    private Dictionary<int, string> _parents;
    private Dictionary<int, string> _childrenList;
    private List<Relationship> _relationships;

    public Hierarchy()
    {
        Children = new List<Child>();
    }

    /// <summary>The children exactly as they appear in the XML.</summary>
    public List<Child> Children { get; set; }

    // The three views below are derived data. [XmlIgnore] keeps XmlSerializer away from
    // them: it cannot handle IDictionary members and would otherwise emit the relationship
    // list into the XML as well.

    /// <summary>Parent names keyed by ParentId (first occurrence wins).</summary>
    [XmlIgnore]
    public Dictionary<int, string> Parents
    {
        get
        {
            EnsureCalculated();
            return _parents;
        }
    }

    /// <summary>Child names keyed by ChildId (first occurrence wins).</summary>
    [XmlIgnore]
    public Dictionary<int, string> ChildrenList
    {
        get
        {
            EnsureCalculated();
            return _childrenList;
        }
    }

    /// <summary>One entry per distinct (ParentId, ChildId) pair, in document order.</summary>
    [XmlIgnore]
    public List<Relationship> Relationships
    {
        get
        {
            EnsureCalculated();
            return _relationships;
        }
    }

    private void EnsureCalculated()
    {
        if (_parents == null)
        {
            CalculateLists();
        }
    }

    /// <summary>Walks the graph once and fills all three derived collections.</summary>
    private void CalculateLists()
    {
        var parents = new Dictionary<int, string>();
        var children = new Dictionary<int, string>();
        var relationships = new List<Relationship>();

        // Hash-based membership test instead of scanning the relationship list for every
        // parent, which was quadratic in the number of relationships.
        var seenLinks = new HashSet<Tuple<int, int>>();

        foreach (Child c in Children ?? Enumerable.Empty<Child>())
        {
            if (!children.ContainsKey(c.ChildId))
            {
                children.Add(c.ChildId, c.ChildName);
            }

            foreach (Parent p in c.Parents ?? Enumerable.Empty<Parent>())
            {
                if (!parents.ContainsKey(p.ParentId))
                {
                    parents.Add(p.ParentId, p.ParentName);
                }

                if (seenLinks.Add(Tuple.Create(p.ParentId, c.ChildId)))
                {
                    relationships.Add(new Relationship { ChildId = c.ChildId, ParentId = p.ParentId, Start = p.RelationshipStart });
                }
            }
        }

        // Publish only after everything succeeded so a failure cannot leave a half-built cache.
        _parents = parents;
        _childrenList = children;
        _relationships = relationships;
    }
}
/// <summary>
/// Child element of the hierarchy; id and name travel as XML attributes and the parents
/// as a nested list.
/// </summary>
public class Child
{
    /// <summary>Bound to the <c>ChildId</c> attribute.</summary>
    [XmlAttribute("ChildId")]
    public int ChildId { get; set; }

    /// <summary>Bound to the <c>ChildName</c> attribute.</summary>
    [XmlAttribute("ChildName")]
    public string ChildName { get; set; }

    /// <summary>Parents of this child; initialised to an empty list.</summary>
    public List<Parent> Parents { get; set; } = new List<Parent>();
}
/// <summary>
/// Parent element; each property is bound to the XML attribute named in its
/// <c>[XmlAttribute]</c>.
/// </summary>
public class Parent
{
/// <summary>Identifier of the parent.</summary>
[XmlAttribute("ParentId")]
public int ParentId { get; set; }
/// <summary>Display name of the parent.</summary>
[XmlAttribute("ParentName")]
public string ParentName { get; set; }
/// <summary>Start of the relationship with the enclosing child.</summary>
[XmlAttribute("RelationshipStart")]
public DateTime RelationshipStart { get; set; }
}
Then your test code becomes
/// <summary>
/// Round-trips a small sample through <c>Text.xml</c> and then reads the three derived
/// collections from the deserialized hierarchy.
/// </summary>
public static void Test()
{
    var first = new Parent { ParentId = 1, ParentName = "First", RelationshipStart = DateTime.Now };
    var second = new Parent { ParentId = 2, ParentName = "Second", RelationshipStart = DateTime.Now };
    var third = new Parent { ParentId = 3, ParentName = "Third", RelationshipStart = DateTime.Now };

    // Nested collection initializers add to the lists created by the constructors.
    var tree = new Hierarchy
    {
        Children =
        {
            new Child { ChildId = 1, ChildName = "First", Parents = { first, second } },
            new Child { ChildId = 2, ChildName = "Second", Parents = { second, third } }
        }
    };

    string path = AppContext.BaseDirectory + "Text.xml";
    Serialize(tree, path);
    Hierarchy hReadBack = Deserialize(path);

    // The first property read triggers the lazy calculation inside Hierarchy.
    Dictionary<int, string> parents = hReadBack.Parents;
    Dictionary<int, string> children = hReadBack.ChildrenList;
    List<Relationship> relationships = hReadBack.Relationships;
}
EDIT
To get the results directly without looping
You will need this class
/// <summary>Flat record describing one parent/child link.</summary>
public class Relationship
{
/// <summary>Id of the parent side of the link.</summary>
public int ParentId { get; set; }
/// <summary>Id of the child side of the link.</summary>
public int ChildId { get; set; }
/// <summary>When the relationship started.</summary>
public DateTime Start { get; set; }
}
And this selection
// Child ids and names. Grouping by id first means a repeated ChildId can no longer make
// ToDictionary throw; the last name seen for an id wins, like Children[id] = name.
Dictionary<int, string> Children = hReadBack.Children
    .GroupBy(c => c.ChildId)
    .ToDictionary(g => g.Key, g => g.Last().ChildName);

// Parent ids and names. Distinct() on (id, name) pairs still threw when the same ParentId
// appeared with two different names; grouping by id alone handles that case too.
Dictionary<int, string> Parents = hReadBack.Children
    .SelectMany(c => c.Parents)
    .GroupBy(p => p.ParentId)
    .ToDictionary(g => g.Key, g => g.Last().ParentName);

// One Relationship per (child, parent) pair, in document order. The local is named
// Relationships so it no longer shadows the Relationship type.
List<Relationship> Relationships = hReadBack.Children
    .SelectMany(
        c => c.Parents,
        (c, p) => new Relationship { ChildId = c.ChildId, ParentId = p.ParentId, Start = p.RelationshipStart })
    .ToList();
Related
For reasons such as performance, I have to use HierarchyId in my database. I need to convert the HierarchyId data to JSON so it can be displayed in FancyTree.
I used the solution here, but it doesn't work. My code was:
/// <summary>
/// Builds the sample list of fields (id, name, hierarchy path) and passes it to
/// <c>MyClass.NewMthodTest</c>.
/// </summary>
static void Main(string[] args)
{
    var dd = new List<Field>
    {
        new Field(1, "Earth", HierarchyId.Parse("/")),
        new Field(2, "Europe", HierarchyId.Parse("/1/")),
        new Field(3, "South America", HierarchyId.Parse("/2/")),
        new Field(4, "Antarctica", HierarchyId.Parse("/3/")),
        new Field(5, "Brazil", HierarchyId.Parse("/2/1/")),
        new Field(6, "France", HierarchyId.Parse("/1/1/")),
        new Field(7, "Germany", HierarchyId.Parse("/1/4/")),
        new Field(8, "test", HierarchyId.Parse("/1/5/")),
        new Field(9, "McMurdo Station", HierarchyId.Parse("/3/1/")),
        new Field(10, "Italy", HierarchyId.Parse("/1/3/")),
        new Field(11, "Spain", HierarchyId.Parse("/1/2/")),
        new Field(12, "Morano", HierarchyId.Parse("/1/3/1/")),
        new Field(13, "Rio de Janeiro", HierarchyId.Parse("/2/1/3/")),
        new Field(14, "Paris", HierarchyId.Parse("/1/1/1/")),
        new Field(15, "Madrid", HierarchyId.Parse("/1/2/1/")),
        new Field(16, "Brasilia", HierarchyId.Parse("/2/1/1/")),
        new Field(17, "Bahia", HierarchyId.Parse("/2/1/2/")),
        new Field(18, "Salvador", HierarchyId.Parse("/2/1/2/1/")),
        new Field(19, "tets1", HierarchyId.Parse("/2/1/3/1/")),
        new Field(20, "test2", HierarchyId.Parse("/2/1/3/1/1/")),
        new Field(21, "test3", HierarchyId.Parse("/2/1/3/1/1/1/")),
        new Field(22, "test24", HierarchyId.Parse("/2/1/3/1/1/2/"))
    };

    var builder = new MyClass();
    var x = builder.NewMthodTest(dd);
}
Method to get child:
/// <summary>
/// Converts a flat list of <c>Field</c> records, each carrying a hierarchy path such as
/// <c>/2/1/</c>, into a tree of <c>HierarchicalNode</c> objects.
/// </summary>
public class MyClass
{
    /// <summary>
    /// Builds the tree for <paramref name="query"/>.
    /// </summary>
    /// <param name="query">Records in any order; each record's <c>Node</c> is its path.</param>
    /// <returns>
    /// The top-level nodes, i.e. those whose path has no ancestor present in
    /// <paramref name="query"/>. For the usual data set this is the single "/" record.
    /// </returns>
    /// <remarks>
    /// Nodes are located by path text rather than by position in the parent's child list.
    /// Positional indexing (<c>Children[part - 1]</c>) breaks as soon as a "/" record
    /// exists, ids have gaps, or rows arrive out of order.
    /// </remarks>
    public List<HierarchicalNode> NewMthodTest(List<Field> query)
    {
        var root = new HierarchicalNode("Root", 0);
        var byPath = new Dictionary<string, HierarchicalNode>();
        var entries = new List<KeyValuePair<string, HierarchicalNode>>();

        // Pass 1: create every node so that parents can be found regardless of input order.
        foreach (var rec in query)
        {
            string path = rec.Node.ToString();
            var node = new HierarchicalNode(rec.FieldName, rec.Id);
            byPath[path] = node;
            entries.Add(new KeyValuePair<string, HierarchicalNode>(path, node));
        }

        // Pass 2: attach each node to its nearest existing ancestor, keeping input order
        // among siblings.
        foreach (var entry in entries)
        {
            FindNearestAncestor(entry.Key, byPath, root).Children.Add(entry.Value);
        }

        return root.Children;
    }

    /// <summary>
    /// Walks up <paramref name="path"/> one segment at a time until a known node is found;
    /// falls back to <paramref name="root"/> when no ancestor exists.
    /// </summary>
    private static HierarchicalNode FindNearestAncestor(
        string path,
        Dictionary<string, HierarchicalNode> byPath,
        HierarchicalNode root)
    {
        for (string parent = ParentPath(path); parent != null; parent = ParentPath(parent))
        {
            HierarchicalNode found;
            if (byPath.TryGetValue(parent, out found))
            {
                return found;
            }
        }

        return root;
    }

    /// <summary>
    /// Returns the path with its last segment removed ("/1/2/" gives "/1/", "/1/" gives "/"),
    /// or null when <paramref name="path"/> has no parent.
    /// </summary>
    private static string ParentPath(string path)
    {
        if (path.Length <= 1)
        {
            return null;
        }

        // Skip the trailing '/' and cut after the previous one.
        int cut = path.LastIndexOf('/', path.Length - 2);
        return path.Substring(0, cut + 1);
    }
}
and my input parameter class is :
/// <summary>Input record: one named item together with its position in the hierarchy.</summary>
public class Field
{
/// <summary>Creates a record from its id, display name and hierarchy path.</summary>
public Field(long id, string fieldName, HierarchyId node)
{
Id = id;
FieldName = fieldName;
Node = node;
}
/// <summary>Identifier of the record.</summary>
public long Id { get; set; }
/// <summary>Display name.</summary>
public string FieldName { get; set; }
/// <summary>Hierarchy path of the record (e.g. parsed from "/1/2/").</summary>
public HierarchyId Node { get; set; }
}
and output class is
/// <summary>
/// Output tree node: an immutable name/id pair plus a mutable list of child nodes.
/// </summary>
class HierarchicalNode
{
    public HierarchicalNode(string name, long id)
    {
        Name = name;
        Id = id;
    }

    /// <summary>Child nodes; the list instance is fixed, its contents are not.</summary>
    public List<HierarchicalNode> Children { get; } = new List<HierarchicalNode>();

    /// <summary>Display name given at construction.</summary>
    public string Name { get; }

    /// <summary>Identifier given at construction.</summary>
    public long Id { get; }
}
it seems something wrong and it returns this:
One of the benefits of using HierarchyId is so that you can build a tree without doing recursive calls.
I would also name things a bit differently. Let's say you call your database table Nodes. Here is the table structure:
-- Node table: one row per tree node, positioned by its hierarchyid value.
CREATE TABLE dbo.Nodes
(
[NodeId] [int] NOT NULL,
[NodeName] [nvarchar](100) NOT NULL,
[HierarchyId] [hierarchyid] NOT NULL,
-- Depth of the node, computed from the hierarchyid and stored so it can be indexed.
[Level] AS [HierarchyId].GetLevel() PERSISTED,
CONSTRAINT Primary_Key_Nodes PRIMARY KEY CLUSTERED ([NodeId])
);
-- Each hierarchy position may be used by one node only.
CREATE UNIQUE NONCLUSTERED INDEX
[Nodes_1]
ON
[dbo].[Nodes] ([HierarchyId] ASC);
--Important - put level as the first column to index
CREATE UNIQUE NONCLUSTERED INDEX
[Nodes_2]
ON
[dbo].[Nodes] ([Level] ASC, [HierarchyId] ASC);
Here is the SQL to return nodes for a given parent. I would wrap this up in a function called GetDescendantsAndSelf():
-- Returns the node at @parentHierarchyId together with all of its descendants.
-- @parentHierarchyId is a hierarchyid parameter supplied by the caller; rows come back
-- shallowest first so that a parent always precedes its children.
SELECT
    [NodeId]
    ,[NodeName]
    ,[HierarchyId].ToString() AS [HierarchyPath]
FROM
    [dbo].[Nodes]
WHERE
    [HierarchyId].IsDescendantOf(@parentHierarchyId) = 1
ORDER BY
    [Level] ASC
    ,[NodeName] ASC;
My data transfer object could look like this:
/// <summary>Data transfer object for one node of the resulting tree.</summary>
public class TreeNode
{
/// <summary>Text of the node; defaults to the empty string.</summary>
public string Text { get; set; } = String.Empty;
/// <summary>Child nodes; defaults to an empty list.</summary>
public List<TreeNode> Nodes { get; set; } = new List<TreeNode>();
}
GetDescendantsAndSelf() should return a list of Node data access objects like this one:
/// <summary>Data access object for one row of the node table.</summary>
public class Node
{
/// <summary>Identifier of the node.</summary>
public int NodeId { get; set; }
/// <summary>Name of the node; defaults to the empty string.</summary>
public string NodeName { get; set; } = String.Empty;
/// <summary>Position of the node in the hierarchy.</summary>
public SqlHierarchyId HierarchyId { get; set; }
/// <summary>Level computed from <see cref="HierarchyId"/> on every read.</summary>
public int Level => HierarchyId.GetLevel().ToSqlInt32().Value;
}
Here is the code to build a tree:
// Builds the subtree rooted at parentHierarchyId in a single pass over the rows.
// It relies on the rows being ordered shallowest first, so a parent is always in
// treeBuilder before any of its children are processed.
TreeNode? rootNode = null;
var treeBuilder = new Dictionary<string, TreeNode>();

// Canonical hierarchyid text ends with a slash.
string parentHierarchyId = "/1/2/3/";
var nodes = GetDescendantsAndSelf(parentHierarchyId);
foreach (Node node in nodes)
{
    var currentNode = new TreeNode { Text = node.NodeName };
    string key = node.HierarchyId.ToString();
    treeBuilder[key] = currentNode;

    // The subtree root is the requested node itself, whatever its depth. Testing for
    // Level == 1 only worked when the requested node happened to sit on level 1.
    if (key == parentHierarchyId)
    {
        rootNode = currentNode;
        continue;
    }

    string parentKey = node.HierarchyId.GetAncestor(1).ToString();
    if (!treeBuilder.TryGetValue(parentKey, out TreeNode? parentNode))
    {
        throw new InvalidOperationException(
            $"Node '{key}' was returned before its parent '{parentKey}'.");
    }

    parentNode.Nodes.Add(currentNode);
}

if (rootNode is {})
{
    //rootNode contains your tree structure
}
else
{
    //no data found for parentHierarchyId
}
After a lot of searching on the net without success, I solved this myself with a recursive method in C#.
I am posting my complete code here to help anyone who searches for this question in the future. If you have any advice on how to improve it, please leave a comment.
This is my code:
This is My Main method which calls GetTreeMethod
/// <summary>
/// Entry point: loads the fields for "/" once and builds the tree from that in-memory
/// collection, starting at "/".
/// </summary>
/// <returns>The nodes directly below "/", each with its descendants attached.</returns>
public List<TreeView> GetTree()
{
    // Fetch once so the recursion below never goes back to the database.
    var allNodes = _getFieldsList.GetFieldsByNode("/");
    return GetTreeMethod("/", allNodes);
}
This is my main recursive method
/// <summary>
/// Recursively builds the subtree below <paramref name="nodeStr"/>.
/// </summary>
/// <param name="nodeStr">Hierarchy path of the node whose children are wanted, e.g. "/".</param>
/// <param name="lstCollection">All candidate rows, already loaded in memory.</param>
/// <returns>The direct children of the node, each with <c>children</c> filled in.</returns>
/// <remarks>
/// <c>TreeView</c> declares no <c>fin</c> member and <c>GetChilds</c> never set one, so that
/// assignment was dropped. The objects returned by <c>GetChilds</c> are created fresh for
/// this call, so they are completed in place instead of being copied again.
/// </remarks>
private List<TreeView> GetTreeMethod(string nodeStr, List<FieldListDto> lstCollection)
{
    HierarchyId node = HierarchyId.Parse(nodeStr);
    List<TreeView> childrenOfNode = GetChilds(node, lstCollection);

    foreach (var item in childrenOfNode)
    {
        item.children = GetTreeMethod(item.node.ToString(), lstCollection);
    }

    return childrenOfNode;
}
this just gives children of a specific node
/// <summary>
/// Returns the direct children of <paramref name="node"/> found in
/// <paramref name="lstCollection"/>, projected to new <c>TreeView</c> objects.
/// </summary>
/// <param name="node">Parent whose immediate children are wanted.</param>
/// <param name="lstCollection">All candidate rows.</param>
private List<TreeView> GetChilds(HierarchyId node, List<FieldListDto> lstCollection)
{
    // Render the parent path once rather than once per candidate row.
    string parentPath = node.ToString();

    return lstCollection
        // "/" has no ancestor, so it is excluded before GetAncestor is called.
        .Where(x => x.Node.ToString() != "/" && x.Node.GetAncestor(1).ToString() == parentPath)
        // FieldListDto declares the identifier as "id" (lower case).
        .Select(q => new TreeView { id = q.id, node = q.Node, title = q.FieldName })
        .ToList();
}
Models
/// <summary>Row as loaded from the data store.</summary>
public class FieldListDto
{
/// <summary>Identifier of the row.</summary>
public long id { get; set; }
/// <summary>Name of the field; becomes the node title.</summary>
public string FieldName { get; set; }
/// <summary>Position of the row in the hierarchy.</summary>
public HierarchyId Node { get; set; }
}
/// <summary>Output node returned to the client.</summary>
public class TreeView
{
/// <summary>Identifier of the node.</summary>
public long id { get; set; }
/// <summary>Title of the node.</summary>
public string title { get; set; }
/// <summary>Position of the node in the hierarchy.</summary>
public HierarchyId node { get; set; }
/// <summary>Child nodes; filled in by the recursive tree builder.</summary>
public List<TreeView> children { get; set; }
}
here my SQL data
and here my final result
I have a table in a database which looks like this:
| id | parentID | name |
|----------+----------+-------------|
|ABCD-12345| | Top |
|----------+----------+-------------|
|ABCD-23456|ABCD-12345| Middle |
|----------+----------+-------------|
|ABCD-34567|ABCD-23456| Bottom |
|----------+----------+-------------|
|ABCD-45678|ABCD-23456| Bottom |
etc. - Basically, a hierarchical structure of N depth. I've taken this and shoved it into a datatable.
I have the following class built to hold this data:
/// <summary>One row of the table, plus the nodes that name it as their parent.</summary>
public class TreeNode
{
/// <summary>Identifier of the node.</summary>
public string id { get; set; }
/// <summary>Name of the node.</summary>
public string name { get; set; }
/// <summary>Identifier of the parent node.</summary>
public string parentID { get; set; }
/// <summary>Child nodes. Not initialised here, so it is null until assigned.</summary>
public List<TreeNode> children { get; set; }
}
My goal is to go through each of these DataTable rows and insert them into the appropriate location in the TreeNode structure, but I'm super confused as to how I should approach this.
The main point of confusion for me is how I search through the entire existing structure of TreeNodes to see if a node with the parentID exists. Can anyone point me in the right direction?
I tried the following code, but it doesn't work:
/// <summary>
/// Inserts every row of <paramref name="dt"/> into the tree held by
/// <paramref name="treeList"/>, attaching each new node to the node whose id equals the
/// row's parent id, at any depth.
/// </summary>
/// <param name="currNode">Unused; kept so existing callers continue to compile.</param>
/// <param name="treeList">Existing top-level nodes (may be empty). Modified in place.</param>
/// <param name="dt">Rows laid out as column 0 = id, column 1 = parentID, column 2 = name.</param>
/// <returns><paramref name="treeList"/>, now containing the complete tree.</returns>
/// <remarks>
/// Rows may arrive in any order. A row whose parent id is empty or unknown becomes a
/// top-level node. Rows whose id is already present in the tree are skipped. The table is
/// not modified, because removing rows while enumerating <c>dt.Rows</c> throws.
/// </remarks>
public List<TreeNode> BuildTree(int currNode, List<TreeNode> treeList, DataTable dt)
{
    // Index every node that is already in the tree, however deep it sits.
    var byId = new Dictionary<string, TreeNode>();
    var pending = new Stack<TreeNode>(treeList);
    while (pending.Count > 0)
    {
        TreeNode existing = pending.Pop();
        if (existing == null)
        {
            continue;
        }

        if (existing.children == null)
        {
            existing.children = new List<TreeNode>();
        }

        if (existing.id != null)
        {
            byId[existing.id] = existing;
        }

        foreach (TreeNode child in existing.children)
        {
            pending.Push(child);
        }
    }

    // Pass 1: create a node for every new row so parents can be resolved in any order.
    var added = new List<TreeNode>();
    foreach (DataRow row in dt.Rows)
    {
        string id = row[0].ToString();
        if (byId.ContainsKey(id))
        {
            continue;
        }

        var node = new TreeNode
        {
            id = id,
            name = row[2].ToString(),
            parentID = row[1].ToString(),
            children = new List<TreeNode>()
        };
        byId.Add(id, node);
        added.Add(node);
    }

    // Pass 2: link each new node to its parent, or promote it to the top level.
    foreach (TreeNode node in added)
    {
        TreeNode parent;
        if (node.parentID.Length > 0 && byId.TryGetValue(node.parentID, out parent))
        {
            parent.children.Add(node);
        }
        else
        {
            treeList.Add(node);
        }
    }

    return treeList;
}
The problem is this line:
if(row[1].ToString() == treeList[currNode].id)
which gets an out of range exception, because I have a root at index 0, so on the second run (when currNode is 1), it breaks. I need to traverse to treeList[0].Children[int], followed by treeList[0].Children[int].Children[int] and so on and so forth.
So how do I accomplish this goal?
First I'm going to modify the TreeNode class for our convenience. It's not necessary, but just a nice to have. Also I'm going to assume that in your datatable you've done your error checking and there's only one node with ParentId = "".
/// <summary>
/// Tree node with string keys. A new instance has empty strings for all keys and an empty
/// child list.
/// </summary>
public class TreeNode
{
    public TreeNode()
    {
        Id = string.Empty;
        Name = string.Empty;
        ParentID = string.Empty;
        Children = new List<TreeNode>();
    }

    public string Id { get; set; }

    public string Name { get; set; }

    public string ParentID { get; set; }

    public List<TreeNode> Children { get; set; }

    /// <summary>True when <see cref="ParentID"/> is the empty string.</summary>
    public bool IsRoot => ParentID == string.Empty;

    /// <summary>True when the node has no children (null or empty list).</summary>
    public bool IsChild => Children == null || Children.Count == 0;
}
First, I'd convert your datatable data into a list of TreeNode objects. Forget about relationships for now; just create a list in which each object's Children is empty. I wrote a method to simulate data retrieval from the datatable. You can use your actual datatable instead.
/// <summary>
/// Stand-in for reading the real datatable: returns six hard-coded rows, deliberately not
/// in parent-before-child order.
/// </summary>
static List<DataTableData> GetDataTableData()
{
    return new List<DataTableData>
    {
        new DataTableData { Id = "23456", ParentID = "12345", Name = "Middle" },
        new DataTableData { Id = "55555", ParentID = "12345", Name = "Middle" },
        new DataTableData { Id = "34567", ParentID = "23456", Name = "Bottom" },
        new DataTableData { Id = "12345", ParentID = string.Empty, Name = "Top" },
        new DataTableData { Id = "45678", ParentID = "23456", Name = "Bottom" },
        new DataTableData { Id = "66666", ParentID = "55555", Name = "Bottom" }
    };
}
And this is what your Main() would look like:
/// <summary>
/// Converts the sample rows into unlinked TreeNode objects, links them with BuildTree and
/// waits for a key press.
/// </summary>
static void Main(string[] args)
{
    // One TreeNode per row; relationships are established afterwards by BuildTree.
    List<TreeNode> treeNodes = GetDataTableData()
        .Select(row => new TreeNode { Id = row.Id, Name = row.Name, ParentID = row.ParentID })
        .ToList();

    var root = BuildTree(treeNodes);
    Console.ReadLine();
}
Now, in my BuildTree() method, instead of passing the datatable I can pass my TreeNode list, and return just the root node.
/// <summary>
/// Links the flat <paramref name="nodes"/> list into a tree by filling every node's
/// <c>Children</c> with the nodes whose <c>ParentID</c> equals its <c>Id</c>.
/// </summary>
/// <param name="nodes">All nodes, in any order. Each node's <c>Children</c> is replaced.</param>
/// <returns>The first node whose <c>IsRoot</c> is true, or null when there is none.</returns>
public static TreeNode BuildTree(List<TreeNode> nodes)
{
    // Group once by parent id. Re-scanning the whole list for every node made this
    // quadratic. ToLookup keeps the source order inside each group and returns an empty
    // sequence for ids that have no children.
    var childrenByParent = nodes.ToLookup(x => x.ParentID);

    foreach (var node in nodes)
    {
        node.Children = childrenByParent[node.Id].ToList();
    }

    return nodes.Find(x => x.IsRoot);
}
BuildTree() Breakdown
The nodes list already have all the nodes corresponding to data in your datatable. The BuildTree() is merely going to create the parent-child relations and fill in each object's Children list.
So I iterate through the list and, for each element, find which other elements in the list are supposed to be its children. Once the whole list has been iterated, all the parent-child relationships have been created. Finally, I pick the root node (the one whose ParentID is empty) and return it.
Edit
Here's an easy method to print and verify your tree.
/// <summary>
/// Writes <paramref name="node"/> and all of its descendants to the console, one per line,
/// indented with one tab per level.
/// </summary>
/// <param name="node">Node to print.</param>
/// <param name="indents">Number of tabs to print before this node.</param>
static void PrintTree(TreeNode node, int indents)
{
    // A non-positive count prints no tabs at all.
    Console.Write(new string('\t', Math.Max(0, indents)));
    Console.WriteLine("{0} - {1}", node.Id, node.Name);

    if (node.Children == null)
    {
        return;
    }

    int childIndents = indents + 1;
    foreach (var descendant in node.Children)
    {
        PrintTree(descendant, childIndents);
    }
}
My output looks like this:
If you are building a class structure then you need a class with a recursive method. Not sure how efficient this will be if it gets too big. Execute the method from the top of the tree.
/// <summary>
/// Tree node that can search itself and its descendants for a given id.
/// </summary>
public class TreeNode
{
    public TreeNode()
    {
        children = new List<TreeNode>();
    }

    public string id { get; set; }

    public string name { get; set; }

    public string parentID { get; set; }

    public List<TreeNode> children { get; set; }

    /// <summary>
    /// Depth-first search for the node whose <c>id</c> equals <paramref name="ID"/>.
    /// </summary>
    /// <param name="ID">Identifier to look for.</param>
    /// <returns>This node or the first matching descendant; null when nothing matches.</returns>
    public TreeNode FindParentWithID(string ID)
    {
        if (id == ID)
        {
            return this;
        }

        foreach (TreeNode candidate in children)
        {
            TreeNode match = candidate.FindParentWithID(ID);
            if (match != null)
            {
                return match;
            }
        }

        return null;
    }
}
You would load your data into the classes from the database. I had to hard code the values for the example to work:
// Hand-built sample: Top (treeNode5) -> Middle (treeNode6) -> two Bottom nodes (7 and 8).
TreeNode treeNode5 = new TreeNode() { id = "ABCD-12345", parentID = null, name = "Top" };
TreeNode treeNode6 = new TreeNode() { id = "ABCD-12346", parentID = "ABCD-12345", name = "Middle" };
treeNode5.children.Add(treeNode6);
TreeNode treeNode7 = new TreeNode() { id = "ABCD-12347", parentID = "ABCD-12346", name = "Bottom" };
TreeNode treeNode8 = new TreeNode() { id = "ABCD-12348", parentID = "ABCD-12346", name = "Bottom" };
treeNode6.children.Add(treeNode7);
treeNode6.children.Add(treeNode8);
// Search from the top of the tree for the node with the given id.
TreeNode topOne = treeNode5.FindParentWithID("ABCD-12346");
In this example, topOne will end up being treeNode6 (name = "Middle").
Try this code; I had the same issue and it works perfectly.
This method builds a tree from a list of items by looping through all the items and adding each one to its parent's child list. It returns only the root item, with its nested children.
/// <summary>
/// Links a flat list of nodes into a tree and returns its root.
/// </summary>
/// <param name="source">All nodes, in any order. Each node's <c>children</c> is replaced.</param>
/// <returns>
/// The first node without a parent (null or empty <c>parentID</c>), or null when there is none.
/// </returns>
/// <remarks>
/// Uses the member names of the TreeNode class from the question (<c>id</c>,
/// <c>parentID</c>, <c>children</c>); the previous mix of <c>Id</c> / <c>ChildItems</c>
/// did not match that class.
/// </remarks>
public TreeNode BuildTree(List<TreeNode> source)
{
    // Group once by parent id instead of re-scanning the list for every item.
    var childrenByParent = source.ToLookup(i => i.parentID);

    // build the children list for each item
    foreach (var item in source)
    {
        item.children = childrenByParent[item.id].ToList();
    }

    // A parent id read from a database cell is often "" rather than null, so both
    // count as "no parent".
    return source.FirstOrDefault(i => string.IsNullOrEmpty(i.parentID));
}
Note that this method returns only one TreeNode object with its children; you can return a list instead by changing .FirstOrDefault() to .ToList() in the return line (and the return type accordingly).
I am using EPPlus to export data to Excel. I am still learning how to export data properly, and I am stuck at a point where I cannot export an object together with its child objects, all flattened out into one row.
ParentObject
public string A;
public string B;
public ChildObject ChildObject;
ChildObject
public string C;
public string D;
so i want my exported excel to look like
A B C D
aa1 bb1 cc1 dd1
aa2 bb2 cc2 dd2
aa3 bb3 cc3 dd3
This is how my current implementation looks like
/// <summary>
/// Writes <paramref name="dataCollection"/> to the named worksheet of the given workbook,
/// one row per item with a header row, starting at cell A1.
/// </summary>
/// <param name="dataCollection">Items to export.</param>
/// <param name="fullyQualifiedFileName">Path of the workbook to create or update.</param>
/// <param name="worksheetName">Worksheet to write to; created when it does not exist.</param>
/// <remarks>
/// Public instance properties and fields of <c>T</c> are exported unless they carry an
/// <c>ExcelUtilityIgnoreAttribute</c> whose <c>IsIgnored</c> is true.
/// </remarks>
public void CreateExcel(IEnumerable<T> dataCollection, string fullyQualifiedFileName, string worksheetName)
{
    using (var package = new ExcelPackage(new FileInfo(fullyQualifiedFileName)))
    {
        var worksheet =
            package.Workbook.Worksheets.FirstOrDefault(excelWorksheet => excelWorksheet.Name == worksheetName) ??
            package.Workbook.Worksheets.Add(worksheetName);

        // Instance must accompany Public, otherwise reflection matches nothing.
        const BindingFlags memberFlags = BindingFlags.Instance | BindingFlags.Public;

        var membersToInclude = typeof(T)
            .GetMembers(memberFlags)
            // GetMembers also returns methods, constructors and events; only data members
            // make sense as columns.
            .Where(m => m.MemberType == MemberTypes.Property || m.MemberType == MemberTypes.Field)
            .Where(m =>
            {
                var ignore = m.GetCustomAttribute<ExcelUtilityIgnoreAttribute>();
                return ignore == null || !ignore.IsIgnored;
            })
            .ToArray();

        worksheet.Cells["A1"].LoadFromCollection(
            dataCollection,
            true,
            OfficeOpenXml.Table.TableStyles.None,
            memberFlags,
            membersToInclude);
        package.Save();
    }
}
I tried using ExpandoObject, but EPPlus won't work with it. Is there a way to export objects together with their child objects?
also: is there any other library that i could use ?
There is no native function that could do that. Hard to come up with something generic as it would require a great deal of assumption. What property type should be automatically exported vs what should be treated a child node and have ITS properties exported or expanded. But if you come up with that it is a basic tree traversal from there.
Below is something I adapted from a similar task. Here, I assume that anything that is either a string or a data type without properties is considered a value type for exporting (int, double, etc.). But it is very easy to tweak as needed. I threw this together, so it may not be fully optimized:
/// <summary>
/// Exports <paramref name="dataCollection"/> to a worksheet, flattening nested objects so
/// that every leaf property of the object graph becomes one column.
/// </summary>
/// <typeparam name="T">Type of the exported rows.</typeparam>
/// <param name="dataCollection">Rows to export.</param>
/// <param name="file">Workbook file to create or update.</param>
/// <param name="worksheetName">Worksheet to write to; created when it does not exist.</param>
/// <remarks>
/// Row 1 receives the leaf property names and is written together with the first data
/// row; data starts at row 2. A null nested object leaves its cells empty while still
/// occupying its columns.
/// </remarks>
public static void ExportFlatExcel<T>(IEnumerable<T> dataCollection, FileInfo file, string worksheetName)
{
    using (var package = new ExcelPackage(file))
    {
        var worksheet =
            package.Workbook.Worksheets.FirstOrDefault(excelWorksheet => excelWorksheet.Name == worksheetName) ??
            package.Workbook.Worksheets.Add(worksheetName);

        //Map the properties to a tree of branches (nested objects) and leaves (columns)
        var rootTree = BuildPropertyTree(typeof(T));

        //Go through the data
        var rows = dataCollection.ToList();
        for (var r = 0; r < rows.Count; r++)
        {
            var currRow = rows[r];
            var col = 0;

            foreach (var child in rootTree.Children)
            {
                var nodestack = new Stack<Tuple<INode, object>>();
                nodestack.Push(new Tuple<INode, object>(child, currRow));

                while (nodestack.Count > 0)
                {
                    var tuple = nodestack.Pop();
                    var node = tuple.Item1;
                    var currobj = tuple.Item2;

                    // IBranch derives from ILeaf, so the branch test has to come first.
                    var branch = node as IBranch<PropertyInfo>;
                    if (branch != null)
                    {
                        // A null parent yields null for everything beneath it rather than
                        // a reflection exception.
                        var nested = currobj == null ? null : branch.Data.GetValue(currobj, null);
                        foreach (var cnode in branch.Children.Reverse())
                        {
                            nodestack.Push(new Tuple<INode, object>(cnode, nested));
                        }

                        continue;
                    }

                    var leaf = node as ILeaf<PropertyInfo>;
                    if (leaf == null)
                    {
                        continue;
                    }

                    col++;
                    worksheet.Cells[r + 2, col].Value =
                        currobj == null ? null : leaf.Data.GetValue(currobj, null);

                    if (r == 0)
                    {
                        worksheet.Cells[1, col].Value = leaf.Data.Name;
                    }
                }
            }
        }

        // The using block disposes the package; no explicit Dispose is needed.
        package.Save();
    }
}

/// <summary>
/// Builds the branch/leaf tree describing how <paramref name="rootType"/> is flattened.
/// Traversal is iterative; children are pushed in reverse so they pop in declaration order.
/// </summary>
private static Branch<PropertyInfo> BuildPropertyTree(Type rootType)
{
    const BindingFlags flags = BindingFlags.Instance | BindingFlags.Public;

    var rootTree = new Branch<PropertyInfo>(null);

    // For every branch, the set of types on the path from the root down to it. A property
    // whose type is already on its own path is emitted as a leaf; expanding it would never
    // terminate (e.g. a Node type exposing a Parent property of the same type).
    var typesOnPath = new Dictionary<IBranch<PropertyInfo>, HashSet<Type>>
    {
        { rootTree, new HashSet<Type> { rootType } }
    };

    var stack = new Stack<KeyValuePair<PropertyInfo, IBranch<PropertyInfo>>>();
    foreach (var pi in GetReadableProperties(rootType, flags).Reverse())
    {
        stack.Push(new KeyValuePair<PropertyInfo, IBranch<PropertyInfo>>(pi, rootTree));
    }

    while (stack.Count > 0)
    {
        var entry = stack.Pop();
        var prop = entry.Key;
        var branch = entry.Value;
        var ancestors = typesOnPath[branch];

        var childProps = IsLeafType(prop.PropertyType)
            ? new PropertyInfo[0]
            : GetReadableProperties(prop.PropertyType, flags);

        if (childProps.Length == 0 || ancestors.Contains(prop.PropertyType))
        {
            branch.AddNode(new Leaf<PropertyInfo>(prop));
            continue;
        }

        var child = new Branch<PropertyInfo>(prop);
        branch.AddNode(child);
        typesOnPath[child] = new HashSet<Type>(ancestors) { prop.PropertyType };

        foreach (var pi in childProps.Reverse())
        {
            stack.Push(new KeyValuePair<PropertyInfo, IBranch<PropertyInfo>>(pi, child));
        }
    }

    return rootTree;
}

/// <summary>
/// True for types written to a single cell. "Has no public properties" is not a sufficient
/// test: DateTime exposes a Date property that is itself a DateTime, so expanding it never
/// ends, and Nullable&lt;T&gt; would expand into HasValue/Value.
/// </summary>
private static bool IsLeafType(Type type)
{
    type = Nullable.GetUnderlyingType(type) ?? type;
    return type.IsPrimitive
        || type.IsEnum
        || type == typeof(string)
        || type == typeof(decimal)
        || type == typeof(DateTime)
        || type == typeof(DateTimeOffset)
        || type == typeof(TimeSpan)
        || type == typeof(Guid);
}

/// <summary>
/// Public instance properties that can be read without arguments. Indexers and write-only
/// properties are excluded because GetValue(obj, null) throws for them.
/// </summary>
private static PropertyInfo[] GetReadableProperties(Type type, BindingFlags flags)
{
    return type
        .GetProperties(flags)
        .Where(p => p.CanRead && p.GetIndexParameters().Length == 0)
        .ToArray();
}
So say you have these as a structure:
#region Classes
/// <summary>Sample row type: plain values mixed with two nested objects.</summary>
public class Parent
{
public string A { get; set; }
/// <summary>Nested object; its properties are flattened into columns of their own.</summary>
public Child1 Child1 { get; set; }
public string D { get; set; }
public int E { get; set; }
/// <summary>Nested object that itself contains another nested <see cref="Child1"/>.</summary>
public Child2 Child2 { get; set; }
}
/// <summary>Sample nested type holding two string values.</summary>
public class Child1
{
public string B { get; set; }
public string C { get; set; }
}
/// <summary>Sample nested type that contains a further nested object plus two strings.</summary>
public class Child2
{
/// <summary>Second level of nesting.</summary>
public Child1 Child1 { get; set; }
public string F { get; set; }
public string G { get; set; }
}
#endregion
#region Tree Nodes
/// <summary>Marker interface shared by every node of the tree (leaves and branches).</summary>
public interface INode { }
/// <summary>A node that carries a payload of type <typeparamref name="T"/>.</summary>
public interface ILeaf<T> : INode
{
/// <summary>Payload of the node.</summary>
T Data { get; set; }
}
/// <summary>
/// A node that carries a payload and also owns child nodes. Because it derives from
/// <see cref="ILeaf{T}"/>, code that distinguishes the two must test for the branch first.
/// </summary>
public interface IBranch<T> : ILeaf<T>
{
/// <summary>Child nodes.</summary>
IList<INode> Children { get; }
/// <summary>Adds <paramref name="node"/> as a child of this branch.</summary>
void AddNode(INode node);
}
/// <summary>Default <see cref="ILeaf{T}"/> implementation: a payload and nothing else.</summary>
public class Leaf<T> : ILeaf<T>
{
/// <summary>Creates a leaf whose <see cref="Data"/> is left at its default value.</summary>
public Leaf() { }
/// <summary>Creates a leaf carrying <paramref name="data"/>.</summary>
public Leaf(T data) { Data = data; }
/// <summary>Payload of the leaf.</summary>
public T Data { get; set; }
}
/// <summary>
/// Default <see cref="IBranch{T}"/> implementation: a payload plus an ordered list of
/// child nodes.
/// </summary>
public class Branch<T> : IBranch<T>
{
    /// <summary>Creates a branch carrying <paramref name="data"/> and no children.</summary>
    public Branch(T data)
    {
        Data = data;
    }

    /// <summary>Children in the order they were added.</summary>
    public IList<INode> Children { get; } = new List<INode>();

    /// <summary>Payload of the branch.</summary>
    public T Data { get; set; }

    /// <summary>Appends <paramref name="node"/> to <see cref="Children"/>.</summary>
    public void AddNode(INode node) => Children.Add(node);
}
#endregion
And this as a test:
/// <summary>
/// Exports twenty sample rows, each with two levels of nested objects, to
/// <c>c:\temp\flat.xlsx</c>, replacing any file left over from a previous run.
/// </summary>
[TestMethod]
public void ExportFlatTest()
{
    var list = new List<Parent>();
    for (var i = 0; i < 20; i++)
    {
        list.Add(new Parent
        {
            A = $"A-{i}",
            D = $"D-{i}",
            E = i * 10,
            Child1 = new Child1
            {
                B = $"Child1-B-{i}",
                C = $"Child1-C-{i}",
            },
            Child2 = new Child2
            {
                F = $"F-{i}",
                G = $"G-{i}",
                Child1 = new Child1
                {
                    B = $"Child2-Child1-B-{i}",
                    C = $"Child2-Child1-C-{i}",
                }
            }
        });
    }

    // Verbatim string (@"...") so the backslashes in the path are not treated as escapes.
    var file = new FileInfo(@"c:\temp\flat.xlsx");
    if (file.Exists)
    {
        file.Delete();
    }

    TestExtensions.ExportFlatExcel(
        list
        , file
        , "Test1"
        );
}
Will give you this:
Tag class consists of ID Name and List<Tagging> :
/// <summary>A tag, together with the links that lead from it to other tags.</summary>
public class Tag
{
public int Id { get; set; }
public string Name { get; set; }
/// <summary>Links attached to this tag; null when none were assigned.</summary>
public ICollection<Tagging> Tagging { get; set; }
}
Tagging class :
/// <summary>Link between a parent tag and a child tag.</summary>
public class Tagging
{
public int Id { get; set; }
/// <summary>Key of the parent tag; foreign key for <see cref="ParentTag"/>.</summary>
[ForeignKey("ParentTag")]
public int ParentId { get; set; }
public Tag ParentTag { get; set; }
/// <summary>Key of the child tag; foreign key for <see cref="ChildTag"/>.</summary>
[ForeignKey("ChildTag")]
public int ChildId { get; set; }
public Tag ChildTag { get; set; }
}
Tagging class just express many to many relationship between tags, for hierarchical purpose.
For example given a list :
// Sample data: Parent (1) -> Child (2) -> GrandChild (3).
// Only the ParentId/ChildId keys are set on the links here; the ParentTag/ChildTag
// references are left null.
List<Tag> tags = new List<Tag>();
var parent = new Tag {Name = "Parent", Id = 1, Tagging = new List<Tagging>{ new Tagging{ ParentId = 1, ChildId = 2}}};
var child = new Tag {Name = "Child", Id = 2, Tagging = new List<Tagging> { new Tagging { ParentId = 2, ChildId = 3 }}};
var grandChild = new Tag {Name = "GrandChild", Id = 3};
tags.Add(parent);
tags.Add(child);
tags.Add(grandChild);
I am trying to loop through all the hierarchical objects connected to a given parent, for example by calling a method getAllHiearchyObject(Tag parent).
Output should be something like this :
Name : "Parent", Id = 1;
Name : "Child", Id : 2;
Name : "GrandChild", Id :3
I need an actual implementation of getAllHiearchyObject(Tag parent)
How about this...
/// <summary>
/// Lazily yields <paramref name="root"/> followed by every tag reachable through the
/// <c>ChildTag</c> of its links, depth first.
/// </summary>
/// <param name="root">Tag to start from; it is always the first element yielded.</param>
static IEnumerable<Tag> FlattenTag(Tag root)
{
    yield return root;

    if (root.Tagging == null)
    {
        yield break;
    }

    foreach (var link in root.Tagging)
    {
        var next = link.ChildTag;
        if (next == null)
        {
            continue;
        }

        // Re-yield each element of the recursive call; C# has no "yield all" construct.
        foreach (var descendant in FlattenTag(next))
        {
            yield return descendant;
        }
    }
}
Note that the second foreach above allows for the use of yield with recursion.
Usage...
foreach(var tag in FlattenTag(root))
...
Only one parent to one child.
For a simple case when you have only one parent-child relationship you can create methods like:
/// <summary>
/// Helpers that turn a "next item" function into a sequence.
/// </summary>
public static class EnumerableExtensions
{
    #region Methods

    /// <summary>
    /// Follows <paramref name="getNext"/> from <paramref name="first"/> until it returns null.
    /// </summary>
    /// <param name="first">First item; a null value produces an empty sequence.</param>
    /// <param name="getNext">Returns the successor of an item, or null at the end.</param>
    /// <exception cref="ArgumentNullException"><paramref name="getNext"/> is null.</exception>
    public static IEnumerable<T> Unwind<T>(T first, Func<T, T> getNext)
        where T : class
    {
        if (getNext == null)
            throw new ArgumentNullException(nameof(getNext));

        return UnwindIterator(first, getNext, item => item == null);
    }

    /// <summary>
    /// Follows <paramref name="getNext"/> from <paramref name="first"/> until
    /// <paramref name="isAfterLast"/> reports that the end has been passed.
    /// </summary>
    /// <param name="first">First item.</param>
    /// <param name="getNext">Returns the successor of an item.</param>
    /// <param name="isAfterLast">Returns true for the first value that is no longer part of the sequence.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="getNext"/> or <paramref name="isAfterLast"/> is null.
    /// </exception>
    /// <remarks>
    /// Arguments are validated immediately. With the checks inside the iterator body they
    /// would only run once the caller started enumerating the result.
    /// </remarks>
    public static IEnumerable<T> Unwind<T>(
        T first,
        Func<T, T> getNext,
        Func<T, Boolean> isAfterLast)
    {
        if (getNext == null)
            throw new ArgumentNullException(nameof(getNext));
        if (isAfterLast == null)
            throw new ArgumentNullException(nameof(isAfterLast));

        return UnwindIterator(first, getNext, isAfterLast);
    }

    /// <summary>Lazy part of Unwind; assumes its arguments were already validated.</summary>
    private static IEnumerable<T> UnwindIterator<T>(
        T first,
        Func<T, T> getNext,
        Func<T, Boolean> isAfterLast)
    {
        for (var current = first; !isAfterLast(current); current = getNext(current))
        {
            yield return current;
        }
    }

    #endregion
}
And use them in the following way (I have set ChildTag in Taggings, as it will be done by EF):
// Build the chain bottom-up so that every Tagging can point at its ChildTag instance.
var grandChild = new Tag { Name = "GrandChild", Id = 3 };
var child = new Tag
{
    Name = "Child",
    Id = 2,
    Tagging = new List<Tagging> { new Tagging { ParentId = 2, ChildId = 3, ChildTag = grandChild } }
};
var parent = new Tag
{
    Name = "Parent",
    Id = 1,
    Tagging = new List<Tagging> { new Tagging { ParentId = 1, ChildId = 2, ChildTag = child } }
};
var tags = new List<Tag> { parent, child, grandChild };

// Follow the first Tagging of each tag until no child is left.
Tag[] fromParent = EnumerableExtensions
    .Unwind(parent, tag => tag?.Tagging?.FirstOrDefault()?.ChildTag)
    .ToArray();

Console.WriteLine("Parent to child:");
foreach (var tag in fromParent)
{
    Console.WriteLine(tag);
}
Proper parent to many children
For a proper tree creation you will have to use:
/// <summary>
/// Tree node pairing an item with the already-unwound nodes of its sub-items.
/// Enumerating the node enumerates its direct sub-nodes.
/// </summary>
/// <typeparam name="T">Type of the wrapped item.</typeparam>
public class UnwoundItem<T> : IEnumerable<UnwoundItem<T>>
{
    /// <summary>Creates a node for <paramref name="item"/>.</summary>
    /// <param name="item">The wrapped item.</param>
    /// <param name="unwoundSubItems">Child nodes; <c>null</c> is treated as an empty sequence.</param>
    public UnwoundItem(T item, IEnumerable<UnwoundItem<T>> unwoundSubItems)
    {
        Item = item;
        UnwoundSubItems = unwoundSubItems ?? Enumerable.Empty<UnwoundItem<T>>();
    }

    /// <summary>The wrapped item.</summary>
    public T Item { get; }

    /// <summary>Direct child nodes of this node; never <c>null</c>.</summary>
    public IEnumerable<UnwoundItem<T>> UnwoundSubItems { get; }

    /// <summary>Enumerates the direct child nodes.</summary>
    public IEnumerator<UnwoundItem<T>> GetEnumerator() => UnwoundSubItems.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
and
/// <summary>
/// Helpers that unwind an item and its sub-items into a tree of <see cref="UnwoundItem{T}"/> nodes.
/// </summary>
public static class EnumerableExtensions
{
    #region Methods

    /// <summary>
    /// Builds the tree for <paramref name="first"/>, treating a <c>null</c> collection
    /// returned by <paramref name="getNext"/> as "no sub-items".
    /// </summary>
    /// <param name="first">Item that becomes the root node.</param>
    /// <param name="getNext">Returns the sub-items of an item.</param>
    /// <returns>The root node of the unwound tree.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="getNext"/> is <c>null</c>.</exception>
    public static UnwoundItem<T> UnwindMany<T>(T first, Func<T, IEnumerable<T>> getNext)
        where T : class
    {
        if (getNext == null)
        {
            throw new ArgumentNullException(nameof(getNext));
        }

        Func<IEnumerable<T>, Boolean> isMissing = collection => collection == null;
        return UnwindMany(first, getNext, isMissing);
    }

    /// <summary>
    /// Builds the tree for <paramref name="first"/>; a node gets no sub-nodes when
    /// <paramref name="isAfterLast"/> returns <c>true</c> for its sub-item collection.
    /// </summary>
    /// <param name="first">Item that becomes the root node.</param>
    /// <param name="getNext">Returns the sub-items of an item.</param>
    /// <param name="isAfterLast">Decides whether a sub-item collection ends the descent.</param>
    /// <returns>The root node; deeper levels are produced lazily while the sub-nodes are enumerated.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="getNext"/> or <paramref name="isAfterLast"/> is <c>null</c>.
    /// </exception>
    public static UnwoundItem<T> UnwindMany<T>(
        T first,
        Func<T, IEnumerable<T>> getNext,
        Func<IEnumerable<T>, Boolean> isAfterLast)
    {
        if (getNext == null)
        {
            throw new ArgumentNullException(nameof(getNext));
        }

        if (isAfterLast == null)
        {
            throw new ArgumentNullException(nameof(isAfterLast));
        }

        var children = getNext(first);

        // Select is deferred: each child's subtree is only built when the sub-nodes are enumerated.
        var childNodes = isAfterLast(children)
            ? Enumerable.Empty<UnwoundItem<T>>()
            : children.Select(child => UnwindMany(child, getNext, isAfterLast));

        return new UnwoundItem<T>(first, childNodes);
    }

    #endregion
}
It can be tested with:
/// <summary>
/// Writes every node of <paramref name="items"/> to the console, one per line,
/// followed recursively by its sub-nodes indented four more spaces.
/// </summary>
/// <param name="items">Nodes to print at the current depth.</param>
/// <param name="toString">Produces the text shown for a node's item.</param>
/// <param name="level">Current depth; the indent is four spaces per level.</param>
private static void Print<T>(IEnumerable<UnwoundItem<T>> items, Func<T, String> toString, Int32 level)
{
    const Int32 SpacesPerLevel = 4;
    var padding = new String(' ', level * SpacesPerLevel);
    var nextLevel = level + 1;

    foreach (var node in items)
    {
        Console.Write(padding);
        Console.WriteLine(toString(node.Item));
        Print(node.UnwoundSubItems, toString, nextLevel);
    }
}
...
// Sample tree: Parent has two children, and each child has one grandchild.
// Every Tagging's ParentId/ChildId matches the Id of the tag that owns it and of the
// tag it points to, so the key values agree with the ChildTag navigation properties.
var grandChild = new Tag { Name = "GrandChild", Id = 3 };
var grandChild2 = new Tag { Name = "GrandChild 2", Id = 33 };
var child = new Tag
{
    Name = "Child",
    Id = 2,
    Tagging = new List<Tagging> { new Tagging { ParentId = 2, ChildId = 3, ChildTag = grandChild } }
};
var child2 = new Tag
{
    Name = "Child 2",
    Id = 22,
    // Owned by child2, so ParentId is 22 (it was 2, which is the Id of the other child).
    Tagging = new List<Tagging> { new Tagging { ParentId = 22, ChildId = 33, ChildTag = grandChild2 } }
};
var parent = new Tag
{
    Name = "Parent",
    Id = 1,
    Tagging = new List<Tagging>
    {
        new Tagging { ParentId = 1, ChildId = 2, ChildTag = child },
        // Points at child2, so ChildId is 22 (it was 2).
        new Tagging { ParentId = 1, ChildId = 22, ChildTag = child2 }
    }
};

// Unwind the whole tree by following the ChildTag of every Tagging.
var fromParent = EnumerableExtensions
    .UnwindMany(
        parent,
        item =>
            item?.Tagging?.Select(tagging => tagging.ChildTag));

Console.WriteLine("Parent to child:");
Print(new[] { fromParent }, item => item.Name, 0);
I have the following object graph:
Root ( Root_Id)
----Child (Child_Id,Root_Id)
-------GrandChild (GrandChild_Id, Child_Id)
And I want to bypass Child and return the GrandChild collection for a given Root object. So far I have tried this:
// Attempt from the question: collect the ids of the root's children, then filter the grandchildren by them.
var child_Ids = db.Root
.SingleOrDefault( r => r.Root_Id == rootID )
.Childs
// Each child is projected into an anonymous object, so child_Ids is an array of
// anonymous-type instances rather than an array of plain id values.
.Select( ch => new { Child_Id = ch.Child_Id} ).ToArray();
// gc.Child_Id is looked up in that anonymous-type array here; the question reports that this does not compile.
return db.GrandChilds.Where( gc => child_Ids.Contains( gc.Child_Id ) );
But that won't even compile; it fails with the following errors:
1) IEnumerable does not contain a definition for Contains...
2) Argument instance: can not convert from 'AnonymousType # 1 []' to 'System.Linq.IQueryable
How can i accomplish this?
// Flattens the GrandChilds of every child of the root matching rootID into one sequence, then removes duplicates.
// NOTE(review): SingleOrDefault presumably returns null when no root matches, which would make
// the .Childs access throw; confirm that a matching root is guaranteed.
db.Root
.SingleOrDefault( r => r.Root_Id == rootID )
.Childs.SelectMany(ch=>ch.GrandChilds).Distinct()
Use the .SelectMany extension to get the grandchildren collection
Try this
// Plain id values of the children of the requested root. Projecting the id itself
// (not an anonymous object) gives an array of ids that can be joined on directly.
var child_Ids = db.Root
    .SingleOrDefault(r => r.Root_Id == rootID)
    .Childs
    .Select(ch => ch.Child_Id)
    .ToArray();

// Keep only the grandchildren whose Child_Id is one of the collected ids.
// A join clause compares keys with the contextual keyword 'equals' (not '=='); the key on
// the left must use the outer range variable (grandChild) and the one on the right the
// joined range variable (child_id).
return
    from grandChild in db.GrandChilds
    join child_id in child_Ids
        on grandChild.Child_Id equals child_id
    select grandChild;
P.S.: I am still a bit unsure about your goal, but this looks like a working approximation of your original solution.
EDIT:
If your hierarchy and classes are something like:
/// <summary>
/// Container exposing a collection of <see cref="Root"/> objects.
/// </summary>
public class Db
{
    /// <summary>The roots held by this instance.</summary>
    public ICollection<Root> Roots { get; private set; }

    /// <summary>Copies <paramref name="roots"/> into a new list.</summary>
    /// <param name="roots">Roots to store.</param>
    public Db(IEnumerable<Root> roots)
    {
        Roots = new List<Root>(roots);
    }
}
/// <summary>
/// Top of the hierarchy; owns a collection of <see cref="Child"/> objects.
/// </summary>
public class Root
{
    /// <summary>The children of this root.</summary>
    public ICollection<Child> Children { get; private set; }

    /// <summary>Copies <paramref name="children"/> into a new list.</summary>
    /// <param name="children">Children to store.</param>
    public Root(IEnumerable<Child> children )
    {
        Children = new List<Child>(children);
    }
}
/// <summary>
/// Middle level of the hierarchy; carries its own id, the id of its root and its grandchildren.
/// </summary>
public class Child
{
    /// <summary>Identifier of this child.</summary>
    public Int32 Child_Id { get; private set; }

    /// <summary>Identifier of the root this child belongs to.</summary>
    public Int32 Root_Id { get; private set; }

    /// <summary>The grandchildren held by this child.</summary>
    public ICollection<GrandChild> GrandChildren {get; private set;}

    /// <summary>Stores the ids and copies <paramref name="grandChildren"/> into a new list.</summary>
    /// <param name="childId">Value for <see cref="Child_Id"/>.</param>
    /// <param name="rootId">Value for <see cref="Root_Id"/>.</param>
    /// <param name="grandChildren">Grandchildren to store.</param>
    public Child(Int32 childId, Int32 rootId, IEnumerable<GrandChild> grandChildren)
    {
        Child_Id = childId;
        Root_Id = rootId;
        GrandChildren = new List<GrandChild>(grandChildren);
    }
}
/// <summary>
/// Lowest level of the hierarchy; carries its own id and the id of the child it belongs to.
/// </summary>
public class GrandChild
{
    /// <summary>Identifier of this grandchild.</summary>
    public Int32 GrandChild_Id {get; private set;}

    /// <summary>Identifier of the child this grandchild belongs to.</summary>
    public Int32 Child_Id {get; private set;}

    /// <summary>Stores both identifiers.</summary>
    /// <param name="grandChildId">Value for <see cref="GrandChild_Id"/>.</param>
    /// <param name="childId">Value for <see cref="Child_Id"/>.</param>
    public GrandChild (Int32 grandChildId, Int32 childId)
    {
        GrandChild_Id = grandChildId;
        Child_Id = childId;
    }
}
Then as it was already suggested by AD.NET you could try the SelectMany method
// Sample graph: one root with two children (ids 10 and 11) holding three grandchildren in total.
var gc1 = new GrandChild(1, 10);
var gc2 = new GrandChild(2, 10);
var gc3 = new GrandChild(3, 11);
var c1 = new Child(10, 100, new[] { gc1, gc2 });
var c2 = new Child(11, 100, new[] { gc3 });
var r1 = new Root(new[] { c1, c2 });
var db = new Db(new[] { r1 });

// Flatten the grandchildren of every child of the first root into a single sequence.
var rootGrandChildren = db.Roots
    .FirstOrDefault()
    .Children
    .SelectMany(c => c.GrandChildren);
In query syntax it will look like
// Query-syntax form: the first 'from' walks the children of the first root,
// the second walks the grandchildren of each of those children.
var rootGrandChildren =
    from c in db.Roots.FirstOrDefault().Children
    from gc in c.GrandChildren
    select gc;
But if your Child class does not know its GrandChildren and they (the GrandChildren) are contained in Root:
/// <summary>
/// Child that only knows its own id and the id of its root; it holds no grandchildren.
/// </summary>
public class Child
{
    /// <summary>Identifier of this child.</summary>
    public Int32 Child_Id { get; private set; }

    /// <summary>Identifier of the root this child belongs to.</summary>
    public Int32 Root_Id { get; private set; }

    /// <summary>Stores both identifiers.</summary>
    /// <param name="childId">Value for <see cref="Child_Id"/>.</param>
    /// <param name="rootId">Value for <see cref="Root_Id"/>.</param>
    public Child(Int32 childId, Int32 rootId)
    {
        Child_Id = childId;
        Root_Id = rootId;
    }
}
/// <summary>
/// Root that holds both its children and, in a separate flat collection, the grandchildren.
/// </summary>
public class Root
{
    /// <summary>The children of this root.</summary>
    public ICollection<Child> Children { get; private set; }

    /// <summary>The grandchildren stored directly on this root.</summary>
    public ICollection<GrandChild> GrandChildren{ get; private set; }

    /// <summary>Copies both sequences into new lists.</summary>
    /// <param name="children">Children to store.</param>
    /// <param name="grandChildren">Grandchildren to store.</param>
    public Root(IEnumerable<Child> children, IEnumerable<GrandChild> grandChildren )
    {
        Children = new List<Child>(children);
        GrandChildren = new List<GrandChild>(grandChildren );
    }
}
you will have to use:
// The root now carries the children and the grandchildren as two separate collections.
var r1 = new Root(new[] { c1, c2 }, new[] { gc1, gc2, gc3 });
var db = new Db(new[] { r1 });
var root = db.Roots.FirstOrDefault();

// Pair each child with the grandchildren that reference its Child_Id and keep the grandchildren.
var rootGrandChildren =
    from c in root.Children
    join gc in root.GrandChildren on c.Child_Id equals gc.Child_Id
    select gc;