I want to build a hierarchical dictionary structure in C# from an Excel data sheet. The structure should look like this:
{
"Data": {
survey_id: {
"SurveyParameter": {},
"SurveySetting": {},
"Boreholes": {
borehole_id: {
"BoreholeParameter": {},
"BoreholeSetting": {},
"Layers": {
"Geology": {
layer_id: {"Parameter": {}, "LayerSetting": {}}
},
"Contamination": {
layer_id: {"Parameter": {}, "LayerSetting": {}}
},
},
}
},
}
}
}
"""
An example image of Excel data sheet from which the dictionary structure is to be built is provided here:
image of excel_data
I have built a dictionary structure that looks like this second image (below). But I want to convert it to the hierarchical structure, shown at the top.
image of the built dictionary
here is the text format of the dictionary saved as json file:
"3": {
"(SurveyParameter, SurveyId)": "[Company-Year]",
"(SurveyParameter, SurveyLink)": null,
"(SurveySetting, CoordinateSystem)": "[UTM, Revit]",
"(BoreholeParameter, BoreholeId)": null,
"(BoreholeSetting, AllowOverwrite)": "[True / False (default)]",
"(BoreholeSetting, Easting)": "[m] (X-Coordinate)",
"(BoreholeSetting, Northing)": "[m] (Y-Coordinate)",
"(BoreholeSetting, Elevation)": "[m] (Z-Coordinate)",
"(BoreholeParameter, BoreholeDiameter)": "[m] (default 0.4 m)",
"(Parameter, LayerId)": null,
"(Parameter, InputLayerStartDepth)": "(automatically filled)",
"(Parameter, InputLayerEndDepth)": "[m] (relative to coordinate)",
"(LayerSetting, SoilType)": "(pre-defined)",
"(Parameter, HomogeneousAreaSoil)": "(pre-defined)",
"(Parameter, SoilDescription)": null
},
"4": {
"(SurveyParameter, SurveyId)": "BRE-18",
"(SurveyParameter, SurveyLink)": null,
"(SurveySetting, CoordinateSystem)": "UTM",
"(BoreholeParameter, BoreholeId)": "RKS A5",
"(BoreholeSetting, AllowOverwrite)": null,
"(BoreholeSetting, Easting)": 795561.0,
"(BoreholeSetting, Northing)": 2278972.0,
"(BoreholeSetting, Elevation)": null,
"(BoreholeParameter, BoreholeDiameter)": null,
"(Parameter, LayerId)": "RKS A5_1",
"(Parameter, InputLayerStartDepth)": null,
"(Parameter, InputLayerEndDepth)": 1.7,
"(LayerSetting, SoilType)": "A",
"(Parameter, HomogeneousAreaSoil)": "A",
"(Parameter, SoilDescription)": "G, s, u'"
},
"5": {
"(SurveyParameter, SurveyId)": "BRE-18",
"(SurveyParameter, SurveyLink)": null,
"(SurveySetting, CoordinateSystem)": "UTM",
"(BoreholeParameter, BoreholeId)": "RKS A5",
"(BoreholeSetting, AllowOverwrite)": null,
"(BoreholeSetting, Easting)": 795561.0,
"(BoreholeSetting, Northing)": 2278972.0,
"(BoreholeSetting, Elevation)": null,
"(BoreholeParameter, BoreholeDiameter)": null,
"(Parameter, LayerId)": "RKS A5_2",
"(Parameter, InputLayerStartDepth)": null,
"(Parameter, InputLayerEndDepth)": 2.2,
"(LayerSetting, SoilType)": "A",
"(Parameter, HomogeneousAreaSoil)": "A",
"(Parameter, SoilDescription)": "G, s, u'"
},
"6": {
"(SurveyParameter, SurveyId)": "BRE-18",
"(SurveyParameter, SurveyLink)": null,
"(SurveySetting, CoordinateSystem)": "UTM",
"(BoreholeParameter, BoreholeId)": "RKS A5",
"(BoreholeSetting, AllowOverwrite)": null,
"(BoreholeSetting, Easting)": 795561.0,
"(BoreholeSetting, Northing)": 2278972.0,
"(BoreholeSetting, Elevation)": null,
"(BoreholeParameter, BoreholeDiameter)": null,
"(Parameter, LayerId)": "RKS A5_3",
"(Parameter, InputLayerStartDepth)": null,
"(Parameter, InputLayerEndDepth)": 2.3,
"(LayerSetting, SoilType)": "Bohrhindernis",
"(Parameter, HomogeneousAreaSoil)": "Bohrhindernis",
"(Parameter, SoilDescription)": "Bohrhindernis"
I used the following code to build the dictionary in second image:
public void ExcelReader(string path)
{
var stream = File.Open(path, FileMode.Open, FileAccess.Read);
var reader = ExcelReaderFactory.CreateReader(stream);
var result = reader.AsDataSet();
var tables = result.Tables.Cast<DataTable>();
Dictionary<string,Dictionary<(object, object), object>> rows = new Dictionary<string,Dictionary<(object, object), object>> ();
foreach (DataTable table in tables)
{
//DataRow selectedRow = table.Rows[3];
//DataRow newRow = table.NewRow();
//newRow.ItemArray = selectedRow.ItemArray; // copy data
//table.Rows.Remove(selectedRow);
//table.Rows.InsertAt(newRow, 3 + 1 / -1);
//table.Rows.Remove(table.Rows[0]);
Dictionary<(object, object), object> row;
foreach (DataRow dr in table.Rows)
{
row = new Dictionary<(object, object), object>();
foreach (DataColumn col in table.Columns)
{
object FirstcolName = table.Rows[0][col.ColumnName].ToString(); //index the first row with the column name
object SecondcolName = table.Rows[1][col.ColumnName].ToString(); //index the 2nd row with the column name
//string 3rdcolname = table.Rows[3][col.ColumnName].ToString(); //index the 2nd row with the column name
// Tuple<string,string> column_name_keys = new Tuple<string,string>(FirstcolName.ToString(), SecondcolName.ToString());
//col.ColumnName = column_name_keys;
// table.Rows[0].Delete();
row.Add((FirstcolName, SecondcolName), dr[col]); //add the column names as keys and data as values in the dicttionary.using a tuple of two column headers as key
}
int index = table.Rows.IndexOf(dr); //indexing eachrow to cnvert the value in each row to string
rows.Add(index.ToString(), row); //converting to string
}
string json = JsonConvert.SerializeObject(rows, Formatting.Indented);
File.WriteAllText(#"D:\Data\test.json", json);
}
}
}
Posting the developed code for people who might need it or might be stuck in a similar problem.
This code below works for the most part as it makes the dictionary structure. However it needs to be improved with introduction of Flexible nested dictionaries that can take any types of values(including Dictionaries). This one generates the Survey_level Dictionary atleast: The nested borehole dictionary structure can only be added when we have a class that combines Boreholes Dictionary, SurveyParameter Dictionary & SurveySetting Dictionary
...
public void ExcelReader(string path)
{
var stream = File.Open(path, FileMode.Open, FileAccess.Read);
var reader = ExcelReaderFactory.CreateReader(stream);
var result = reader.AsDataSet();
var tables = result.Tables.Cast<DataTable>();
//If you declared nested dict as object you couldn't do any operations with it that are provided by Dictionary, only the ones valid for all objects.
Dictionary<string, Dictionary<string, Dictionary<(string, string), object>>> dict_coll = new Dictionary<string, Dictionary<string, Dictionary<(string, string), object>>>();
Dictionary<string, Dictionary<(string, string), object>> rows = new Dictionary<string, Dictionary<(string, string), object>>();
Dictionary<(string, string), object> row_usable = new Dictionary<(string, string), object>();
foreach (DataTable table in tables)
{
Dictionary<(string, string), object> row;
foreach (DataRow dr in table.Rows)
{
row = new Dictionary<(string, string), object>();
foreach (DataColumn col in table.Columns)
{
string FirstcolName = table.Rows[0][col.ColumnName].ToString(); //index the first row with the column name
string SecondcolName = table.Rows[1][col.ColumnName].ToString(); //index the 2nd row with the column name
row.Add((FirstcolName, SecondcolName), dr[col]); //add the column names as keys and data as values in the dicttionary.using a tuple of two column headers as key
}
int index = table.Rows.IndexOf(dr); //indexing eachrow to cnvert the value in each row to string
rows.Add(index.ToString(), row); //converting to string
rows.Remove("0");
rows.Remove("1");
rows.Remove("3");
List<(string, string)> row_Keys = new List<(string, string)>(row.Keys);
List<object> row_vals = new List<object>(row.Values);
row_usable = row_Keys.Zip(row_vals, (k, v) => new { k, v }).ToDictionary(x => x.k, x => x.v);
}
dict_coll.Add(table.TableName.ToString(), rows);
}
var guid_data = rows.ElementAt(0).Value; //or use trygetvalue()
var SURVEY_ID = ("SurveyParameter", "SurveyId");
var BOREHOLE_ID = ("BoreholeParameter", "BoreholeId");
var LAYER_ID = ("Parameter", "LayerId");
Dictionary<string, object> guid_dict = new Dictionary<string, object>();
Dictionary<string, Dictionary<string,Dictionary<string,object>>> data_dict =
new Dictionary<string, Dictionary<string, Dictionary<string, object>>>();
//Dictionary<string,object?> data_dict =
// new Dictionary<string,object?>();
//Hashtable data_dict = new Hashtable;
Hashtable restructured = new Hashtable();
restructured.Add("ParameterGuid", guid_dict);
restructured.Add("Data", data_dict);
List<string> guid_dict_keys = guid_dict.Keys.OfType<string>().ToList();
List<string> data_dict_keys = data_dict.Keys.OfType<string>().ToList();
foreach (Dictionary<string, Dictionary<(string, string), object>> datasets in dict_coll.Values) // datasets= Dictionary<string, Dictionary<(string, string), object>>>no need for specified types with hashtable but have to cast the value KeyValuePair<string, Dictionary<string, Dictionary<(string, string), object>>>
{
foreach (KeyValuePair<(string, string), object> i in guid_data)
{
if (i.Key.Item1.EndsWith("Parameter") & !guid_dict_keys.Contains(i.Key.Item2))//string.IsNullOrEmpty()//convert guidict to single string //guid_dict.Keys i.Key.Item2))
{
guid_dict[i.Key.Item2] = i.Value; //convert guid
}
}
foreach (Dictionary<(string, string), object> dataset in datasets.Values) //NULlMaybe/get dict(tuple,value) /KeyValuePair<(string, string), object>datasets.Value as Hashtable, KeyValuePair<(string, string), object>
{
// get survey/borehole/layer Ids
string target_layer_id = dataset[LAYER_ID].ToString();
string target_survey_id = dataset[SURVEY_ID].ToString(); //
string target_borehole_id = dataset[BOREHOLE_ID].ToString();//
//NULL
var finished = false;
while (!finished)
{
if (data_dict.ContainsKey(target_survey_id))
{
var survey_set = data_dict[target_survey_id];
var boreholes = survey_set["Boreholes"] as boreholes_dict;
if (boreholes.ContainsKey(target_borehole_id))
{
var borehole_set = boreholes[target_borehole_id] as borehole_id_dict;
layers_dict layers = new layers_dict();
layers = borehole_set["Layers"] as layers_dict;
if (layers.ContainsKey(target_layer_id))
{
{ }
finished = true;
}
else
{
finished = true;
}
}
else
{
boreholes.Add(target_borehole_id, new Dictionary<string, object>());
boreholes[target_borehole_id].Add("BoreholeParameter", new Dictionary<string, object>());
boreholes[target_borehole_id].Add("BoreholeSetting", new Dictionary<string, object>());
boreholes[target_borehole_id].Add("Layers", new Dictionary<string, object>());
//boreholes = new Dictionary<string,Dictionary<string,Dictionary<string,object>>
}
}
else
{
//Dictionary<string, object> survey_nested_dict = new Dictionary<string, object>();
data_dict.Add(target_survey_id, new Dictionary<string, Dictionary<string,object>>());
data_dict[target_survey_id].Add("SurveyParameter", new Dictionary<string, object>());
data_dict[target_survey_id].Add("SurveySetting",new Dictionary<string, object>());
data_dict[target_survey_id].Add("Boreholes", new Dictionary<string, object>());
foreach (KeyValuePair<(string, string), object> cat_nam_val in dataset) // this is a hashtable version of dataset which is inside datasets.Values
{
var val_dict = new Dictionary<string, object>();
try
{
val_dict = data_dict[target_survey_id][cat_nam_val.Key.Item1];
}
catch
{
}
if (IsDictionary(val_dict)) ;
{
val_dict.Add(cat_nam_val.Key.Item2, cat_nam_val.Value);
}
}
}
}
}
}
string json_guid = JsonConvert.SerializeObject(guid_dict, Formatting.Indented);
File.WriteAllText(#"D:\Data\testjsons\test_guid_dict.json", json_guid);
string json = JsonConvert.SerializeObject(dict_coll, Formatting.Indented);
File.WriteAllText(#"D:\Data\testjsons\test2.json", json);
string json_data = JsonConvert.SerializeObject(data_dict, Formatting.Indented);
File.WriteAllText(#"D:\Data\testjsons\test_data_dict.json", json_guid);
}
}
}
EDIT:
The following code has achieved the structure shown above in the question description. I hope it helps anyone who is stuck in this problem.
public void ExcelReader(string path)
{
var stream = File.Open(path, FileMode.Open, FileAccess.Read);
var reader = ExcelReaderFactory.CreateReader(stream);
var result = reader.AsDataSet();
var tables = result.Tables.Cast<DataTable>();
//If you declared nested dict as object you couldn't do any operations with it that are provided by Dictionary, only the ones valid for all objects.
Dictionary<string, Dictionary<string, Dictionary<(string, string), object>>> dict_coll = new Dictionary<string, Dictionary<string, Dictionary<(string, string), object>>>();
Dictionary<string, Dictionary<(string, string), object>> rows = new Dictionary<string, Dictionary<(string, string), object>>();
Dictionary<(string, string), object> row_usable = new Dictionary<(string, string), object>();
foreach (DataTable table in tables)
{
Dictionary<(string, string), object> row;
foreach (DataRow dr in table.Rows)
{
row = new Dictionary<(string, string), object>();
foreach (DataColumn col in table.Columns)
{
string FirstcolName = table.Rows[0][col.ColumnName].ToString(); //index the first row with the column name
string SecondcolName = table.Rows[1][col.ColumnName].ToString(); //index the 2nd row with the column name
row.Add((FirstcolName, SecondcolName), dr[col]); //add the column names as keys and data as values in the dicttionary.using a tuple of two column headers as key
}
int index = table.Rows.IndexOf(dr); //indexing eachrow to cnvert the value in each row to string
rows.Add(index.ToString(), row); //converting to string
rows.Remove("0");
rows.Remove("1");
rows.Remove("3");
List<(string, string)> row_Keys = new List<(string, string)>(row.Keys);
List<object> row_vals = new List<object>(row.Values);
row_usable = row_Keys.Zip(row_vals, (k, v) => new { k, v }).ToDictionary(x => x.k, x => x.v);
}
dict_coll.Add(table.TableName.ToString(), rows);
}
var guid_data = rows.ElementAt(0).Value; //or use trygetvalue()
var SURVEY_ID = ("SurveyParameter", "SurveyId");
var BOREHOLE_ID = ("BoreholeParameter", "BoreholeId");
var LAYER_ID = ("Parameter", "LayerId");
Dictionary<string, object> guid_dict = new Dictionary<string, object>();
Dictionary<string,Dictionary<string, Dictionary<object, object>>> data_dict =
new Dictionary<string, Dictionary<string, Dictionary<object, object>>> ();
//Dictionary<string,object?> data_dict =
// new Dictionary<string,object?>();
//Hashtable data_dict = new Hashtable;
Hashtable restructured = new Hashtable();
restructured.Add("ParameterGuid", guid_dict);
restructured.Add("Data", data_dict);
List<string> guid_dict_keys = guid_dict.Keys.OfType<string>().ToList();
List<string> data_dict_keys = data_dict.Keys.OfType<string>().ToList();
foreach (Dictionary<string, Dictionary<(string, string), object>> datasets in dict_coll.Values) // datasets= Dictionary<string, Dictionary<(string, string), object>>>no need for specified types with hashtable but have to cast the value KeyValuePair<string, Dictionary<string, Dictionary<(string, string), object>>>
{
foreach (KeyValuePair<(string, string), object> i in guid_data)
{
if (i.Key.Item1.EndsWith("Parameter") & !guid_dict_keys.Contains(i.Key.Item2))//string.IsNullOrEmpty()//convert guidict to single string //guid_dict.Keys i.Key.Item2))
{
guid_dict[i.Key.Item2] = i.Value; //convert guid
}
}
foreach (Dictionary<(string, string), object> dataset in datasets.Values) //NULlMaybe/get dict(tuple,value) /KeyValuePair<(string, string), object>datasets.Value as Hashtable, KeyValuePair<(string, string), object>
{
// get survey/borehole/layer Ids
string target_layer_id = dataset[LAYER_ID].ToString();
string target_survey_id = dataset[SURVEY_ID].ToString(); //
string target_borehole_id = dataset[BOREHOLE_ID].ToString();//
//NULL
var finished = false;
while (!finished)
{
if (data_dict.ContainsKey(target_survey_id))
{
var survey_set = data_dict[target_survey_id];
var boreholes = survey_set["Boreholes"];
if (boreholes.ContainsKey(target_borehole_id))
{
var borehole_set = boreholes[target_borehole_id] as Dictionary<string, Dictionary<object, object>>;
//Hashtable layers = new Hashtable();
var layers = borehole_set["Layers"] as Dictionary<object, object>;
if (layers.ContainsKey(target_layer_id))
{
{ }
finished = true;
}
else
{
layers.Add(target_layer_id, new Dictionary<string, Dictionary<object, object>>
{
{"Parameter", new Dictionary<object, object>() },
{"LayerSetting", new Dictionary<object, object>()}
});
foreach (KeyValuePair<(string, string), object> cat_nam_val in dataset) // this is a hashtable version of dataset which is inside datasets.Values
{
var val_dict = new Dictionary<object, object>(); // Dictionary<string, object>();
try
{
var val_dicttt = layers[target_layer_id] as Dictionary<string, Dictionary<object, object>>; // extra variable for casting
val_dict = val_dicttt[cat_nam_val.Key.Item1];
}
catch
{
}
if (IsDictionary(val_dict)) ;
{
val_dict.Add(cat_nam_val.Key.Item2, cat_nam_val.Value);
}
}
finished = true;
}
}
else
{
boreholes.Add(target_borehole_id, new Dictionary<string, Dictionary<object, object>>
{
{ "BoreholeParameter", new Dictionary<object, object>() },
{ "BoreholeSetting", new Dictionary<object, object>()},
{ "Layers", new Dictionary<object, object>() }
});
foreach (KeyValuePair<(string, string), object> cat_nam_val in dataset) // this is a hashtable version of dataset which is inside datasets.Values
{
var val_dict = new Dictionary<object, object>();
try
{
var val_dictt = boreholes[target_borehole_id] as Dictionary<string, Dictionary<object, object>>;
val_dict = val_dictt[cat_nam_val.Key.Item1];
}
catch
{
}
if (IsDictionary(val_dict)) ;
{
val_dict.Add(cat_nam_val.Key.Item2, cat_nam_val.Value);
}
}
}
}
else
{
//1.use hashtable
//Dictionary<string, object> survey_nested_dict = new Dictionary<string, object>();
data_dict.Add(target_survey_id, new Dictionary<string, Dictionary<object, object>>());
data_dict[target_survey_id].Add("SurveyParameter", new Dictionary<object, object>()); //new Dictionary<object, object>()
data_dict[target_survey_id].Add("SurveySetting", new Dictionary<object, object>());
data_dict[target_survey_id].Add("Boreholes", new Dictionary<object, object>());
foreach (KeyValuePair<(string, string), object> cat_nam_val in dataset) // this is a hashtable version of dataset which is inside datasets.Values
{
var val_dict = new Dictionary<object, object>();// Dictionary<string, object>();
try
{
val_dict = data_dict[target_survey_id][cat_nam_val.Key.Item1];
}
catch
{
}
if (IsDictionary(val_dict)) ;
{
val_dict.Add(cat_nam_val.Key.Item2, cat_nam_val.Value);
}
}
}
}
}
}
string json_guid = JsonConvert.SerializeObject(guid_dict, Formatting.Indented);
File.WriteAllText(#"D:\Data\test_guid_dict.json", json_guid);
string json = JsonConvert.SerializeObject(dict_coll, Formatting.Indented);
File.WriteAllText(#"D:\Data\test2.json", json);
string json_data = JsonConvert.SerializeObject(data_dict, Formatting.Indented);
File.WriteAllText(#"D:\Data\test_data_dict.json", json_data);
string json_final_data = JsonConvert.SerializeObject(restructured, Formatting.Indented);
File.WriteAllText(#"D:\Data\test_restructured_data_dict.json", json_final_data);
}
}
}