I have the following code that loops through json files in a directory and creates a csv file with following records:
results.csv
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
However, if i stop execution and rerun the program again, what happens is that the same records get inserted in the csv file again, resulting in:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
How do i check if the record already exists (i.e. each field is the same of the fields being inserted) and replace it (or basically dont append it again?) for example, if i were to run the program again because there was a change in file1.json and also because there was a new file added to the directory, the new csv should look like this:
results.csv:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Corpum & Co
file32.json 1 FirmA
code:
using ChoETL;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
static void Main(string[] args)
{
//Output to CSV
foreach (var jsonFile in Directory.GetFiles(jsonFilesPath))
{
JsonToCsv(jsonFile, csvFilePath);
}
}
public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
{
var skipped = lines.SkipWhile(l => l.text != nameOfField);
switch (throwException)
{
case true:
var enumerator = lines.GetEnumerator();
while (enumerator.MoveNext())
{
if (skipped.Count() == 0)
return skipped.Skip(1).First().text;
else
throw new InvalidDataException("Odd number of items found in IEnumerable<>");
}
break;
case false:
// Skip(#) to skip over the unnecessary Lines,
// such as "Account Information", preceding "Practice Name".
return skipped.Skip(1).First().text;
break;
default:
Console.WriteLine("Default case");
break;
}
// Returning null isn't recommended, but it does fix the error "not all code paths return a value"
return null;
}
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
{
using (var fs = new FileStream(csvFile, FileMode.Append, FileAccess.Write))
{
using (var writer = new ChoCSVWriter(fs))
{
writer.WithField("FileName", fieldName: "File Name")
.WithField("Page")
.WithField("PracticeName", fieldName: "Practice Name");
if (fs.Position == 0) // we don't need header if file already existed before
{
writer.WithFirstLineHeader();
}
// Limit the result to page 1 since the fields below only exist on the 1st page
writer.Write(p
.Where(r1 => r1.page == 1)
.Select(r1 =>
{
var lines = (dynamic[])r1.lines;
return new
{
FileName = jsonInputFile,
Page = r1.page,
PracticeName = //lines[6].text,
fieldValue(lines, "Practice Name"),
};
}
));
}
fs.Write(Environment.NewLine); // append new line carrier so we don't write to the same line when file reopened for writing
}
}
}
Sample JSON file
{
"status": "succeeded",
"createdDateTime": "2020-10-30T15:56:11Z",
"lastUpdatedDateTime": "2020-10-30T15:56:12Z",
"analyzeResult": {
"version": "3.0.0",
"readResults": [
{
"page": 1,
"angle": 0.086,
"width": 684,
"height": 272,
"unit": "pixel",
"lines": [
{
"boundingBox": [
7,
6,
196,
5,
196,
24,
7,
25
],
"text": "Account Information",
"words": [
{
"boundingBox": [
10,
7,
83,
7,
81,
24,
7,
26
],
"text": "Account",
"confidence": 0.981
},
{
"boundingBox": [
87,
7,
196,
6,
196,
24,
85,
24
],
"text": "Information",
"confidence": 0.939
}
]
},
{
"boundingBox": [
120,
56,
223,
57,
223,
70,
120,
70
],
"text": "Practice Name",
"words": [
{
"boundingBox": [
120,
57,
176,
57,
176,
70,
120,
71
],
"text": "Practice",
"confidence": 0.982
},
{
"boundingBox": [
179,
57,
222,
57,
222,
71,
179,
70
],
"text": "Name",
"confidence": 0.985
}
]
},
{
"boundingBox": [
236,
62,
390,
62,
390,
77,
236,
77
],
"text": "Some Practice Name",
"words": [
{
"boundingBox": [
236,
62,
277,
62,
277,
78,
236,
78
],
"text": "Some",
"confidence": 0.987
},
{
"boundingBox": [
280,
62,
340,
62,
341,
78,
280,
77
],
"text": "Practice",
"confidence": 0.984
},
{
"boundingBox": [
343,
62,
390,
62,
390,
78,
344,
78
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
107,
102,
223,
102,
223,
115,
107,
115
],
"text": "Owner Full Name",
"words": [
{
"boundingBox": [
108,
103,
151,
102,
151,
116,
107,
116
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
154,
102,
177,
102,
176,
116,
153,
116
],
"text": "Full",
"confidence": 0.954
},
{
"boundingBox": [
180,
102,
224,
103,
223,
116,
179,
116
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
237,
104,
298,
104,
298,
119,
237,
119
],
"text": "Bob Lee",
"words": [
{
"boundingBox": [
238,
104,
266,
104,
266,
119,
238,
120
],
"text": "Bob",
"confidence": 0.987
},
{
"boundingBox": [
269,
104,
298,
105,
298,
120,
269,
119
],
"text": "Lee",
"confidence": 0.987
}
]
},
{
"boundingBox": [
136,
147,
223,
147,
223,
160,
137,
161
],
"text": "Owner Email",
"words": [
{
"boundingBox": [
137,
148,
181,
147,
181,
161,
137,
162
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
184,
147,
224,
147,
224,
161,
184,
161
],
"text": "Email",
"confidence": 0.985
}
]
},
{
"boundingBox": [
239,
144,
361,
144,
361,
162,
239,
162
],
"text": "bob#gmail.com",
"words": [
{
"boundingBox": [
240,
145,
362,
146,
361,
163,
240,
163
],
"text": "bob#gmail.com",
"confidence": 0.974
}
]
},
{
"boundingBox": [
137,
193,
224,
193,
224,
208,
137,
208
],
"text": "Server Setup",
"words": [
{
"boundingBox": [
137,
194,
179,
194,
179,
208,
137,
208
],
"text": "Server",
"confidence": 0.985
},
{
"boundingBox": [
182,
194,
224,
194,
224,
209,
182,
208
],
"text": "Setup",
"confidence": 0.985
}
]
},
{
"boundingBox": [
276,
188,
340,
192,
339,
211,
275,
209
],
"text": "cloud",
"words": [
{
"boundingBox": [
297,
192,
339,
194,
339,
211,
297,
211
],
"text": "cloud",
"confidence": 0.933
}
]
},
{
"boundingBox": [
376,
187,
461,
191,
460,
212,
376,
211
],
"text": "Location",
"words": [
{
"boundingBox": [
394,
191,
460,
196,
459,
211,
394,
211
],
"text": "Location",
"confidence": 0.844
}
]
},
{
"boundingBox": [
500,
189,
666,
192,
665,
212,
499,
211
],
"text": "LIcentral (multi-location)",
"words": [
{
"boundingBox": [
501,
190,
567,
195,
567,
212,
500,
212
],
"text": "LIcentral",
"confidence": 0.665
},
{
"boundingBox": [
572,
195,
665,
195,
665,
212,
571,
212
],
"text": "(multi-location)",
"confidence": 0.899
}
]
},
{
"boundingBox": [
21,
238,
224,
238,
223,
255,
21,
253
],
"text": "Number of Locations Enrolling",
"words": [
{
"boundingBox": [
21,
239,
76,
239,
76,
253,
21,
253
],
"text": "Number",
"confidence": 0.985
},
{
"boundingBox": [
79,
239,
92,
239,
92,
253,
79,
253
],
"text": "of",
"confidence": 0.983
},
{
"boundingBox": [
95,
239,
161,
239,
161,
254,
95,
253
],
"text": "Locations",
"confidence": 0.981
},
{
"boundingBox": [
164,
239,
224,
239,
223,
256,
163,
254
],
"text": "Enrolling",
"confidence": 0.983
}
]
},
{
"boundingBox": [
273,
237,
289,
239,
288,
257,
272,
255
],
"text": "1",
"words": [
{
"boundingBox": [
278,
237,
290,
239,
287,
257,
276,
255
],
"text": "1",
"confidence": 0.981
}
]
},
{
"boundingBox": [
337,
239,
670,
239,
670,
253,
337,
252
],
"text": "*If more than 1 location, add info on the locations form",
"words": [
{
"boundingBox": [
338,
239,
347,
239,
347,
252,
338,
252
],
"text": "*If",
"confidence": 0.874
},
{
"boundingBox": [
350,
239,
384,
239,
384,
253,
350,
252
],
"text": "more",
"confidence": 0.983
},
{
"boundingBox": [
386,
239,
416,
239,
416,
253,
386,
253
],
"text": "than",
"confidence": 0.986
},
{
"boundingBox": [
419,
239,
422,
239,
422,
253,
419,
253
],
"text": "1",
"confidence": 0.635
},
{
"boundingBox": [
425,
239,
478,
239,
478,
253,
425,
253
],
"text": "location,",
"confidence": 0.955
},
{
"boundingBox": [
481,
239,
506,
239,
506,
253,
481,
253
],
"text": "add",
"confidence": 0.986
},
{
"boundingBox": [
509,
239,
533,
239,
533,
253,
509,
253
],
"text": "info",
"confidence": 0.981
},
{
"boundingBox": [
535,
239,
551,
239,
552,
253,
535,
253
],
"text": "on",
"confidence": 0.988
},
{
"boundingBox": [
554,
239,
574,
239,
575,
253,
554,
253
],
"text": "the",
"confidence": 0.987
},
{
"boundingBox": [
577,
239,
634,
239,
634,
253,
577,
253
],
"text": "locations",
"confidence": 0.973
},
{
"boundingBox": [
636,
239,
666,
240,
666,
253,
637,
253
],
"text": "form",
"confidence": 0.986
}
]
}
]
}
]
}
}
screenshot of csv after adding spproach 2 of Supun De Silva answer:
FYI. Sample file you provided does not work as it fails at var lines = (dynamic[])r1.lines;
Approach 1 - Rename Old File and Create a new one for data appending
1. Introduce new Function
private static void RenameIfExist(string csvFilePath)
{
if (File.Exists(csvFilePath))
{
System.IO.File.Move(csvFilePath, $"{csvFilePath}_{DateTime.Now.ToString("backup_yyyyMMdd_HHmmss")}");
}
}
2. Call the Mover Function and use Create mode in new file
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
{
Program.RenameIfExist(csvFile);
using (var fs = new FileStream(csvFile, FileMode.Create, FileAccess.Write))
{
try
{
using (ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs)
.WithField("FileName", fieldName: "File Name")
.WithField("Page")
.WithField("PracticeName", fieldName: "Practice Name")
.WithFirstLineHeader())
{
// Limit the result to page 1 since the fields below only exist on the 1st page
writer.Write(p
.Where(r1 => r1.page == 1)
.Select(r1 =>
{
var lines = (dynamic[])r1.lines;
return new
{
FileName = jsonInputFile,
Page = r1.page,
PracticeName = fieldValue(lines, "Practice Name"),
};
}
));
}
}
catch(Exception e)
{
throw e;
}
}
}
}
Approach 2 - Open Existing File and create a lookup structure with the data
You may need to tweak this a bit
1. Declare new struct to store a key
private static Dictionary<string, bool> processedfileStates = new Dictionary<string, bool>();
2. Pre-Loader Function
private static void LoadOldStatsIfExist(string csvFilePath)
{
if (File.Exists(csvFilePath))
{
using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
{
using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
{
using (var dataReader = reader.AsDataReader())
{
while (dataReader.Read())
{
Program.processedfileStates.Add($"{dataReader[0].ToString()}_{dataReader[1].ToString()}_{dataReader[2].ToString()}", true);
}
}
}
}
}
}
3. Json to CSV fcn
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
{
Program.LoadOldStatsIfExist(csvFile);
using (var fs = new FileStream(csvFile, Program.processedfileStates.Count == 0 ? FileMode.Create : FileMode.Append, FileAccess.Write))
{
if (Program.processedfileStates.Count != 0)
{
fs.Write(Environment.NewLine);
}
try
{
ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs);
if (Program.processedfileStates.Count == 0)
{
writer.WithFirstLineHeader();
}
using (writer
.WithField("FileName", fieldName: "File Name")
.WithField("Page")
.WithField("PracticeName", fieldName: "Practice Name")
)
{
if (Program.processedfileStates.Count == 0)
{
writer = writer.WithFirstLineHeader();
}
// Limit the result to page 1 since the fields below only exist on the 1st page
var data = p
.Where(r1 => r1.page == 1)
.Select(r1 =>
{
var lines = (dynamic[])r1.lines;
return new
{
FileName = jsonInputFile,
Page = r1.page,
PracticeName = fieldValue(lines, "Practice Name"),
};
}
).Where(de => !processedfileStates.ContainsKey($"{de.FileName.ToString()}_{de.Page.ToString()}_{de.PracticeName.ToString()}"));
writer.Write(data);
}
}
catch (Exception e)
{
throw e;
}
}
}
}
Approach 2 - Refactored
public class OPModel
{
public string FileName { get; set; }
public long Page { get; set; }
public string PracticeName { get; set; }
}
public class Program
{
const string jsonFilesPath = "D:\\DevWork\\C#\\TempProject1\\ConsoleApp1\\data";
const string csvFilePath = "D:\\DevWork\\C#\\TempProject1\\ConsoleApp1\\output\\op.csv";
private static Dictionary<string, bool> processedfileStates = new Dictionary<string, bool>();
private static bool fileExisted = false;
private static void RenameIfExist(string csvFilePath)
{
if (File.Exists(csvFilePath))
{
System.IO.File.Move(csvFilePath, $"{csvFilePath}_{DateTime.Now.ToString("backup_yyyyMMdd_HHmmss")}");
}
}
private static void LoadOldStatsIfExist(string csvFilePath)
{
if (File.Exists(csvFilePath))
{
using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
{
using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
{
using (var dTable = reader.AsDataTable())
{
foreach (DataRow row in dTable.Rows)
{
Program.processedfileStates.Add($"{row["File Name"].ToString()}_{row["Page"].ToString()}_{row["Practice Name"].ToString()}", true);
}
}
}
}
}
}
public static void Main(string[] args)
{
try
{
Program.fileExisted = File.Exists(csvFilePath);
Program.LoadOldStatsIfExist(csvFilePath);
List<OPModel> dataToWrite = new List<OPModel>();
// Persist each file to
foreach (var jsonFile in Directory.GetFiles(jsonFilesPath))
{
dataToWrite.AddRange(JsonToCsv(jsonFile));
}
if (dataToWrite.Count != 0)
{
using (var fs = new FileStream(csvFilePath, !Program.fileExisted ? FileMode.Create : FileMode.Append, FileAccess.Write))
{
try
{
ChoCSVWriter<OPModel> writer = new ChoCSVWriter<OPModel>(fs);
using (writer.WithField("FileName", fieldName: "File Name").WithField("Page").WithField("PracticeName", fieldName: "Practice Name"))
{
if (!Program.fileExisted)
{
writer = writer.WithFirstLineHeader();
}
writer.Write(dataToWrite);
}
fs.Write(Environment.NewLine);
}
catch (Exception e)
{
throw e;
}
}
}
//Output to CSV
Console.ReadKey();
}
catch (Exception ex)
{
}
}
public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
{
var skipped = lines.SkipWhile(l => l.text != nameOfField);
switch (throwException)
{
case true:
var enumerator = lines.GetEnumerator();
while (enumerator.MoveNext())
{
if (enumerator.MoveNext())
return skipped.Skip(1).First().text;
else
throw new InvalidDataException("Odd number of items found in IEnumerable<>");
}
break;
case false:
// Skip(#) to skip over the unnecessary Lines,
// such as "Account Information", preceding "Practice Name".
return skipped.Skip(1).First().text;
default:
Console.WriteLine("Default case");
break;
}
// Returning null isn't recommended, but it does fix the error "not all code paths return a value"
return null;
}
public static List<OPModel> JsonToCsv(string jsonInputFile)
{
using (var reader = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
{
var data = reader.Where(r1 => r1.page == 1)
.Select(r1 =>
{
var lines = (dynamic[])r1.lines;
return new OPModel
{
FileName = jsonInputFile,
Page = r1.page,
PracticeName = Program.fieldValue(lines, "Practice Name")
};
}
).Where(de => !processedfileStates.ContainsKey($"{de.FileName.ToString()}_{de.Page.ToString()}_{de.PracticeName.ToString()}")).ToList();
return data;
}
}
}