C# CSV to JSON per row - c#

Major Edit: I am doing a bad job of explaining :(
I have two classes:
public class UserDefinitions// a list of 'Items', each'group of items belong to a user. I handle User logic elsewhere, and it works flawlessly.
{
public List<Item> items { get; set; }
}
public class Item //the User definitions. A user could have 1 or 15 of these. They would all be a single 'line' from the CSV file.
{
public string definitionKey { get; set; }
public string defName { get; set; }
public string defValue { get; set; }
}
Which I wanna build with a CSV File. I build this CSV File, so I make it using the same parameters every time.
I run SQL on my company's DB to generate results like so: http://i.imgur.com/gS1UJot.png
Then I read the file like so:
class Program
{
static void Main(string[] args)
{
var userData = new UserDefinitions();
var csvList = new List<Item>();
string json = "";
string fPath = #"C:\test\csvTest.csv";
var lines = File.ReadAllLines(fPath);
Console.WriteLine(lines);
List<string> udata = new List<string>(lines);
foreach (var line in udata)
{
string[] userDataComplete = line.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);// this cleans any empty cells from the CSV
csvList.Add(new Item { definitionKey = userDataComplete[1], defName = userDataComplete[2], defValue = userDataComplete[3] });
}
json = JsonConvert.SerializeObject(csvList); //everything below is for debugging/tracking progress
Console.WriteLine(json);
Console.ReadKey();
StreamWriter sw = new StreamWriter("C:\\test\\testjson.txt");
sw.WriteLine(json);
sw.Close();
}
}
This ALMOST does what I want. The output json is from the first 'column' of the csv data
[{"definitionKey":"uuid1","defName":"HairColor","defValue":"Brown"},{"definitionKey":"uuid1","defName":"HairColor","defValue":"Blonde"},{"definitionKey":"uuid1","defName":"HairColor","defValue":"Blue"}]
When using the screen shot as an example, the wanted output should be
[{"attributeDefinitionKey":"uuid1","name":"HairColor","value":"Brown"},{"definitionKey":"uuid2","defName":"FreckleAmount","defValue":"50"}]
[{"attributeDefinitionKey":"uuid1","name":"HairColor","value":"Blonde"},{"definitionKey":"uuid2","defName":"FreckleAmount","defValue":"null"}]
[{"attributeDefinitionKey":"uuid1","name":"HairColor","value":"Blue"},{"definitionKey":"uuid3","defName":"Tattoos","defValue":"5"}]
I can't pick out certain aspects at will, or apply them to Items. For example there maybe 10 users or 5000 users, but the definitionKey will always be the [1], and adding '3' will get every subsequent defintionKey. Just like the defName will always be in the [2] spot and adding 3 will get every subsequent defName if there are any, this is all per line.
I know I have to add some +3 logic, but not quite sure how to incorporate that. Maybe a for loop? a nested for loop after a foreach loop? I feel I am missing something obvious!
Thanks again for any help

This reads the csv line for line and converts each row to json, while adapting to the change in the amount of columns.
This only works if the CSV follows your rules:
one userId and
x amount of "Things" with 3 columns per "Thing".
private static void Main(string[] args)
{
var file = new StreamReader(#"C:\test\csvTest.csv");
string line;
var itemsJson = new List<string>();
file.ReadLine();
while ((line = file.ReadLine()) != null)
{
var sb = new StringBuilder();
var fields = line.Split(',');
sb.Append(GetKeyValueJson("UserId", fields[0]));
for (var i = 1; i < fields.Length; i += 3)
{
var x = (i + 3) / 3;
sb.Append(GetKeyValueJson($"Thing {i + x} ID", fields[i]));
sb.Append(GetKeyValueJson($"Thing {i + x} ID", fields[i + 1]));
sb.Append(i + 3 == fields.Length
? GetKeyValueJson($"Thing {i + x} ID", fields[i + 2], true)
: GetKeyValueJson($"Thing {i + x} ID", fields[i + 2]));
}
itemsJson.Add(WrapJson(sb.ToString()));
}
var json = WrapItems(itemsJson);
Console.ReadLine();
}
private static string GetKeyValueJson(string id, string value, bool lastPair = false)
{
var sb = new StringBuilder();
sb.Append('"');
sb.Append(id);
sb.Append('"');
sb.Append(':');
sb.Append('"');
sb.Append(value);
sb.Append('"');
if (!lastPair)
sb.Append(',');
return sb.ToString();
}
private static string WrapJson(string s)
{
var sb = new StringBuilder();
sb.Append('{');
sb.Append(s);
sb.Append('}');
return sb.ToString();
}
private static string WrapItems(List<string> jsonList)
{
var sb = new StringBuilder();
sb.Append('"');
sb.Append("Items");
sb.Append('"');
sb.Append(':');
sb.Append('[');
sb.Append(jsonList.Aggregate((current, next) => current + "," + next));
sb.Append(']');
return WrapJson(sb.ToString());
}
}
It's not pretty and sorting would be tough, but it should adapt to the column amount as long as it is in 3's.

Related

New line within CSV column causing issue

I have a large csv file which has millions of rows. The sample csv lines are
CODE,COMPANY NAME, DATE, ACTION
A,My Name , LLC,2018-01-28,BUY
B,Your Name , LLC,2018-01-25,SELL
C,
All Name , LLC,2018-01-21,SELL
D,World Name , LLC,2018-01-20,BUY
Row C has new line, but actually this is same record. I want to remove new line character from the csv line within cell\field\column.
I tired \r\n, Envirnment.NewLine and many other things, but could not make it work.
Here is my code..
private DataTable CSToDataTable(string csvfile)
{
Int64 row = 0;
try
{
string CSVFilePathName = csvfile; //#"C:\test.csv";
string[] Lines = File.ReadAllLines(CSVFilePathName.Replace(Environment.NewLine, ""));
string[] Fields;
Fields = Lines[0].Split(new char[] { ',' });
int Cols = Fields.GetLength(0);
DataTable dt = new DataTable();
//1st row must be column names; force lower case to ensure matching later on.
for (int i = 0; i < Cols; i++)
dt.Columns.Add(Fields[i].ToLower(), typeof(string));
DataRow Row;
for (row = 1; row < Lines.GetLength(0); row++)
{
Fields = Lines[row].Split(new char[] { ',' });
Row = dt.NewRow();
//Console.WriteLine(row);
for (int f = 0; f < Cols; f++)
{
Row[f] = Fields[f];
}
dt.Rows.Add(Row);
if (row == 190063)
{
}
}
return dt;
}
catch (Exception ex)
{
throw ex;
}
}
How can I remove new line character and read the row correctly? I don't want to skip the such rows as per the business requirement.
You CSV file is not in valid format. In order to parse and load them successfully, you will have to sanitize them. Couple of issues
COMPANY NAME column contains field separator in it. Fix them by
surrounding quotes.
New line in CSV value - This can be fixed by combining adjacent rows as one.
With Cinchoo ETL, you can sanitize and load your large file as below
string csv = #"CODE,COMPANY NAME, DATE, ACTION
A,My Name , LLC,2018-01-28,BUY
B,Your Name , LLC,2018-01-25,SELL
C,
All Name , LLC,2018-01-21,SELL
D,World Name , LLC,2018-01-20,BUY";
string bufferLine = null;
var reader = ChoCSVReader.LoadText(csv)
.WithFirstLineHeader()
.Setup(s => s.BeforeRecordLoad += (o, e) =>
{
string line = (string)e.Source;
string[] tokens = line.Split(",");
if (tokens.Length == 5)
{
//Fix the second and third value with quotes
e.Source = #"{0},""{1},{2}"",{3}, {4}".FormatString(tokens[0], tokens[1], tokens[2], tokens[3], tokens[4]);
}
else
{
//Fix the breaking lines, assume that some csv lines broken into max 2 lines
if (bufferLine == null)
{
bufferLine = line;
e.Skip = true;
}
else
{
line = bufferLine + line;
tokens = line.Split(",");
e.Source = #"{0},""{1},{2}"",{3}, {4}".FormatString(tokens[0], tokens[1], tokens[2], tokens[3], tokens[4]);
line = null;
}
}
});
foreach (var rec in reader)
Console.WriteLine(rec.Dump());
//Careful to load millions rows into DataTable
//var dt = reader.AsDataTable();
Hope it helps.
You haven't made it clear what are the possible criteria an unwanted new line could appear in the file. So assuming that a 'proper' line in the CSV file does NOT end with a comma, and if one ends with a comma that means that it's not a properly formatted line, you could do something like this:
static void Main(string[] args)
{
string path = #"CSVFile.csv";
List<CSVData> data = new List<CSVData>();
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
{
using (StreamReader sr = new StreamReader(fs))
{
sr.ReadLine(); // Header
while (!sr.EndOfStream)
{
var line = sr.ReadLine();
while (line.EndsWith(","))
{
line += sr.ReadLine();
}
var items = line.Split(new string[] { "," }, StringSplitOptions.None);
data.Add(new CSVData() { CODE = items[0], NAME = items[1], COMPANY = items[2], DATE = items[3], ACTION = items[4] });
}
}
}
Console.ReadLine();
}
public class CSVData
{
public string CODE { get; set; }
public string NAME { get; set; }
public string COMPANY { get; set; }
public string DATE { get; set; }
public string ACTION { get; set; }
}
Obviously there's a lot of error handling to be done here (for example, when creating a new CSVData object make sure your items contain all the data you want), but I think this is the start you need.

C# Adding CSV to a list

I try to add the containing of a csv file to a new list. It's a list of different types of people with caracteristics like the function, a matricule, the last name, the firstname and a sex. So I managed to read the file but I don't really know how to process to add the containing of the file to my list.Here is my code :
`private static void ReadTest()
{
int count = 0;
string line;
Char c = ';';
StreamReader file= new StreamReader("Listing.csv");
while ((line= file.ReadLine()) != null)
{
String[] substrings = line.Split(c);
foreach (var substring in substrings)
{
Console.WriteLine(substring);
}
count++;
}
fichier.Close();
System.Console.WriteLine("Number of lines : {0}.", count);
}
static void Main(string[] args)
{
List<Personnel> Workers = new List<Personnel>();
}
'
Why don't you use CSVHelper, it will be as simple as the following:
var csv = new CsvReader( textReader );
var records = csv.GetRecords<Personnel>();
//then loop through
foreach( var record in records )
{
}
you just need to install the nuget package:
Install-Package CsvHelper
Check this for more information.
Replace the foreach loop with something like the following:
var person = new Personnel();
person.firstname = substrings[0];
person.lastname = substrings[1];
person.function = substrings[2];
//continue until all variables assigned.
Workers.Add(person);
Also, if the Workers list is not a static list, then make ReadTest return a List and create a list within the function.
Like this:
private static List<Personnel> ReadTest()
{
int count = 0;
string line;
Char c = ';';
StreamReader file= new StreamReader("Listing.csv");
var Workers = new List<Personnel>();
while ((line= file.ReadLine()) != null)
{
String[] substrings = line.Split(c);
var person = new Personnel();
person.firstname = substrings[0];
person.lastname = substrings[1];
person.function = substrings[2];
//continue until all variables assigned.
Workers.Add(person);
count++;
}
file.Close();
System.Console.WriteLine("Number of lines : {0}.", count);
return Workers;
}
static void Main(string[] args)
{
List<Personnel> Workers = ReadTest();
}

Import two CSV, add specific columns from one CSV and import changes to new CSV (C#)

i have to import 2 CSV's.
CSV 1 [49]: Including about 50 tab seperated colums.
CSV 2:[2] Inlcudes 3 Columns which should be replaced on the [3] [6] and [11] place of my first csv.
So heres what i do:
1) Importing the csv and split into a array.
string employeedatabase = "MYPATH";
List<String> status = new List<String>();
StreamReader file2 = new System.IO.StreamReader(filename);
string line = file2.ReadLine();
while ((line = file2.ReadLine()) != null)
{
string[] ud = line.Split('\t');
status.Add(ud[0]);
}
String[] ud_status = status.ToArray();
PROBLEM 1: i have about 50 colums to handle, ud_status is just the first, so do i need 50 Lists and 50 String arrays?
2) Importing the second csv and split into a array.
List<String> vorname = new List<String>();
List<String> nachname = new List<String>();
List<String> username = new List<String>();
StreamReader file = new System.IO.StreamReader(employeedatabase);
string line3 = file.ReadLine();
while ((line3 = file.ReadLine()) != null)
{
string[] data = line3.Split(';');
vorname.Add(data[0]);
nachname.Add(data[1]);
username.Add(data[2]);
}
String[] db_vorname = vorname.ToArray();
String[] db_nachname = nachname.ToArray();
String[] db_username = username.ToArray();
PROBLEM 2: After loading these two csv's i dont know how to combine them, and change to columns as mentioned above ..
somethine like this?
mynewArray = ud_status + "/t" + ud_xy[..n] + "/t" + changed_colum + ud_xy[..n];
save "mynewarray" into tablulator seperated csv with encoding "utf-8".
To read the file into a meaningful format, you should set up a class that defines the format of your CSV:
public class CsvRow
{
public string vorname { get; set; }
public string nachname { get; set; }
public string username { get; set; }
public CsvRow (string[] data)
{
vorname = data[0];
nachname = data[1];
username = data[2];
}
}
Then populate a list of this:
List<CsvRow> rows = new List<CsvRow>();
StreamReader file = new System.IO.StreamReader(employeedatabase);
string line3 = file.ReadLine();
while ((line3 = file.ReadLine()) != null)
{
rows.Add(new CsvRow(line3.Split(';'));
}
Similarly format your other CSV and include unused properties for the new fields. Once you have loaded both, you can populate the new properties from this list in a loop, matching the records by whatever common field the CSVs hopefully share. Then finally output the resulting data to a new CSV file.
Your solution is not to use string arrays to do this. That will just drive you crazy. It's better to use the System.Data.DataTable object.
I didn't get a chance to test the LINQ lambda expression at the end of this (or really any of it, I wrote this on a break), but it should get you on the right track.
using (var ds = new System.Data.DataSet("My Data"))
{
ds.Tables.Add("File0");
ds.Tables.Add("File1");
string[] line;
using (var reader = new System.IO.StreamReader("FirstFile"))
{
//first we get columns for table 0
foreach (string s in reader.ReadLine().Split('\t'))
ds.Tables["File0"].Columns.Add(s);
while ((line = reader.ReadLine().Split('\t')) != null)
{
//and now the rest of the data.
var r = ds.Tables["File0"].NewRow();
for (int i = 0; i <= line.Length; i++)
{
r[i] = line[i];
}
ds.Tables["File0"].Rows.Add(r);
}
}
//we could probably do these in a loop or a second method,
//but you may want subtle differences, so for now we just do it the same way
//for file1
using (var reader2 = new System.IO.StreamReader("SecondFile"))
{
foreach (string s in reader2.ReadLine().Split('\t'))
ds.Tables["File1"].Columns.Add(s);
while ((line = reader2.ReadLine().Split('\t')) != null)
{
//and now the rest of the data.
var r = ds.Tables["File1"].NewRow();
for (int i = 0; i <= line.Length; i++)
{
r[i] = line[i];
}
ds.Tables["File1"].Rows.Add(r);
}
}
//you now have these in functioning datatables. Because we named columns,
//you can call them by name specifically, or by index, to replace in the first datatable.
string[] columnsToReplace = new string[] { "firstColumnName", "SecondColumnName", "ThirdColumnName" };
for(int i = 0; i < ds.Tables[0].Rows.Count; i++)
{
//you didn't give a sign of any relation between the two tables
//so this is just by row, and assumes the row count is equivalent.
//This is also not advised.
//if there is a key these sets of data share
//you should join on them instead.
foreach(DataRow dr in ds.Tables[0].Rows[i].ItemArray)
{
dr[3] = ds.Tables[1].Rows[i][columnsToReplace[0]];
dr[6] = ds.Tables[1].Rows[i][columnsToReplace[1]];
dr[11] = ds.Tables[1].Rows[i][columnsToReplace[2]];
}
}
//ds.Tables[0] now has the output you want.
string output = String.Empty;
foreach (var s in ds.Tables[0].Columns)
output = String.Concat(output, s ,"\t");
output = String.Concat(output, Environment.NewLine); // columns ready, now the rows.
foreach (DataRow r in ds.Tables[0].Rows)
output = string.Concat(output, r.ItemArray.SelectMany(t => (t.ToString() + "\t")), Environment.NewLine);
if(System.IO.File.Exists("MYPATH"))
using (System.IO.StreamWriter file = new System.IO.StreamWriter("MYPATH")) //or a variable instead of string literal
{
file.Write(output);
}
}
With Cinchoo ETL - an open source file helper library, you can do the merge of CSV files as below. Assumed the 2 CSV file contains same number of lines.
string CSV1 = #"Id Name City
1 Tom New York
2 Mark FairFax";
string CSV2 = #"Id City
1 Las Vegas
2 Dallas";
dynamic rec1 = null;
dynamic rec2 = null;
StringBuilder csv3 = new StringBuilder();
using (var csvOut = new ChoCSVWriter(new StringWriter(csv3))
.WithFirstLineHeader()
.WithDelimiter("\t")
)
{
using (var csv1 = new ChoCSVReader(new StringReader(CSV1))
.WithFirstLineHeader()
.WithDelimiter("\t")
)
{
using (var csv2 = new ChoCSVReader(new StringReader(CSV2))
.WithFirstLineHeader()
.WithDelimiter("\t")
)
{
while ((rec1 = csv1.Read()) != null && (rec2 = csv2.Read()) != null)
{
rec1.City = rec2.City;
csvOut.Write(rec1);
}
}
}
}
Console.WriteLine(csv3.ToString());
Hope it helps.
Disclaimer: I'm the author of this library.

How to split string containing multiple delimeters of each and every line of text file?

This is the Input my file contains:
50|Hallogen|Mercury|M:4;C:40;A:1
90|Oxygen|Mars|M:10;C:20;A:00
5|Hydrogen|Saturn|M:33;C:00;A:3
Now i want to split each and every line of my text file and store in my class file like :
Expected output:
Planets[0]:
{
Number:50
name: Hallogen
object:Mercury
proportion[0]:
{
Number:4
},
proportion[1]:
{
Number:40
},
proportion[2]:
{
Number:1
}
}
etc........
My class file to store all this values:
public class Planets
{
public int Number { get; set; } //This field points to first cell of every row.output 50,90,5
public string name { get; set; } //This field points to Second cell of every row.output Hallogen,Oxygen,Hydrogen
public string object { get; set; } ////This field points to third cell of every row.output Mercury,Mars,Saturn
public List<proportion> proportion { get; set; } //This will store all proportions with respect to planet object.
//for Hallogen it will store 4,40,1.Just store number.ignore M,C,A initials.
//for oxygen it will store 10,20,00.Just store number.ignore M,C,A initials.
}
public class proportion
{
public int Number { get; set; }
}
This is what i have done:
List<Planets> Planets = new List<Planets>();
using (StreamReader sr = new StreamReader(args[0]))
{
String line;
while ((line = sr.ReadLine()) != null)
{
string[] parts = Regex.Split(line, #"(?<=[|;-])");
foreach (var item in parts)
{
var Obj = new Planets();//Not getting how to store it but not getting proper output in parts
}
Console.WriteLine(line);
}
}
Without you having to change any of your logic in "Planets"-class my fast solution to your problem would look like this:
List<Planets> Planets = new List<Planets>();
using (StreamReader sr = new StreamReader(args[0]))
{
String line;
while ((line = sr.ReadLine()) != null)
{
Planets planet = new Planets();
String[] parts = line.Split('|');
planet.Number = Convert.ToInt32(parts[0]);
planet.name = parts[1];
planet.obj = parts[2];
String[] smallerParts = parts[3].Split(';');
planet.proportion = new List<proportion>();
foreach (var item in smallerParts)
{
proportion prop = new proportion();
prop.Number =
Convert.ToInt32(item.Split(':')[1]);
planet.proportion.Add(prop);
}
Planets.Add(planet);
}
}
Oh before i forget it, you should not name your property of class Planets "object" because "object" is a keyword for the base class of everything, use something like "obj", "myObject" ,"planetObject" just not "object" your compiler will tell you the same ;)
To my understanding, multiple delimiters are maintained to have a nested structure.
You need to split the whole string first based on pipe, followed by semi colon and lastly by colon.
The order of splitting here is important. I don't think you can have all the tokens at once by splitting with all 3 delimiters.
Try following code for same kind of data
var values = new List<string>
{
"50|Hallogen|Mercury|M:4;C:40;A:1",
"90|Oxygen|Mars|M:10;C:20;A:00",
"5|Hydrogen|Saturn|M:33;C:00;A:3"
};
foreach (var value in values)
{
var pipeSplitted = value.Split('|');
var firstNumber = pipeSplitted[0];
var name = pipeSplitted[1];
var objectName = pipeSplitted[2];
var semiSpltted = value.Split(';');
var secondNumber = semiSpltted[0].Split(':')[1];
var thirdNumber = semiSpltted[1].Split(':')[1];
var colenSplitted = value.Split(':');
var lastNumber = colenSplitted[colenSplitted.Length - 1];
}
The most straigtforward solution is to use a regex where every (sub)field is matched inside a group
var subjectString = #"50|Hallogen|Mercury|M:4;C:40;A:1
90|Oxygen|Mars|M:10;C:20;A:00
5|Hydrogen|Saturn|M:33;C:00;A:3";
Regex regexObj = new Regex(#"^(.*?)\|(.*?)\|(.*?)\|M:(.*?);C:(.*?);A:(.*?)$", RegexOptions.Multiline);
Match match = regexObj.Match(subjectString);
while (match.Success) {
match.Groups[1].Value.Dump();
match.Groups[2].Value.Dump();
match.Groups[3].Value.Dump();
match.Groups[4].Value.Dump();
match.Groups[5].Value.Dump();
match.Groups[6].Value.Dump();
match = match.NextMatch();
}
If I understand correctly, your input is well formed. In this case you could use something like this:
string[] parts = Regex.Split(line, #"[|;-]");
var planet = new Planets(parts);
...
public Planets(string[] parts) {
int.TryParse(parts[0], this.Number);
this.name = parts[1];
this.object = parts[2];
this.proportion = new List<proportion>();
Regex PropRegex = new Regex("\d+");
for(int i = 3; i < parts.Length; i++){
Match PropMatch = PropRegex.Match(part[i]);
if(PropMatch.IsMatch){
this.proportion.Add(int.Parse(PropMatch.Value));
}
}
}

CSV appending specific strings in Dictionary

There are two CSV files I am looking to consolidate into 1.
A.CSV
WBS Element,Purchasing Document,Purchase order text,Val/COArea Crcy
ABC123,,,75000
ABC124,4200028630,Service,1069.2
ABC124,4200041490,Service,25518.24
ABC124,4200041490,Service,-1890.24
ABC126,4200028630,Service,2268
ABC126,4200028630,Service,-2268
ABC126,4200029435,Service,25149.65
ABC137,,,4146.2
B.CSV
WBS Element,Ref Document Number,Val/COArea Crcy,Name
ABC124,1000060610,0,Slab Locates & Steel Differential
ABC124,1000081223,0,NOCN339A&3921
ABC124,1000081223,0,Slab Locates & Steel Differential
ABC126,1000067757,0,Structural Steel
ABC 137,4200041490,0,Service
ABC 137,4200028630,5393.52,Service
ABC 137,4200029435,0,Service
I want to make 1 CSV file that combines both of these. The lines starting with WBS Element are joined together. The WBS Elements from each file are then placed on the same line if they match. If A has a WBS Element B does not, then the section for B is just "," and vice versa.
Sample target output:
WBS Element,Purchasing Document,Purchase order text,Val/COArea Crcy,WBS Element,Ref Document Number,Val/COArea Crcy,Name
ABC123,,,75000,,,,
ABC124,4200028630,Service,1069.2,ABC124,1000060610,0,Slab Locates & Steel Differential
I have the following code:
static void Main(string[] args)
{
StreamReader a = new StreamReader(#"Input\a.csv");
StreamReader b = new StreamReader(#"Input\b.csv");
StreamWriter output = new StreamWriter(#"Output\output.csv");
Dictionary<string, string> Adict = new Dictionary<string, string>();
Dictionary<string, string> Bdict = new Dictionary<string, string>();
output.WriteLine(a.ReadLine() + "," + b.ReadLine());
while (!a.EndOfStream && !b.EndOfStream)
{
//section for A
List<string> atempList = new List<string>();
string atempString;
string Aline = a.ReadLine();
string[] Atokens = Aline.Split(','); //split the line into array
foreach (string s in Atokens)
atempList.Add(s); //add each string in token array to tempList
atempList.Remove(Atokens[0]); //remove Dict Key from tempList
StringBuilder d = new StringBuilder();
if (!Adict.ContainsKey(Atokens[0]))
{
foreach (string s in atempList)
d.Append(s + ","); //rejoin tempList into a string with ","
d.Append("\n"); //add a linebreak to end of templist string
Adict.Add(Atokens[0], d.ToString()); //Add line to dictionary with Key
}
else //Adict does contain key... need to remove Key and add bigger string
{
List<string> removeKey = new List<string>(); //temporary list
foreach (string s in Atokens)
removeKey.Add(s); //create a new list from the token array
removeKey.Remove(Atokens[0]); //remove the key from the removeKey list
atempString = Adict[Atokens[0]]; //temporary string is what's already in dictionary
Adict.Remove(Atokens[0]); //remove the Key + Value from dictionary.
Adict.Add(Atokens[0], d.Append(atempString + Aline + "\n").ToString()); // string.Concat(tempString, ",", line));
}
//section for B
List<string> btempList = new List<string>();
string btempString;
string Bline = b.ReadLine();
string[] Btokens = Bline.Split(',');
foreach (string s in Btokens)
btempList.Add(s);
btempList.Remove(Btokens[0]);
StringBuilder f = new StringBuilder();
if (!Bdict.ContainsKey(Btokens[0]))
{
foreach (string s in btempList)
f.Append(s + ",");
f.Append("\n");
Bdict.Add(Btokens[0], f.ToString());
}
else
{
List<string> removeKey = new List<string>();
foreach (string s in Btokens)
removeKey.Add(s);
removeKey.Remove(Atokens[0]);
btempString = Bdict[Btokens[0]];
Bdict.Remove(Btokens[0]);
Bdict.Add(Btokens[0], f.Append(btempString + Bline + "\n").ToString());
}
}
output.Close();
// Console.ReadLine();
}
}
I am stuck now I dont know how to look through each Dictionary and compare keys, then join (insert?) just the line that has a matching key.
first of all, I think you should make a class to use this.
The class I made for this problem is really simple:
class WbsElement
{
public string PurchasingDocument;
public string PurchaseOrderText;
public string ValCoAreaCrcyA;
public string ValCoAreaCrcyB;
public string RefDocumentNumber;
public string Name;
}
It has some attirbutes that you can use to store the data.
Then I took your code and changed it to this:
private static void Main(string[] args)
{
StreamReader a = new StreamReader(#"A.CSV");
StreamReader b = new StreamReader(#"B.CSV");
StreamWriter output = new StreamWriter(#"output.csv");
Dictionary<string, WbsElement> newDict = new Dictionary<string, WbsElement>();
output.WriteLine(a.ReadLine() + "," + b.ReadLine());
while (!a.EndOfStream && !b.EndOfStream)
{
//section for A
string Aline = a.ReadLine();
string[] Atokens = Aline.Split(','); //split the line into array
if (newDict.ContainsKey(Atokens[0]))
{
newDict[Atokens[0]].PurchasingDocument = Atokens[1];
newDict[Atokens[0]].PurchaseOrderText = Atokens[2];
newDict[Atokens[0]].ValCoAreaCrcyA = Atokens[3];
}
else
{
WbsElement elementToAdd = new WbsElement();
elementToAdd.PurchasingDocument = Atokens[1];
elementToAdd.PurchaseOrderText = Atokens[2];
elementToAdd.ValCoAreaCrcyA = Atokens[3];
newDict.Add(Atokens[0], elementToAdd);
}
}
while (!b.EndOfStream)
{
//section for B
string Bline = b.ReadLine();
string[] Btokens = Bline.Split(',');
if (newDict.ContainsKey(Btokens[0]))
{
newDict[Btokens[0]].RefDocumentNumber = Btokens[1];
newDict[Btokens[0]].ValCoAreaCrcyB = Btokens[2];
newDict[Btokens[0]].Name = Btokens[3];
}
else
{
WbsElement elementToAdd = new WbsElement();
elementToAdd.RefDocumentNumber = Btokens[1];
elementToAdd.ValCoAreaCrcyB = Btokens[2];
elementToAdd.Name = Btokens[3];
newDict.Add(Btokens[0], elementToAdd);
}
}
foreach (KeyValuePair<string, WbsElement> keyValuePair in newDict)
{
output.WriteLine(string.Format("{0},{1},{2},{3},{4},{5},{6},{7}", keyValuePair.Key, keyValuePair.Value.PurchasingDocument,
keyValuePair.Value.PurchaseOrderText, keyValuePair.Value.ValCoAreaCrcyA,
keyValuePair.Key,
keyValuePair.Value.RefDocumentNumber, keyValuePair.Value.ValCoAreaCrcyB,
keyValuePair.Value.Name));
}
output.Close();
// Console.ReadLine();
}
I make a new dictionary that stores the key + one instance of the class I made.
When I find the same key again, I just add the information to the class.
On the end of the application I just flush all the correct data to the output stream.
The class is the key to making this easy.
In case you want it generic for different length of data input, you could use this:
private static void Main(string[] args)
{
StreamReader a = new StreamReader(#"A.CSV");
StreamReader b = new StreamReader(#"B.CSV");
StreamWriter output = new StreamWriter(#"output.csv");
Dictionary<string, List<string>> newDict = new Dictionary<string, List<string>>();
string aLine = a.ReadLine();
int aLength = aLine.Split(',').Count();
output.WriteLine(aLine + "," + b.ReadLine());
while (!a.EndOfStream && !b.EndOfStream)
{
//section for A
string Aline = a.ReadLine();
string[] Atokens = Aline.Split(','); //split the line into array
if (newDict.ContainsKey(Atokens[0]))
{
for (int i = 0; i < Atokens.Length; i++)
{
newDict[Atokens[0]][i] = Atokens[i];
}
}
else
{
List<string> listToAdd = new List<string>();
for (int i = 0; i < Atokens.Length; i++)
{
listToAdd.Add(Atokens[i]);
}
newDict.Add(Atokens[0], listToAdd);
}
}
while (!b.EndOfStream)
{
//section for B
string Bline = b.ReadLine();
string[] Btokens = Bline.Split(',');
if (newDict.ContainsKey(Btokens[0]))
{
if (newDict[Btokens[0]].Count > aLength)
{
for (int i = 0; i < Btokens.Length; i++)
{
newDict[Btokens[0]][i + aLength] = Btokens[i];
}
}
else
{
for (int i = 0; i < Btokens.Length; i++)
{
newDict[Btokens[0]].Add(Btokens[i]);
}
}
}
else
{
List<string> listToAdd = new List<string>(aLength);
listToAdd.AddRange(Btokens);
newDict.Add(Btokens[0], listToAdd);
}
}
foreach (KeyValuePair<string, List<string>> keyValuePair in newDict)
{
string outputLine = string.Empty;
foreach (string s in keyValuePair.Value)
{
if (outputLine != string.Empty)
{
outputLine += ",";
}
outputLine += s;
}
output.WriteLine(outputLine);
}
output.Close();
// Console.ReadLine();
}
It uses a list to keep track of input data.

Categories

Resources