How do I change the space separation to tab separation.
Because [ LD Port 02 CSTID F7SC67: Sorts Complete. ] has a lot of space separation.
So I can’t split through Split !
List<string> add_list = new List<string>();
List<string> ok_list = new List<string>();
IEnumerable<String> lines = File.ReadLines("C:\\Users\\USER\\Downloads\\1231231231.log");
foreach (string line in lines)
{
add_list.Add(line);
}
foreach (string row in add_list)
{
var data = row.Split(' ');
Console.WriteLine(data);
}
Edit (by #chase): OP does not want to split by just spaces - this is why their code is wrong. Their final outcome should be the one mentioned in What I want: - where each item is separated by a comma and each item is clearly distinct. The input to reach this outcome is mentioned in Data:
Try following :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication4
{
class Program
{
const string FILENAME = #"c:\temp\test.txt";
static void Main(string[] args)
{
List<List<string>> data = GetFixedWidth(FILENAME);
}
static List<List<string>> GetFixedWidth(string filename)
{
int[] startPosition = { 0, 6, 15, 24 };
StreamReader reader = new StreamReader(filename);
string line = "";
int length = 0;
List<List<string>> data = new List<List<string>>();
while ((line = reader.ReadLine()) != null)
{
line = line.Trim();
if(line.Length > 0)
{
List<string> rowList = new List<string>();
data.Add(rowList);
for (int i = 0; i < startPosition.Length; i++)
{
if (line.Length > startPosition[i])
{
if (i == startPosition.Length - 1)
{
string lastColumn = line.Substring(startPosition[i]);
if (lastColumn.Contains(":"))
{
string[] splitData = line.Substring(startPosition[i]).Split(new char[] { ':' });
foreach (string split in splitData)
{
rowList.Add(split.Trim());
}
}
else
{
rowList.Add(lastColumn);
}
}
else
{
length = startPosition[i + 1] - startPosition[i];
rowList.Add(line.Substring(startPosition[i], length).Trim());
}
}
}
}
}
return data;
}
}
}
Related
I have a text file that displays students names and their scores. The format looks like this:
James Johnson, 85
Robert Jones, 90
Lindsey Parks, 98
etc.
I have 10 names and scores all in the above format. My problem is how do I split the text file by the delimiter, and use the integers from the text file
Here is my code so far:
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using Microsoft.VisualBasic;
using Microsoft.VisualBasic.FileIO;
namespace TextFiles1
{
class Program
{
static void Main(string[] args)
{
StreamReader sr = new StreamReader(#"C:\Users\jonda\Desktop\StudentScores.txt.txt");
string data = sr.ReadLine();
while (data != null)
{
Console.WriteLine(data);
string[] names = data.Split(',');
data = sr.ReadLine();
}
int total = 0;
double average = 0;
for (int index = 0; index < data.Length; index++)
{
total = total + data[index];
}
average = (double)total / data.Length;
Console.WriteLine("Average = " + average.ToString("N2"));
int high = data[0];
for (int index = 0; index < data.Length; index++)
{
if (data[index] > high)
{
high = data[index];
}
}
Console.WriteLine("Highest Score =" + high);
sr.Close();
Console.ReadLine();
}
}
}
First of all, it's a good idea to separate file operations and other operations. File operations are slow and costly, and should be completed as soon as possible. I would use a separate method, read the lines into a List and close the file operation first.
private static List<string> ReadFile(string path)
{
List<string> records = new List<string>();
using (StreamReader sr = new StreamReader(path))
{
while (!sr.EndOfStream)
records.Add(sr.ReadLine());
}
return records;
}
Then I would pass that list to another function and calculate average, max etc.
private static void CalculateAverage(List<string> lines)
{
char[] seperator = new char[] { ',' };
List<int> scores = new List<int>();
if (lines != null && lines.Count > 0)
{
foreach (string line in lines)
{
Console.WriteLine(line);
string[] parts = line.Split(seperator);
int val;
if (int.TryParse(parts[1], out val))
scores.Add(val);
}
}
Console.WriteLine("Average: {0}", scores.Average());
Console.WriteLine("Highest Score: {0}", scores.Max());
}
Then in your main program call the methods like this:
List<string> lines = ReadFile(path);
CalculateAverage(lines);
Use Regex to find each person info and then split each of them and extract Name and Score.
Try like this:
var inputStr = "James Johnson, 85 Robert Jones, 90 Lindsey Parks, 98";
var regex = new Regex(#"[A-z]* [A-z]*, [0-9]*");
return regex.Matches(inputStr)
.OfType<Match>()
.Select(p => p.Value.Split(','))
.Select(p => new { Name = p[0], Score = Convert.ToInt32(p[1].Trim()) });
Result :
I hope to be helpful for you :)
what i'm trying to do here is to delete the longest line from a txt file. Code does it's job, but i also need it to delete multiple "longest lines" and blank lines as well. Any ideas on how to do it?
Code is in C#
namespace _5_2
{
//------------------------------------------------------------
class Program
{
const string CFd = "..\\..\\U1.txt";
const string CFr = "..\\..\\Results.txt";
static void Main(string[] args)
{
int nr;
Read(CFd, out nr);
Print(CFd, CFr, nr);
Console.WriteLine("Longest line nr. {0, 4:d}", nr + 1);
Console.WriteLine("Program done");
}
//------------------------------------------------------------
/** Finds number of the longest line.
#param fv - text file name
#param nr - number of the longest line */
//------------------------------------------------------------
static void Read(string fv, out int nr)
{
string[] lines = File.ReadAllLines(fv, Encoding.GetEncoding(1257));
int ilgis = 0;
nr = 0;
int nreil = 0;
foreach (string line in lines)
{
if (line.Length > ilgis)
{
ilgis = line.Length;
nr = nreil;
}
nreil++;
}
}
static void Print(string fv, string fvr, int nr)
{
string[] lines = File.ReadAllLines(fv, Encoding.GetEncoding(1257));
int nreil = 0;
using (var fr = File.CreateText(fvr))
{
foreach (string line in lines)
{
if (nr != nreil)
{
fr.WriteLine(line);
}
nreil++;
}
}
}
}
}
I would suggest using LINQ. Take advantage of the .Max extension method and iterate over the string array.
string[] lines = { "1", "two", "three" };
var longestLine = lines.Max(line => line.Length);
var revised = lines.Where(line => line.Length < longestLine).ToArray();
The revised variable will contain a string array that excludes the lines with the longest line count.
Read lines, filter out empty lines and the 10 longest lines, write lines:
string[] lines = File.ReadAllLines(inputFile, Encoding.GetEncoding(1257));
var filtered = lines
.Where(line => line.Length > 0) // remove all empty lines
.Except(lines.OrderByDescending(line => line.Length).Take(10)); // remove 10 longest lines
File.WriteAllLines(outputFile, filtered);
You could identify the longest line, and then loop through the list, deleting all of that length. To also delete empty ones, you could test against String.IsNullOrWhiteSpace.
Something like (pseudocode):
foreach (string line in lines)
{
if (String.IsNullOrWhiteSpace(line))
{
lines.Delete(line);
Continue;
}
if (line.Length >= longestLine) // ">=" instead of "==" just to be on the safe side
{
lines.Delete(line);
}
}
I have an initial file containing lines such as:
34 964:0.049759 1123:0.0031 2507:0.015979
32,48 524:0.061167 833:0.030133 1123:0.002549
34,52 534:0.07349 698:0.141667 1123:0.004403
106 389:0.013396 417:0.016276 534:0.023859
The first part of a line is the class number. A line can have several classes.
For each class, I create a new file.
For instance for class 34 the resulting file will be :
+1 964:0.049759 1123:0.0031 2507:0.015979
-1 524:0.061167 833:0.030133 1123:0.002549
+1 534:0.07349 698:0.141667 1123:0.004403
-1 389:0.013396 417:0.016276 534:0.023859
For class 106 the resulting file will be :
-1 964:0.049759 1123:0.0031 2507:0.015979
-1 524:0.061167 833:0.030133 1123:0.002549
-1 534:0.07349 698:0.141667 1123:0.004403
+1 389:0.013396 417:0.016276 534:0.023859
The problem is I have 13 files to write for 200 class.
I already ran a less optimized version of my code and it took several hours.
With my code below it takes 1 hour to generate the 2600 files.
Is there a way to perform such a replacement in a faster way? Are regex a viable option?
Below is my implementation (works on LINQPAD with this data file)
static void Main()
{
const string filePath = #"C:\data.txt";
const string generatedFilesFolderPath = #"C:\";
const string fileName = "data";
using (new TimeIt("Whole process"))
{
var fileLines = File.ReadLines(filePath).Select(l => l.Split(new[] { ' ' }, 2)).ToList();
var classValues = GetClassValues();
foreach (var classValue in classValues)
{
var directoryPath = Path.Combine(generatedFilesFolderPath, classValue);
if (!Directory.Exists(directoryPath))
Directory.CreateDirectory(directoryPath);
var classFilePath = Path.Combine(directoryPath, fileName);
using (var file = new StreamWriter(classFilePath))
{
foreach (var line in fileLines)
{
var lineFirstPart = line.First();
string newFirstPart = "-1";
var hashset = new HashSet<string>(lineFirstPart.Split(','));
if (hashset.Contains(classValue))
{
newFirstPart = "+1";
}
file.WriteLine("{0} {1}", newFirstPart, line.Last());
}
}
}
}
Console.Read();
}
public static List<string> GetClassValues()
{
// In real life there is 200 class values.
return Enumerable.Range(0, 2).Select(c => c.ToString()).ToList();
}
public class TimeIt : IDisposable
{
private readonly string _name;
private readonly Stopwatch _watch;
public TimeIt(string name)
{
_name = name;
_watch = Stopwatch.StartNew();
}
public void Dispose()
{
_watch.Stop();
Console.WriteLine("{0} took {1}", _name, _watch.Elapsed);
}
}
The output:
Whole process took 00:00:00.1175102
EDIT: I also ran a profiler and it looks like the split method is the hottest spot.
EDIT 2: Simple example:
2,1 1:0.8 2:0.2
3 1:0.4 3:0.6
12 1:0.02 4:0.88 5:0.1
Expected output for class 2:
+1 1:0.8 2:0.2
-1 1:0.4 3:0.6
-1 1:0.02 4:0.88 5:0.1
Expected output for class 3:
-1 1:0.8 2:0.2
+1 1:0.4 3:0.6
-1 1:0.02 4:0.88 5:0.1
Expected output for class 4:
-1 1:0.8 2:0.2
-1 1:0.4 3:0.6
-1 1:0.02 4:0.88 5:0.1
I have eliminated the hottest paths from your code by removing the split and using a bigger buffer on the FileStream.
Instead of Split I now call ToCharArray and then parse the first Chars to the first space and while I'm at it a match with classValue on a char by char basis is performed. The boolean found indicates an exact match for anything before the , of the first space. The rest of the handling is the same.
var fsw = new FileStream(classFilePath,
FileMode.Create,
FileAccess.Write,
FileShare.None,
64*1024*1024); // use a large buffer
using (var file = new StreamWriter(fsw)) // use the filestream
{
foreach(var line in fileLines) // for( int i = 0;i < fileLines.Length;i++)
{
char[] chars = line.ToCharArray();
int matched = 0;
int parsePos = -1;
bool takeClass = true;
bool found = false;
bool space = false;
// parse until space
while (parsePos<chars.Length && !space )
{
parsePos++;
space = chars[parsePos] == ' '; // end
// tokens
if (chars[parsePos] == ' ' ||
chars[parsePos] == ',')
{
if (takeClass
&& matched == classValue.Length)
{
found = true;
takeClass = false;
}
else
{
// reset matching
takeClass = true;
matched = 0;
}
}
else
{
if (takeClass
&& matched < classValue.Length
&& chars[parsePos] == classValue[matched])
{
matched++; // on the next iteration, match next
}
else
{
takeClass = false; // no match!
}
}
}
chars[parsePos - 1] = '1'; // replace 1 in front of space
var correction = 1;
if (parsePos > 1)
{
// is classValue before the comma (or before space)
if (found)
{
chars[parsePos - 2] = '+';
}
else
{
chars[parsePos - 2] = '-';
}
correction++;
}
else
{
// is classValue before the comma (or before space)
if (found)
{
// not enough space in the array, write a single char
file.Write('+');
}
else
{
file.Write('-');
}
}
file.WriteLine(chars, parsePos - correction, chars.Length - (parsePos - correction));
}
}
Instead of iterating over the un-parsed lines 200 times, how about parsing the lines upfront into a data structure then iterating over that 200 times? This should minimize the numer of string manipulation operations.
Also using StreamReader instead of File.ReadLines, so the entire file is not in memory twice -- once as string[] and another time as Detail[].
static void Main(string[] args)
{
var details = ReadDetail("data.txt").ToArray();
var classValues = Enumerable.Range(0, 10).ToArray();
foreach (var classValue in classValues)
{
// Create file/directory etc
using (var file = new StreamWriter("out.txt"))
{
foreach (var detail in details)
{
file.WriteLine("{0} {1}", detail.Classes.Contains(classValue) ? "+1" : "-1", detail.Line);
}
}
}
}
static IEnumerable<Detail> ReadDetail(string filePath)
{
using (StreamReader reader = new StreamReader(filePath))
{
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
int separator = line.IndexOf(' ');
Detail detail = new Detail
{
Classes = line.Substring(0, separator).Split(',').Select(c => Int32.Parse(c)).ToArray(),
Line = line.Substring(separator + 1)
};
yield return detail;
}
}
}
public class Detail
{
public int[] Classes { get; set; }
public string Line { get; set; }
}
Here is my code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.IO;
namespace DataAnalysis
{
class Data
{
public int InTime;
public string InLocation;
public bool Direction;
public int LOS_F;
// create a Data object from a CSV format string.
static Data FromString(string line)
{
var fields = line.split(",");
return new Data
{
InTime = TimeSpan.Parse(fields[3]),
InLocation = fields[5],
Direction = fields[5][0], // to get the direction E/N/S/W
LOS_F = float.Parse(fields[16])
};
}
}
class Program
{
string[] directions = new string[] { "E", "N", "S", "W" };
static void Main(string[] args)
{
var path = #"C:\Documents and Settings\Siva-Admin\Desktop\5.5 Capacity Models\";
// ^--- No need to escape the backslashes
var subdirs = Directory.GetDirectories(path);
// The subdirs variable contains the FULL paths
foreach (string subdir in subdirs)
{
List<List<float>> allAvgs = new List<List<float>>();
using (StreamWriter compiled = new StreamWriter(
Path.Combine(subdir, "compiledresults.csv")))
{
compiled.Write("heading,EastAvg,NorthAvg,SouthAvg,WestAvg");
for (int i = 1; i <= 10; i++)
{
List<Data> info = new List<Data>();
using (StreamReader reader = new StreamReader(
Path.Combine(subdir, "results" + i.ToString() + #"\JourneyTimes.csv")))
{
// Read the header line first!
string line = reader.ReadLine();
while ((line = reader.ReadLine()) != null)
info.Add(Data.FromString(line));
}
List<float> avgs = new List<float>();
for (string dir in directions)
{
List<Data> perDirection = info.Where(d => d.Direction = dir) as List<Data>;
float sum = perDirection.Sum(d => d.LOS_F);
float average = sum / perDirection.Count();
avgs.Add(average);
}
allAvgs.Add(avgs);
compiled.Write("results" + i.ToString() + "," + string.Join(",", avgs) + "\n");
}
compiled.Write("scenario_average");
for (int j = 1; j <= 4; j++)
{
compiled.Write("," + allAvgs.Sum(d => d[0]) / allAvgs.Count());
}
}
}
}
}
}
I get the following errors:
Error 1; expected(Line 67, "for (string dir in directions)" )
Error 2; expected(LINE 67, " " )
Error 3 'string' does not contain a definition for 'split' and no extension method 'split' accepting a first argument of type 'string' could be found (are you missing a using directive or an assembly reference?) (LINE 20, var fields = line.split (","); )
I do not understand the need for the ; as these are arguments being passed are they not? I also do not understand why I cannot split a string.
Error 1 and 2: Your for loop should be a foreach loop:
foreach (string dir in directions)
Error 3: the S in split should be upper case:
var fields = line.Split(',');
for (string dir in directions) should be foreach (string dir in directions)
EDIT to add:
Addtionally, here:
List<Data> perDirection = info.Where(d => d.Direction = dir) as List<Data>;
You seem to be making an assignment instead of a equality check, i.e
d.Direction = dir should be d.Direction == dir
However, it still won't work since you are comparing a string to a bool.
You need to replace line.split(",") with line.Split(',') (note the uppercase S).
(also replace for with foreach in line 67 as others have pointed out).
Error1 & Error2 use foreach instead of for;
Erorr3 it's Split instead of split.
I'm trying to read a text file which contains numbers separated by a comma. When I read using File.Readline() I get it to a string[]. I need to convert it in to a int array but it gives an error.
The contents of the text file:
146429,143689,144380,141523,139572,136930,133714,130011,125843,121110,115974,110363,104367,97909,91245,84218,77008,69626,62058,54445,46942,39436,32146,24932,18359,12601,9039,9776,13638,18314,23221,27728,32142,35941,39577,42657,45692,48180
My code:
while ((line = sr.ReadLine()) != null)
{
string[] values = line.Split(new string[] { " , " }, StringSplitOptions.None);
for (int i = 0; i < values.Length; i++)
{
// Console.WriteLine(values[i]);
valArr[LineCount][i] = Convert.ToInt64(values[i]); // error
}
LineCount++;
}
I think this is what you are after:
static void Main(string[] args)
{
var sr = new StreamReader(#"d:\test.txt");
long[] data = ExtractData(sr).ToArray();
}
private static IEnumerable<long> ExtractData(StreamReader sr)
{
string line;
while ((line = sr.ReadLine()) != null)
{
var items = line.Split(',');
foreach (var item in items)
{
yield return Convert.ToInt64(item);
}
}
}
With my test file (d:\test.txt) holding:
1,2,3,4,5
1,2,3,4
I get the array containing:
1,2,3,4,5,1,2,3,4
EDIT
As Monroe pointed out, I missed the fact you wanted an array of arrays. Here's another version that gives such a jagged array. Still keeping yield in though ;)
static void Main(string[] args)
{
var sr = new StreamReader(#"d:\test.txt");
var data = ExtractData(sr).ToArray();
}
private static IEnumerable<long[]> ExtractData(StreamReader sr)
{
string line;
while ((line = sr.ReadLine()) != null)
{
yield return line.Split(',').Select(i => Convert.ToInt64(i)).ToArray();
}
}
Using List can help you, and use StringSplitOptions.RemoveEmptyEntries to prevent null exception in Convert.ToInt64
var lineArray = new List<List<Int64>>();
foreach (var lineString in File.ReadAllLines("path"))
{
var line = new List<Int64>();
string[] values = lineString.Split(new[] { ',', ' ' },
StringSplitOptions.RemoveEmptyEntries);
line.AddRange(values.Select(t => Convert.ToInt64(t)));
lineArray.Add(line);
}
and using it:
// Array of numbers for specific line
var resultArray = lineArray[lineNumber].ToArray();
It should be something nearly to:
int yourIntArray =
new List<string>(RealAllText(fileName)
.Split(new char[] {',', '\r', '\n', ' '},
stringSplitOptions.RemoveEmptyEntries))
.ConvertAll<int>(s => int.Parse(s))
.ToArray();
Of course, for more robustness, int.Parse should be replaced with some method that will handle errors in numbers, and so on...
EDIT:
Oh, I just saw that you have lines that are another index to your array... Well then, this code should be modified in that case, but I'll leave that to you.
I think this is what you want to do. Only thing is that your vallArr should be a two dimensional array that can keep all values. Something like var valArr = new Int64[x, y];
while ((line = sr.ReadLine()) != null)
{
string[] values = line.Split(new string[] { " , " }, StringSplitOptions.None);
for (int i = 0; i < values.Length; i++)
{
valArr[LineCount, i] = Int64.Parse(values[i]);
}
LineCount++;
}