I have a text file that displays students names and their scores. The format looks like this:
James Johnson, 85
Robert Jones, 90
Lindsey Parks, 98
etc.
I have 10 names and scores all in the above format. My problem is how do I split the text file by the delimiter, and use the integers from the text file
Here is my code so far:
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using Microsoft.VisualBasic;
using Microsoft.VisualBasic.FileIO;
namespace TextFiles1
{
class Program
{
static void Main(string[] args)
{
StreamReader sr = new StreamReader(#"C:\Users\jonda\Desktop\StudentScores.txt.txt");
string data = sr.ReadLine();
while (data != null)
{
Console.WriteLine(data);
string[] names = data.Split(',');
data = sr.ReadLine();
}
int total = 0;
double average = 0;
for (int index = 0; index < data.Length; index++)
{
total = total + data[index];
}
average = (double)total / data.Length;
Console.WriteLine("Average = " + average.ToString("N2"));
int high = data[0];
for (int index = 0; index < data.Length; index++)
{
if (data[index] > high)
{
high = data[index];
}
}
Console.WriteLine("Highest Score =" + high);
sr.Close();
Console.ReadLine();
}
}
}
First of all, it's a good idea to separate file operations and other operations. File operations are slow and costly, and should be completed as soon as possible. I would use a separate method, read the lines into a List and close the file operation first.
private static List<string> ReadFile(string path)
{
List<string> records = new List<string>();
using (StreamReader sr = new StreamReader(path))
{
while (!sr.EndOfStream)
records.Add(sr.ReadLine());
}
return records;
}
Then I would pass that list to another function and calculate average, max etc.
private static void CalculateAverage(List<string> lines)
{
char[] seperator = new char[] { ',' };
List<int> scores = new List<int>();
if (lines != null && lines.Count > 0)
{
foreach (string line in lines)
{
Console.WriteLine(line);
string[] parts = line.Split(seperator);
int val;
if (int.TryParse(parts[1], out val))
scores.Add(val);
}
}
Console.WriteLine("Average: {0}", scores.Average());
Console.WriteLine("Highest Score: {0}", scores.Max());
}
Then in your main program call the methods like this:
List<string> lines = ReadFile(path);
CalculateAverage(lines);
Use Regex to find each person info and then split each of them and extract Name and Score.
Try like this:
var inputStr = "James Johnson, 85 Robert Jones, 90 Lindsey Parks, 98";
var regex = new Regex(#"[A-z]* [A-z]*, [0-9]*");
return regex.Matches(inputStr)
.OfType<Match>()
.Select(p => p.Value.Split(','))
.Select(p => new { Name = p[0], Score = Convert.ToInt32(p[1].Trim()) });
Result :
I hope to be helpful for you :)
Related
I'm writing something in C# and I need to find a way to split a text file into more files with the number of lines in the file being equivalent to a user input.
Example : file a had 1000 lines in it and I want the code to ask the user for a number and then use that number to make more files like this
a = 1000 lines .
Then after the code has run with the input of 300
a = 300 lines
b = 300 lines
c = 300 lines
d = 300 lines
e = 300 lines
Repeat that until the original file has been split into more files all with 300 lines .
This is what I have so far
var file = File.ReadAllLines(ofd.FileName);
Console.Write("> ");
int userlinestosplit = int.Parse(Console.ReadLine());
ArrayList fileA = new ArrayList();
for (int i = 0; i < userlinestosplit; i++)
{
string line = file[i];
fileA.Add(line);
}
int linesleft = file.Length - userlinestosplit;
ArrayList fileB = new ArrayList();
for (int i = linesleft; i < file.Length; i++)
{
string line = file[i];
fileB.Add(line);
}
string[] fileAArr = (string[])fileA.ToArray(typeof(string));
string[] fileBArr = (string[])fileB.ToArray(typeof(string));
string resdir = "results";
string modir = "splited";
Directory.CreateDirectory(resdir);
Directory.SetCurrentDirectory(resdir);
Directory.CreateDirectory(modir);
Directory.SetCurrentDirectory(modir);
File.WriteAllLines("FA.txt", fileAArr);
File.WriteAllLines("FB.txt", fileBArr);
Console.ReadKey();
Any help would be greatly appreciated
Here's a way to do it using streams. This has the benefit of not needing to read it all into memory at once, allowing it to work on very large files.
Console.Write("> ");
var maxLines = int.Parse(Console.ReadLine());
var filename = ofd.FileName;
var fileStream = File.OpenRead(filename);
var readStream = new StreamReader(fileStream);
var nameBase = filename[0..^4]; //strip .txt
var parts = 1;
var notfinished = true;
while (notfinished)
{
var part = File.OpenWrite($"{nameBase}-{parts}.txt");
var writer = new StreamWriter(part);
for (int i = 0; i < maxLines; i++)
{
writer.WriteLine(readStream.ReadLine());
if (readStream.EndOfStream)
{
notfinished = false;
break;
}
}
writer.Close();
parts++;
}
Console.WriteLine($"Done splitting the file into {parts} parts.");
Splitting text file to multiple parts
public void SplitFile(string inputFile, int size, string path)
{
int index = 0;
string s = string.Empty;
using (StreamReader sr = File.OpenText(inputFile))
{
while (true)
{
if (sr.EndOfStream) break;
using (StreamWriter output = new StreamWriter($"{path}\\part{index}.txt", false, Encoding.UTF8))
{
int linesRead = 0;
while ((s = sr.ReadLine()) != null && linesRead < size)
{
output.WriteLine(s);
linesRead++;
}
}
index++;
}
}
}
How to use:
var inputFile = "test.txt";
int size =300;
SplitFile(inputFile, size, "c:\\data");
How do I change the space separation to tab separation.
Because [ LD Port 02 CSTID F7SC67: Sorts Complete. ] has a lot of space separation.
So I can’t split through Split !
List<string> add_list = new List<string>();
List<string> ok_list = new List<string>();
IEnumerable<String> lines = File.ReadLines("C:\\Users\\USER\\Downloads\\1231231231.log");
foreach (string line in lines)
{
add_list.Add(line);
}
foreach (string row in add_list)
{
var data = row.Split(' ');
Console.WriteLine(data);
}
Edit (by #chase): OP does not want to split by just spaces - this is why their code is wrong. Their final outcome should be the one mentioned in What I want: - where each item is separated by a comma and each item is clearly distinct. The input to reach this outcome is mentioned in Data:
Try following :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication4
{
class Program
{
const string FILENAME = #"c:\temp\test.txt";
static void Main(string[] args)
{
List<List<string>> data = GetFixedWidth(FILENAME);
}
static List<List<string>> GetFixedWidth(string filename)
{
int[] startPosition = { 0, 6, 15, 24 };
StreamReader reader = new StreamReader(filename);
string line = "";
int length = 0;
List<List<string>> data = new List<List<string>>();
while ((line = reader.ReadLine()) != null)
{
line = line.Trim();
if(line.Length > 0)
{
List<string> rowList = new List<string>();
data.Add(rowList);
for (int i = 0; i < startPosition.Length; i++)
{
if (line.Length > startPosition[i])
{
if (i == startPosition.Length - 1)
{
string lastColumn = line.Substring(startPosition[i]);
if (lastColumn.Contains(":"))
{
string[] splitData = line.Substring(startPosition[i]).Split(new char[] { ':' });
foreach (string split in splitData)
{
rowList.Add(split.Trim());
}
}
else
{
rowList.Add(lastColumn);
}
}
else
{
length = startPosition[i + 1] - startPosition[i];
rowList.Add(line.Substring(startPosition[i], length).Trim());
}
}
}
}
}
return data;
}
}
}
As input, I have a csv file of approximately 1,000,000 lines (about 300Mb) which contains columns separated by semi-colons.
colA;colB;colC;colD;colE
aaaa;bbbb;cccc;dddd;"eeee;"
aaaa;bbbb;cccc;dddd;evfvdfeee
aaaa;bb1bb;cc2cc;dd3dd;evfve
Some of the fields on the 5th column may have a semi-colon. Not all of them though. So, I want to remove all semi-colons after the 4th occurrence. The code below works, but it takes ages (approx. 10min) to save the csv to the file. How can I speed up this?
void Main()
{
string finput = #"myfile.csv";
FileInfo finfo = new FileInfo(finput);
var Lines = File.ReadLines(finfo.FullName);
List<string> output = new List<string>();
Stopwatch sw = new Stopwatch();
sw.Start();
int count = 0;
foreach (string s in Lines)
{
count++;
if (count % 10000 == 0)
count.Dump();
output.Add((StringExtender.ReplaceAfterNthOccurrency(s, ";", ".", 5)));
}
sw.Elapsed.Dump();
File.WriteAllLines(finfo.DirectoryName + finfo.Name + "_conv.csv", output);
}
public static class StringExtender
{
public static string ReplaceAfterNthOccurrency(string input, string to_replace, string to_add, int n)
{
var cont = true;
int count = 0;
int start = 0;
while (cont)
{
int i = input.IndexOf(to_replace, start);
if (i != -1)
{
count++;
start = i + 1;
if (count >= n)
{
input = input.Remove(i, 1);
input = input.Insert(i, to_add);
}
}
else
cont = false;
}
return input;
}
}
I have a hard time figuring out how to remove extra letters using Regex.
I have this example below that says that it has 42 of "|" (vertical bars) per line.
|V.7|42|
1|0|1|58|4|4|351|25|8|||1|0||6|3|1000|49|20|430|17|6|0|10|0|1200|25||30|20|20|20|20|0|100|61028|1|0|0|1|1|0|
1|0|1|58|4|4|351|25|8|||1|0||6|3|1000|49|20|430|17|6|0|10|0|1200|25||30|20|20|20|20|0|100|61028|1|0|0|1|1|0|
2|543|2|58|4|4|366|26|9|100||2|200||8|3|1000|49|20|430|17|6|10|21|54|2400|36||30|20|20|20|20|543|150|61028|2|100|1|2|2|0|
3|1230|3|60|5|5|390|26|10|100||3|1500||10|3|1000|49|20|430|17|6|10|32|123|4800|46||30|20|20|20|20|1230|200|61028|3|1000|2|3|3|0|
4|2002|4|61|6|6|424|27|12|100||4|6000||12|4|769|37|15|315|12|4|10|45|200|9600|57||30|20|20|20|20|2002|250|61028|4|5000|3|4|4|0|
5|3306|5|63|7|7|468|29|14|100||5|18000||16|4|556|27|11|208|8|2|10|58|331||69||30|20|20|20|20|3306|300|61027|1|10000|4|5|5|0|
6|4950|6|66|8|8|522|31|17|100||6|||18|4|435|21|9|147|6|1|10|74|495||80||30|20|20|20|20|4950|350|61027|2|30000|5|6|6|0|
7|6947|7|69|10|10|585|33|20|100||7|||20|4|333|17|7|97|4|1|10|90|695||92||20|15|15|15|15|6947|400|61027|3|50000|6|7|7|0|
8|9309|8|73|12|12|658|35|24|100||8|||24|4|286|14|6|73|3|1|10|109|931||105||20|15|15|15|15|9309|450|61026|1|100000|7|8|8|0|
9|12050|9|77|14|14|741|38|28|100||9|||27|5|250|13|5|55|3|1|10|129|1205||117||20|15|15|15|15|12050|500|61026|2|300000|8|9|9|0|
10|15183|10|82|16|16|834|41|33|100|100|10|||29|5|222|11|4|0|0|0|10|151|1366||130|5|20|15|15|15|15|15183|550|61025|1|500000|9|10|10|0|
11|18720|11|87|19|19|936|45|38|100|100|11|||31|5|200|10|4|0|0|0|11|176|1685||143|10|20|15|15|15|15|18720|600|||||||0|
12|21335|12|92|22|22|1048|48|44|100|100|12|||36|5|182|9|4|0|0|0|12|203|2134||157|15|10|15|10|10|10|21335|650|||||||0|
Now I have another one with 45, what I want is to remove the new letters so that it has exactly 42 vertical bars like above.
|V.8|45|
1|0|1|58|4|4|351|25|8|||1|0||6|3|1000|49|20|430|17|6|0|10|0|1200|25||30|20|20|20|20|0|100|61028|1|0|0|1|1|0|5000|40022|1|
2|543|2|58|4|4|366|26|9|100||2|200||8|3|1000|49|20|430|17|6|10|21|54|2400|36||30|20|20|20|20|543|150|61028|2|100|1|2|2|0|25000|61034|1|
3|1230|3|60|5|5|390|26|10|100||3|1500||10|3|1000|49|20|430|17|6|10|32|123|4800|46||30|20|20|20|20|1230|200|61028|3|1000|2|3|3|0|75000|40250|1|
4|2002|4|61|6|6|424|27|12|100||4|6000||12|4|769|37|15|315|12|4|10|45|200|9600|57||30|20|20|20|20|2002|250|61028|4|5000|3|4|4|0|160000|61035|1|
5|3306|5|63|7|7|468|29|14|100||5|18000||16|4|556|27|11|208|8|2|10|58|331||69||30|20|20|20|20|3306|300|61027|1|10000|4|5|5|0|300000|40355|3|
6|4950|6|66|8|8|522|31|17|100||6|||18|4|435|21|9|147|6|1|10|74|495||80||30|20|20|20|20|4950|350|61027|2|30000|5|6|6|0||||
7|6947|7|69|10|10|585|33|20|100||7|||20|4|333|17|7|97|4|1|10|90|695||92||20|15|15|15|15|6947|400|61027|3|50000|6|7|7|0||||
8|9309|8|73|12|12|658|35|24|100||8|||24|4|286|14|6|73|3|1|10|109|931||105||20|15|15|15|15|9309|450|61026|1|100000|7|8|8|0||||
9|12050|9|77|14|14|741|38|28|100||9|||27|5|250|13|5|55|3|1|10|129|1205||117||20|15|15|15|15|12050|500|61026|2|300000|8|9|9|0||||
10|15183|10|82|16|16|834|41|33|100|100|10|||29|5|222|11|4|0|0|0|10|151|1366||130|5|20|15|15|15|15|15183|550|61025|1|500000|9|10|10|0||||
11|18720|11|87|19|19|936|45|38|100|100|11|||31|5|200|10|4|0|0|0|11|176|1685||143|10|20|15|15|15|15|18720|600|||||||0||||
12|21335|12|92|22|22|1048|48|44|100|100|12|||36|5|182|9|4|0|0|0|12|203|2134||157|15|10|15|10|10|10|21335|650|||||||0||||
And I have this code at the moment:
public string Fix(string FileName, int columnsCount)
{
var InputFile = File.ReadLines(FileName).Skip(1).ToArray();
string Result = "";
for(int i = 0; i < InputFile.Length; i++)
{
int FoundMatches = Regex.Matches(Regex.Escape(InputFile[i]), FindWhatTxtBox.Text).Count;
// If too many letters found, trim the rest.
if(FoundMatches > CountTxtBox.Text.Length)
{
string CurrentLine = InputFile[i];
}
}
return Result;
}
As you can see each line has either one to no numbers inside its vertical bar. How can I remove the extra letters?
Do you have to use a RegEx? It can also be done with string manipulation like this:
using System;
using System.Linq;
public class Program
{
public static void Main()
{
string s = "1|0|1|58|4|4|351|25|8|||1|0||6|3|1000|49|20|430|17|6|0|10|0|1200|25||30|20|20|20|20|0|100|61028|1|0|0|1|1|0|5000|40022|1|";
var arr = s.Split('|') ;
var retVal = String.Join("|", arr.Take(43));
Console.WriteLine(retVal);
}
}
It takes 43 because the 1st digit seems a counter to me... But you can make it 42 of course. Beware that this code will fail is there are less than 43 entries to work with.
Too simple to use Regex. See code below :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication1
{
class Program
{
const string INPUT_FILENAME = #"c:\temp\test.txt";
const string OUTPUT_FILENAME = #"c:\temp\test1.txt";
static void Main(string[] args)
{
StreamReader reader = new StreamReader(INPUT_FILENAME);
StreamWriter writer = new StreamWriter(OUTPUT_FILENAME);
string inputLine = "";
int lineCount = 0;
while ((inputLine = reader.ReadLine()) != null)
{
if (++lineCount == 1)
{
writer.WriteLine(inputLine);
}
else
{
string[] inputArray = inputLine.Split(new char[] {'|'});
writer.WriteLine(string.Join("|", inputArray.Take(43)));
}
}
reader.Close();
writer.Flush();
writer.Close();
}
}
}
Here is a data file, let us keep it easy by only needing 5 items but still using Regex.
Keep your examples small for StackOverflow...one will get more answers.
The below code can be changed to 42 ({0,42}) or any number as needed, but the example will read then write out only 5.
Data File
1|2|3|4|5|6|7|8|9|10
10|9|8|7|6|5|4|3|2|1|0|1|
||||||||||||11|12|
Code To get 0 to 5 Items per line
var data = File.ReadAllText(#"C:\Temp\test.txt");
string pattern = #"^(\d*\|){0,5}";
File.WriteAllLines(#"C:\Temp\testOut.txt",
Regex.Matches(data, pattern, RegexOptions.Multiline)
.OfType<Match>()
.Select(mt => mt.Groups[0].Value));
Resultant File
1|2|3|4|5|
10|9|8|7|6|
|||||
Using C#, write an algorithm to find the three longest unique palindromes in a string. For the three longest palindromes, report the palindrome text, start index and length in descending order of length. For example, the output for string,
sqrrqabccbatudefggfedvwhijkllkjihxymnnmzpop
should be:
Text: hijkllkjih, Index: 23, Length: 10 Text: defggfed, Index: 13, Length: 8 Text: abccba, Index: 5 Length: 6
Now I got to the part where I can write out the palindromes and its length but I have a problem with the index. Need help on how to include the index of the palindrome and how to get unique lengths
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string inputString = "sqrrqabccbatudefggfedvwhijkllkjihxymnnmzpop";
string currentStr = string.Empty;
List<string> listOfPalindromes = new List<string>();
char[] inputStrArr = inputString.ToCharArray();
for (int i = 0; i < inputStrArr.Length; i++)
{
for (int j = i+1; j < inputStrArr.Length; j++)
{
currentStr = inputString.Substring(i, j - i + 1);
if (IsPalindrome(currentStr))
{
listOfPalindromes.Add(currentStr);
}
}
}
var longest = (from str in listOfPalindromes
orderby str.Length descending
select str).Take(3);
foreach (var item in longest)
{
Console.WriteLine("Text: " + item.ToString() + " Index: " + + " Length: " + item.Length.ToString());
}
}
private static bool IsPalindrome(String str)
{
bool IsPalindrome = true;
if (str.Length > 0)
{
for (int i = 0; i < str.Length / 2; i++)
{
if (str.Substring(i, 1) != str.Substring(str.Length - (i + 1), 1))
{
IsPalindrome = false;
}
}
}
else
{
IsPalindrome = false;
}
return IsPalindrome;
}
}
}
ok now that's out of the way how do I get distinct lengths? can it be done by using DISTINCT or do I need to edit something else?
You need to store more information when a palindrome is found.
First define a class:
class PalindromeResult
{
public string Text { get; set; }
public int Index { get; set; }
}
Then instead of your List<string>, create a list of this class:
List<PalindromeResult> listOfPalindromes = new List<PalindromeResult>();
When a result is found, ad it like this
if (IsPalindrome(currentStr))
{
listOfPalindromes.Add(new PalindromeResult { Text = currentStr, Index = i});
}
You would have to update your sorting and printing accordingly.
The most optimal solution (as pointed out by Sinatr) would be to store the index of the palindromes as you find them.
You could instead use the IndexOf function to find the index of the first occurrence of a substring within a string.
For example inputString.IndexOf(item) could be used in your Console.WriteLine function.
Try This
public static bool IsPalindromic(int l)
{
IEnumerable<char> forwards = l.ToString().ToCharArray();
return forwards.SequenceEqual(forwards.Reverse());
}
public int LongestPalindrome(List<int> integers)
{
int length=0;
int num;
foreach (var integer in integers)
{
if (integer.ToString().Length > length)
{
num = integer;
length = integer.ToString().Length;
}
}
return num;
}
public void MyFunction(string input)
{
var numbers = Regex.Split(input, #"\D+").ToList();
var allPalindromes = (from value in numbers where !string.IsNullOrEmpty(value) select int.Parse(value) into i where IsPalindromic(i) select i).ToList();
if (allPalindromes.Count>0)
Console.WriteLine(LongestPalindrome(allPalindromes));
else
Console.WriteLine("Any Palindrome number was found");
}
you cans mix the 2 two functions to have a beautiful code but i done it like this to simplify.