Character counting with a Linked List - c#

I am trying to Design a program that reads an ASCII text file one byte at a time and outputs the frequency count of how many times each character appears.
The output does not need to be sorted. The character frequency objects need to be processed and stored using a Linked List.
I know that my character frequency class is okay, however I cannot figure out how to count the characters. This is the code I have so far, but my counts end up as 0.
LinkedList<CharacterFrequency> list = new LinkedList<CharacterFrequency>();
CharacterFrequency cf = new CharacterFrequency();
StreamReader file = new StreamReader(#"input.txt");
var ch = -1;
while ((ch = file.Read ()) !=-1)
{
LinkedListNode<CharacterFrequency> node;
node = list.Find(cf);
if (list.Any())
{
cf.Increment();
}
if (node != null)
{
list.AddLast(cf);
}
}
Console.WriteLine(cf.ToString());
file.Close();
//and here is my character frequency class;
class CharacterFrequency
{
private Char m_char;
private int m_count;
public CharacterFrequency()
{
m_char = '\0';
m_count = 0;
}
public CharacterFrequency(Char ch)
{
m_char = ch;
m_count = 0;
}
public CharacterFrequency(Char ch, int count)
{
m_char = ch;
m_count = count;
}
public CharacterFrequency(CharacterFrequency cf)
{
m_char = cf.Character;
m_count = cf.Count;
}
public Char Character
{
get
{
return m_char;
}
set
{
m_char = value;
}
}
public int Count
{
get
{
return m_count;
}
set
{
if (m_count > value)
m_count = value;
}
}
public void Increment()
{
m_count++;
}
public override string ToString()
{
return String.Format("{0}({1}) {2}", m_char, (int)m_char, m_count);
}
public override int GetHashCode()
{
return (int)m_char;
}
public override bool Equals(object obj)
{
if (obj == null)
return false;
if (obj == this)
return true;
if (!(obj.GetType() == GetType()))
return false;
CharacterFrequency rhs = (CharacterFrequency)obj;
return this.Character == rhs.Character;
}
}
Updated code (I have not been able to work through the issue with the increment method yet):
class Program
{
static void Main(string[] args)
{
int ch;
const int Input_File = 0;
const int Output_File = 1;
StreamReader reader = null;
StreamWriter writer = null;
//linked list declaration
LinkedList<CharacterFrequency> list =
new LinkedList<CharacterFrequency>();
//read the characters from the input file
reader = new StreamReader(File.OpenRead(args[Input_File]));
while ((ch = reader.Read ()) !=-1)//casts to integer
{
CharacterFrequency cf = LinkedList<CharacterFrequency>list(ch); //attempt to create linked list from character frequency BIG ISSUES HERE!
LinkedListNode<CharacterFrequency> node; //attempt to create node
node = list.Find(cf); // find method
if (node != null) //increment if found
{
node.Value.Increment();
}
else //add if not found
{
list.AddLast(cf);
}
}
reader.Close();
//write outputfile
writer = new StreamWriter(File.OpenWrite(args[Output_File]));
foreach (CharacterFrequency cf in list)
writer.WriteLine(cf.ToString());
writer.Close();
}
}
}

I suspect the problem is here :
if (list.Any())
{
cf.Increment();
}
I think it should be (You may need to adjust it more):
if(node != null) {
node.getData().increment();
}
else {
list.AddLast(cf);
}
There's also a problem with the fact that you don't use ch . At the beggining of the loop, you should recreate a LinkedListNode<CharacterFrequency> cf from the char you've read, or the code won't work .
EDIT: You can't declare a variable more than once, so simply take it outside the loop
CharacterFrequency cf;
while ((ch = reader.Read ()) !=-1)//casts to integer
{
cf = LinkedList<CharacterFrequency>list(ch);
....

Related

Prevent ' Process is terminated due to StackOverflowException' in C#

I have a program which builds a very large tree from input data and traverses it, both by recursion. I have tested the program on smaller inputs (and thus smaller trees) and it functions as intended. However when the input data is much larger i run into 'Process is terminated due to StackOverflowException'. I assume this is due to the stack running out of space. Is there any way to prevent this or do I have to switch to building the tree via iteration instead? Or perhaps I am missing a case of infinite recursion somewhere?
Here is the code:
class Program
{
static int[] tileColors;
static Color[] colors;
static int totalTiles;
static void Main(string[] args)
{
Stopwatch s = new Stopwatch();
s.Start();
string[] data = File.ReadAllLines("colors.txt");
totalTiles = int.Parse(data[0].Split(' ')[0]);
int totalColors = int.Parse(data[0].Split(' ')[1]);
string[] colorsRaw = data[1].Split(' ');
tileColors = new int[totalTiles];
for (int i = 0; i < totalTiles; i++)
{
tileColors[i] = int.Parse(colorsRaw[i]) - 1;
}
colors = new Color[totalColors];
for (int i = 3; i < data.Length; i++)
{
string[] raw = data[i].Split(' ');
int[] pair = new int[] { int.Parse(raw[0]) - 1, int.Parse(raw[1]) - 1 };
if (colors[pair[0]] == null)
colors[pair[0]] = new Color(pair[1]);
else
colors[pair[0]].pairs.Add(pair[1]);
if (colors[pair[1]] == null)
colors[pair[1]] = new Color(pair[0]);
else
colors[pair[1]].pairs.Add(pair[0]);
}
Tree t = new Tree();
t.root = new Node(0);
PopulateTree(t.root);
long ans = t.CountMatchingLeaves(t.root, totalTiles - 1) % 1000000007;
Console.WriteLine(ans);
s.Stop();
Console.WriteLine(s.ElapsedMilliseconds);
}
static void PopulateTree(Node root)
{
for (int i = root.tile + 1; i < totalTiles; i++)
{
if (colors[tileColors[i]] == null) continue;
if (colors[tileColors[i]].Compatible(tileColors[root.tile]))
{
var node = new Node(i);
root.children.Add(node);
PopulateTree(node);
}
}
}
}
class Color
{
public List<int> pairs = new List<int>();
public Color(int pair)
{
pairs.Add(pair);
}
public bool Compatible(int c)
{
return pairs.Contains(c);
}
}
class Node
{
public List<Node> children = new List<Node>();
public int tile;
public Node(int tile)
{
this.tile = tile;
}
}
class Tree
{
public Node root;
public List<Node> GetMatchingLeaves(Node root, int match)
{
if (root.children.Count == 0)
{
if (root.tile == match)
{
return new List<Node>() { root };
}
return new List<Node>();
}
List<Node> list = new List<Node>();
foreach(var c in root.children)
{
list.AddRange(GetMatchingLeaves(c, match));
}
return list;
}
public long CountMatchingLeaves(Node root, int match)
{
if (root.children.Count == 0)
{
if (root.tile == match)
{
return 1;
}
return 0;
}
long count = 0;
foreach (var c in root.children)
{
count += CountMatchingLeaves(c, match);
}
return count;
}
}
You can always rewrite recursion as iteration, usually by using a stack class rather than rely on your thread's stack. For your code it would look like this:
static void PopulateTree(Node start)
{
var nodes = new Stack<Node>();
nodes.Push(start);
while(nodes.Count != 0)
{
var root = nodes.Pop();
for (int i = root.tile + 1; i < totalTiles; i++)
{
if (colors[tileColors[i]] == null) continue;
if (colors[tileColors[i]].Compatible(tileColors[root.tile]))
{
var node = new Node(i);
root.children.Add(node);
nodes.Push(node);
}
}
}
}
The while loop checking for more items is the equivalent of your terminating condition in recursion.

Custom linked list c# for practice - not sure why my insertion is't working

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication1
{
class Program
{
class node
{
public int data;
public node next;
public node(int value)
{
next = null;
data = value;
}
public void addFront(node head, int value)
{
var tempNode = new node(value);
tempNode.next = head;
head = tempNode;
}
public void addBack(node head, int value)
{
var insertNode = new node(value);
var traverseNode = head;
while (traverseNode != null)
{
if (traverseNode.next == null)
{
traverseNode.next = insertNode;
insertNode.data = value;
}
traverseNode = traverseNode.next;
}
}
public void printList(node head)
{
var counter = 0;
var traverseNode = head;
while (traverseNode != null)
{
Console.WriteLine("LL item {0}: {1}", counter, traverseNode.data);
traverseNode = traverseNode.next;
counter++;
}
}
public int deleteFront(node head)
{
//Don't need to delete the memory because the garbage collector handles that in c# (at least by my understanding). If this were C, I'd have to use free, and in C++ I'd use keyword Delete
var returnValue = head.data;
head = head.next;
return returnValue;
}
public int deleteBack(node head)
{
node traverseNode = head;
node traverseNodeTrailer = head;
if (traverseNode == null)
{
return 0;
}
while (traverseNode != null)
{
if (traverseNode.next == null)
{
traverseNodeTrailer.next = null;
break;
}
traverseNodeTrailer = traverseNode;
traverseNode = traverseNode.next;
}
return traverseNode.data;
}
public void deleteMiddle(node middleNode)
{
var traverseNode = middleNode;
while (traverseNode != null)
{
if (traverseNode.next.next == null)
{
traverseNode.data = traverseNode.next.data;
traverseNode.next = null;
break;
}
traverseNode.data = traverseNode.next.data;
traverseNode = traverseNode.next;
}
}
public node search(node head, int value)
{
var traverseNode = head;
while (traverseNode != null)
{
if (traverseNode.data == value)
return traverseNode;
traverseNode = traverseNode.next;
}
return null;//failed to find the value in the linked list
}
}
static void Main(string[] args)
{
Int32 userMenuInput = 0;
var running = true;
int userNodeInput = 0;
node head = null;
while (running)
{
Console.WriteLine("Enter a number depending on the operation you would like to perform.");
Console.WriteLine("1 - Insert node of specified value; 2 - Delete a node and return the value; 3 - Search for a node with a specified value; 4 - Print the list values. *All other values exit");
userMenuInput = Convert.ToInt32(Console.ReadLine());//get user input for what they want to do
switch (userMenuInput)
{
case 1:
Console.WriteLine("Enter a number you'd like to insert into the linked list.");
userNodeInput = Convert.ToInt32(Console.ReadLine());//get user input for value to insert
if (head == null)
{
head = new node(userNodeInput);
break;
}
head.addFront(head, userNodeInput);
break;
case 2:
if (head == null)
{
Console.WriteLine("The list is empty - cannot delete.");
break;
}
userNodeInput = head.deleteFront(head);
Console.WriteLine("{0} is the value that was deleted from the front of the list.", userNodeInput);
break;
case 3:
Console.WriteLine("What is the value you'd like to search for in the list?");
userNodeInput = Convert.ToInt32(Console.ReadLine());
if (head == null)
{
//do nothing
}
else if (head.search(head, userNodeInput) != null)
{
Console.WriteLine("That value was found");
break;
}
Console.WriteLine("That value was not found.");
break;
case 4:
if (head == null)
{
Console.WriteLine("The list has nothing to print.");
break;
}
head.printList(head);
break;
default:
running = false;
break;
}
}
}
}
}
DISCLAIMER: I'm very new to c# - just been using it for the past week or so. I will appreciate any optimization hints you have, but for now, I'm just trying to get this running.
So, this is correctly inserting 1 element, but nothing more. It is finding the element if it is in the first link. The delete doesn't seem to be working at all. It doesn't throw any errors but it also doesn't delete the first link. Note: A few of my functions in there are unused so far so don't worry about mentioning that - I'm aware.
Thanks everyone!
You can use ref if you want to modify argument passed to addFront method:
public void addFront(ref node head, int value)
{
var tempNode = new node(value);
tempNode.next = head;
head = tempNode;
}
.
head.addFront(ref head, userNodeInput);

XML - System.Xml.XmlException - hexadecimal value 0x06

I get this error. Later I searched and found out the reason of illegal characters in my XML and its solution. But I don't have the access to edit any of these files. My job is to read and fetch the tag value, attribute value and similar stuff. SO I can't replace the binary characters with escapes like '\x01' with &#01. Also I tried to include CheckCharacters =false in XMLreader settings. It doesn't take this. Still it is throwing the same error.
Is it not possible to fix in XMLreader? I read about XMLtextReader. It can skip the exception. But already I have coded for all my features using XMLreader. It would be good if I can find a solution for this. Otherwise I would have to change all my code.
My code:
private void button1_Click(object sender, EventArgs e)
{
int i = 0;
var filenames = System.IO.Directory
.EnumerateFiles(textBox1.Text, "*.xml", System.IO.SearchOption.AllDirectories)
.Select(System.IO.Path.GetFullPath);
foreach (var f in filenames)
{
var resolver = new XmlUrlOverrideResolver();
resolver.DtdFileMap[#"X1.DTD"] = #"\\location\X1.DTD";
resolver.DtdFileMap[#"R2.DTD"] = #"\\location\X2.DTD";
resolver.DtdFileMap[#"R5.DTD"] = #"\\location\R5.DTD";
XmlReaderSettings settings = new XmlReaderSettings();
settings.DtdProcessing = DtdProcessing.Parse;
settings.XmlResolver = resolver;
XmlReader doc = XmlReader.Create(f, settings);
while (doc.Read())
{
if ((doc.NodeType == XmlNodeType.Element) && (doc.Name == "ap"))
{
if (doc.HasAttributes)
{
String fin = doc.GetAttribute("ap");
if (fin == "no")
{
String[] array = new String[10000];
array[i] = (f);
File.AppendAllText(#"\\location\NAPP.txt", array[i] + Environment.NewLine);
i++;
}
else
{
String[] abs = new String[10000];
abs[i] = (f);
File.AppendAllText(#"\\location\APP.txt", abs[i] + Environment.NewLine);
i++;
}
}
}
}
}
MessageBox.Show("Done");
}
This is a very simple example of character "filter" that will replae the 0x06 character with a space:
public class MyStreamReader : StreamReader {
public MyStreamReader(string path)
: base(path) {
}
public override int Read(char[] buffer, int index, int count) {
int res = base.Read(buffer, index, count);
for (int i = 0; i < res; i++) {
if (buffer[i] == 0x06) {
buffer[i] = ' ';
}
}
return res;
}
}
You use it this way:
using (var sr = new MyStreamReader(f)) {
var doc = XmlReader.Create(sr, settings);
Note that it's very simple because it's replacing a character (the 0x06) with another character of the same "length" (the space). If you wanted to replace a character with a "sequence" of characters (to escape it), it would get more complex (not impossible, 30 minutes of work difficult)
(I have checked and it seems the XmlTextReader only uses that method and not the Read() method)
As always, when a programmer tells you 30 minutes, it means 0 minutes or 2 hours :-)
This is the "more complex" ReplacingStreamReader:
/// <summary>
/// Only the Read methods are supported!
/// </summary>
public class ReplacingStreamReader : StreamReader
{
public ReplacingStreamReader(string path)
: base(path)
{
}
public Func<char, string> ReplaceWith { get; set; }
protected char[] RemainingChars { get; set; }
protected int RemainingCharsIndex { get; set; }
public override int Read()
{
int ch;
if (RemainingChars != null)
{
ch = RemainingChars[RemainingCharsIndex];
RemainingCharsIndex++;
if (RemainingCharsIndex == RemainingChars.Length)
{
RemainingCharsIndex = 0;
RemainingChars = null;
}
}
else
{
ch = base.Read();
if (ch != -1)
{
string replace = ReplaceWith((char)ch);
if (replace == null)
{
// Do nothing
}
else if (replace.Length == 1)
{
ch = replace[0];
}
else
{
ch = replace[0];
RemainingChars = replace.ToCharArray(1, replace.Length - 1);
RemainingCharsIndex = 0;
}
}
}
return ch;
}
public override int Read(char[] buffer, int index, int count)
{
int res = 0;
// We leave error handling to the StreamReader :-)
// We handle only "working" parameters
if (RemainingChars != null && buffer != null && index >= 0 && count > 0 && index + count <= buffer.Length)
{
int remainingCharsCount = RemainingChars.Length - RemainingCharsIndex;
res = Math.Min(remainingCharsCount, count);
Array.Copy(RemainingChars, RemainingCharsIndex, buffer, index, res);
RemainingCharsIndex += res;
if (RemainingCharsIndex == RemainingChars.Length)
{
RemainingCharsIndex = 0;
RemainingChars = null;
}
if (res == count)
{
return res;
}
index += res;
count -= res;
}
while (true)
{
List<char> sb = null;
int res2 = base.Read(buffer, index, count);
if (res2 == 0 || ReplaceWith == null)
{
return res;
}
int j = 0;
for (int i = 0; i < res2; i++)
{
char ch = buffer[index + i];
string replace = ReplaceWith(ch);
if (sb != null)
{
if (replace == null)
{
sb.Add(ch);
}
else
{
sb.AddRange(replace);
}
}
else if (replace == null)
{
buffer[j] = ch;
j++;
}
else if (replace.Length == 1)
{
buffer[j] = replace[0];
j++;
}
else if (replace.Length == 0)
{
// We do not advance
}
else
{
sb = new List<char>();
sb.AddRange(replace);
}
}
res2 = j;
if (sb != null)
{
int res3 = Math.Min(sb.Count, count - res2);
sb.CopyTo(0, buffer, index + res2, res3);
if (res3 < sb.Count)
{
RemainingChars = new char[sb.Count - res3];
RemainingCharsIndex = 0;
sb.CopyTo(res3, RemainingChars, 0, RemainingChars.Length);
}
res += res3;
}
else
{
res2 = j;
// Can't happen if sb != null (at least a character must
// have been added)
if (res2 == 0)
{
continue;
}
}
res += res2;
return res;
}
}
}
Use it like:
using (var sr = new ReplacingStreamReader(f))
{
sr.ReplaceWith = x =>
{
return x == 0x6 ? " " : null;
// return x == '.' ? " " : null; // Replace all . with
};
var doc = XmlReader.Create(sr, settings);
Be aware that the ReplacingStreamReader doesn't "know" which part of the xml it is modifying, so rarely a "blind" replace is ok :-) Other than this limitation, you can replace any character with any string (null in the ReplaceWith means "keep the current character", equivalent to x.ToString() in the example given. Returning string.Empty is valid, means remove the current character).
The class is quite interesting: it keeps a char[] RemainingChars with the chars that have been read (and filtered by ReplaceWith) but that haven't been returned by a Read() method because the passed buffer was too much small (the ReplaceWith method could "enlarge" the read string, making it too much big for the buffer!). Note that sb is a List<char> instead of a StringBuilder. Probably using one or the other would be nearly equivalent, code-wise.
You could first read the content into a string replace (escape) the content, and then load it into a XmlReader:
foreach (var f in filenames) {
string text;
using (StreamReader s = new StreamReader(f,Encoding.UTF8)) {
text = s.ReadToEnd();
}
text = text.Replace("\x01",#"&#01"); //replace the content
//load some settings
var resolver = new XmlUrlOverrideResolver();
resolver.DtdFileMap[#"X1.DTD"] = #"\\location\X1.DTD";
resolver.DtdFileMap[#"R2.DTD"] = #"\\location\X2.DTD";
resolver.DtdFileMap[#"R5.DTD"] = #"\\location\R5.DTD";
XmlReaderSettings settings = new XmlReaderSettings();
settings.DtdProcessing = DtdProcessing.Parse;
settings.XmlResolver = resolver;
XmlReader doc = XmlReader.Create(text, settings);
//perform processing task
//...
}

Streamreader with custom LineBreak - Performance optimisation

Edit: See my Solution below...
I had the following Problem to solve:
We receive Files (mostly adress-Information) from different sources, these can be in Windows Standard with CR/LF ('\r''\n') as Line Break or UNIX with LF ('\n').
When reading text in using the StreamReader.ReadLine() method, this is no Problem because it handles both cases equally.
The Problem occurs when you have a CR or a LF somewhere in the File that is not supposed to be there.
This happens for example if you Export a EXCEL-File with Cells that contain LineBreaks within the Cell to .CSV or other Flat-Files.
Now you have a File that for example has the following structure:
FirstName;LastName;Street;HouseNumber;PostalCode;City;Country'\r''\n'
Jane;Doe;co James Doe'\n'TestStreet;5;TestCity;TestCountry'\r''\n'
John;Hancock;Teststreet;1;4586;TestCity;TestCounty'\r''\n'
Now the StreamReader.ReadLine() Method reads the First Line as:
FirstName;LastName;Street;HouseNumber;PostalCode;City;Country
Which is fine but the seccond Line will be:
Jane;Doe;co James Doe
This will either break your Code or you will have false Results, as the following Line will be:
TestStreet;5;TestCity;TestCountry
So we usualy ran the File trough a tool that checks if there are loose '\n' or '\r' arround and delete them.
But this step is easy to Forget and so I tried to implement a ReadLine() method of my own. The requirement was that it would be able to use one or two LineBreak characters and those characters could be defined freely by the consuming logic.
This is the Class that I came up with:
public class ReadFile
{
private FileStream file;
private StreamReader reader;
private string fileLocation;
private Encoding fileEncoding;
private char lineBreak1;
private char lineBreak2;
private bool useSeccondLineBreak;
private bool streamCreated = false;
private bool endOfStream;
public bool EndOfStream
{
get { return endOfStream; }
set { endOfStream = value; }
}
public ReadFile(string FileLocation, Encoding FileEncoding, char LineBreak1, char LineBreak2, bool UseSeccondLineBreak)
{
fileLocation = FileLocation;
fileEncoding = FileEncoding;
lineBreak1 = LineBreak1;
lineBreak2 = LineBreak2;
useSeccondLineBreak = UseSeccondLineBreak;
}
public string ReadLine()
{
if (streamCreated == false)
{
file = new FileStream(fileLocation, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
reader = new StreamReader(file, fileEncoding);
streamCreated = true;
}
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1];
char lastChar = new char();
char currentChar = new char();
bool first = true;
while (reader.EndOfStream != true)
{
if (useSeccondLineBreak == true)
{
reader.Read(buffer, 0, 1);
lastChar = currentChar;
if (currentChar == lineBreak1 && buffer[0] == lineBreak2)
{
break;
}
else
{
currentChar = buffer[0];
}
if (first == false)
{
builder.Append(lastChar);
}
else
{
first = false;
}
}
else
{
reader.Read(buffer, 0, 1);
if (buffer[0] == lineBreak1)
{
break;
}
else
{
currentChar = buffer[0];
}
builder.Append(currentChar);
}
}
if (reader.EndOfStream == true)
{
EndOfStream = true;
}
return builder.ToString();
}
public void Close()
{
if (streamCreated == true)
{
reader.Close();
file.Close();
}
}
}
This code works fine, it does what it is supposed to do but compared to the original StreamReader.ReadLine() method, it is ~3 Times slower. As we work with large row-Counts the difference is not only messured but also reflected in real world Performance.
(for 700'000 Rows it takes ~ 5 Seconds to read all Lines, extract a Chunk and write it to a new File, with my method it takes ~15 Seconds on my system)
I tried different aproaches with bigger buffers but so far I wasn't able to increase Performance.
What I would be interessted in:
Any suggestions how I could improve the performance of this code to get closer to the original Performance of StreamReader.ReadLine()?
Solution:
This now takes ~6 Seconds (compared to ~5 Sec using the Default 'StreamReader.ReadLine()' ) for 700'000 Rows to do the same things as the code above does.
Thanks Jim Mischel for pointing me in the right direction!
public class ReadFile
{
private FileStream file;
private StreamReader reader;
private string fileLocation;
private Encoding fileEncoding;
private char lineBreak1;
private char lineBreak2;
private bool useSeccondLineBreak;
const int BufferSize = 8192;
int bufferedCount;
char[] rest = new char[BufferSize];
int position = 0;
char lastChar;
bool useLastChar;
private bool streamCreated = false;
private bool endOfStream;
public bool EndOfStream
{
get { return endOfStream; }
set { endOfStream = value; }
}
public ReadFile(string FileLocation, Encoding FileEncoding, char LineBreak1, char LineBreak2, bool UseSeccondLineBreak)
{
fileLocation = FileLocation;
fileEncoding = FileEncoding;
lineBreak1 = LineBreak1;
lineBreak2 = LineBreak2;
useSeccondLineBreak = UseSeccondLineBreak;
}
private int readInBuffer()
{
return reader.Read(rest, 0, BufferSize);
}
public string ReadLine()
{
StringBuilder builder = new StringBuilder();
bool lineFound = false;
if (streamCreated == false)
{
file = new FileStream(fileLocation, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 8192);
reader = new StreamReader(file, fileEncoding);
streamCreated = true;
bufferedCount = readInBuffer();
}
while (lineFound == false && EndOfStream != true)
{
if (position < bufferedCount)
{
for (int i = position; i < BufferSize; i++)
{
if (useLastChar == true)
{
useLastChar = false;
if (rest[i] == lineBreak2)
{
count++;
position = i + 1;
lineFound = true;
break;
}
else
{
builder.Append(lastChar);
}
}
if (rest[i] == lineBreak1)
{
if (useSeccondLineBreak == true)
{
if (i + 1 <= BufferSize - 1)
{
if (rest[i + 1] == lineBreak2)
{
position = i + 2;
lineFound = true;
break;
}
else
{
builder.Append(rest[i]);
}
}
else
{
useLastChar = true;
lastChar = rest[i];
}
}
else
{
position = i + 1;
lineFound = true;
break;
}
}
else
{
builder.Append(rest[i]);
}
position = i + 1;
}
}
else
{
bufferedCount = readInBuffer();
position = 0;
}
}
if (reader.EndOfStream == true && position == bufferedCount)
{
EndOfStream = true;
}
return builder.ToString();
}
public void Close()
{
if (streamCreated == true)
{
reader.Close();
file.Close();
}
}
}
The way to speed this up would be to have it read more than one character at a time. For example, create a 4 kilobyte buffer, read data into that buffer, and then go character-by-character. If you copy character-by-character to a StringBuilder, it's pretty easy.
The code below shows how to parse out lines in a loop. You'd have to split this up so that it can maintain state between calls, but it should give you the idea.
const int BufferSize = 4096;
const string newline = "\r\n";
using (var strm = new StreamReader(....))
{
int newlineIndex = 0;
var buffer = new char[BufferSize];
StringBuilder sb = new StringBuilder();
int charsInBuffer = 0;
int bufferIndex = 0;
char lastChar = (char)-1;
while (!(strm.EndOfStream && bufferIndex >= charsInBuffer))
{
if (bufferIndex > charsInBuffer)
{
charsInBuffer = strm.Read(buffer, 0, buffer.Length);
if (charsInBuffer == 0)
{
// nothing read. Must be at end of stream.
break;
}
bufferIndex = 0;
}
if (buffer[bufferIndex] == newline[newlineIndex])
{
++newlineIndex;
if (newlineIndex == newline.Length)
{
// found a line
Console.WriteLine(sb.ToString());
newlineIndex = 0;
sb = new StringBuilder();
}
}
else
{
if (newlineIndex > 0)
{
// copy matched newline characters
sb.Append(newline.Substring(0, newlineIndex));
newlineIndex = 0;
}
sb.Append(buffer[bufferIndex]);
}
++bufferIndex;
}
// Might be a line left, without a newline
if (newlineIndex > 0)
{
sb.Append(newline.Substring(0, newlineIndex));
}
if (sb.Length > 0)
{
Console.WriteLine(sb.ToString());
}
}
You could optimize this a bit by keeping track of the starting position so that when you find a line you create a string from buffer[start] to buffer[current], without creating a StringBuilder. Instead you call the String(char[], int32, int32) constructor. That's a little tricky to handle when you cross a buffer boundary. Probably would want to handle crossing the buffer boundary as a special case and use a StringBuilder for temporary storage in that case.
I wouldn't bother with that optimization, though, until after I got this first version working.

Using HttpUtility.HtmlDecode method with .NET Compact Framework?

A project using .NET Compact Framework does not allow use System.Web library.
How can I do to HTMLDecode a string using .NET Compact Framework?
I didn't found any class that do that.
If you decompile the .NET Framework source you can get the methods you need. I did it for you, so you can copy and paste the two classes below:
internal class HtmlEntities
{
// Fields
private static string[] _entitiesList = new string[] {
"\"-quot", "&-amp", "<-lt", ">-gt", "\x00a0-nbsp", "\x00a1-iexcl", "\x00a2-cent", "\x00a3-pound", "\x00a4-curren", "\x00a5-yen", "\x00a6-brvbar", "\x00a7-sect", "\x00a8-uml", "\x00a9-copy", "\x00aa-ordf", "\x00ab-laquo",
"\x00ac-not", "\x00ad-shy", "\x00ae-reg", "\x00af-macr", "\x00b0-deg", "\x00b1-plusmn", "\x00b2-sup2", "\x00b3-sup3", "\x00b4-acute", "\x00b5-micro", "\x00b6-para", "\x00b7-middot", "\x00b8-cedil", "\x00b9-sup1", "\x00ba-ordm", "\x00bb-raquo",
"\x00bc-frac14", "\x00bd-frac12", "\x00be-frac34", "\x00bf-iquest", "\x00c0-Agrave", "\x00c1-Aacute", "\x00c2-Acirc", "\x00c3-Atilde", "\x00c4-Auml", "\x00c5-Aring", "\x00c6-AElig", "\x00c7-Ccedil", "\x00c8-Egrave", "\x00c9-Eacute", "\x00ca-Ecirc", "\x00cb-Euml",
"\x00cc-Igrave", "\x00cd-Iacute", "\x00ce-Icirc", "\x00cf-Iuml", "\x00d0-ETH", "\x00d1-Ntilde", "\x00d2-Ograve", "\x00d3-Oacute", "\x00d4-Ocirc", "\x00d5-Otilde", "\x00d6-Ouml", "\x00d7-times", "\x00d8-Oslash", "\x00d9-Ugrave", "\x00da-Uacute", "\x00db-Ucirc",
"\x00dc-Uuml", "\x00dd-Yacute", "\x00de-THORN", "\x00df-szlig", "\x00e0-agrave", "\x00e1-aacute", "\x00e2-acirc", "\x00e3-atilde", "\x00e4-auml", "\x00e5-aring", "\x00e6-aelig", "\x00e7-ccedil", "\x00e8-egrave", "\x00e9-eacute", "\x00ea-ecirc", "\x00eb-euml",
"\x00ec-igrave", "\x00ed-iacute", "\x00ee-icirc", "\x00ef-iuml", "\x00f0-eth", "\x00f1-ntilde", "\x00f2-ograve", "\x00f3-oacute", "\x00f4-ocirc", "\x00f5-otilde", "\x00f6-ouml", "\x00f7-divide", "\x00f8-oslash", "\x00f9-ugrave", "\x00fa-uacute", "\x00fb-ucirc",
"\x00fc-uuml", "\x00fd-yacute", "\x00fe-thorn", "\x00ff-yuml", "Œ-OElig", "œ-oelig", "Š-Scaron", "š-scaron", "Ÿ-Yuml", "ƒ-fnof", "ˆ-circ", "˜-tilde", "Α-Alpha", "Β-Beta", "Γ-Gamma", "Δ-Delta",
"Ε-Epsilon", "Ζ-Zeta", "Η-Eta", "Θ-Theta", "Ι-Iota", "Κ-Kappa", "Λ-Lambda", "Μ-Mu", "Ν-Nu", "Ξ-Xi", "Ο-Omicron", "Π-Pi", "Ρ-Rho", "Σ-Sigma", "Τ-Tau", "Υ-Upsilon",
"Φ-Phi", "Χ-Chi", "Ψ-Psi", "Ω-Omega", "α-alpha", "β-beta", "γ-gamma", "δ-delta", "ε-epsilon", "ζ-zeta", "η-eta", "θ-theta", "ι-iota", "κ-kappa", "λ-lambda", "μ-mu",
"ν-nu", "ξ-xi", "ο-omicron", "π-pi", "ρ-rho", "ς-sigmaf", "σ-sigma", "τ-tau", "υ-upsilon", "φ-phi", "χ-chi", "ψ-psi", "ω-omega", "ϑ-thetasym", "ϒ-upsih", "ϖ-piv",
" -ensp", " -emsp", " -thinsp", "‌-zwnj", "‍-zwj", "‎-lrm", "‏-rlm", "–-ndash", "—-mdash", "‘-lsquo", "’-rsquo", "‚-sbquo", "“-ldquo", "”-rdquo", "„-bdquo", "†-dagger",
"‡-Dagger", "•-bull", "…-hellip", "‰-permil", "′-prime", "″-Prime", "‹-lsaquo", "›-rsaquo", "‾-oline", "⁄-frasl", "€-euro", "ℑ-image", "℘-weierp", "ℜ-real", "™-trade", "ℵ-alefsym",
"←-larr", "↑-uarr", "→-rarr", "↓-darr", "↔-harr", "↵-crarr", "⇐-lArr", "⇑-uArr", "⇒-rArr", "⇓-dArr", "⇔-hArr", "∀-forall", "∂-part", "∃-exist", "∅-empty", "∇-nabla",
"∈-isin", "∉-notin", "∋-ni", "∏-prod", "∑-sum", "−-minus", "∗-lowast", "√-radic", "∝-prop", "∞-infin", "∠-ang", "∧-and", "∨-or", "∩-cap", "∪-cup", "∫-int",
"∴-there4", "∼-sim", "≅-cong", "≈-asymp", "≠-ne", "≡-equiv", "≤-le", "≥-ge", "⊂-sub", "⊃-sup", "⊄-nsub", "⊆-sube", "⊇-supe", "⊕-oplus", "⊗-otimes", "⊥-perp",
};
private static Hashtable _entitiesLookupTable;
private static object _lookupLockObject = new object();
internal static char Lookup(string entity)
{
if (_entitiesLookupTable == null)
{
lock (_lookupLockObject)
{
if (_entitiesLookupTable == null)
{
Hashtable hashtable = new Hashtable();
foreach (string str in _entitiesList)
{
hashtable[str.Substring(2)] = str[0];
}
_entitiesLookupTable = hashtable;
}
}
}
object obj2 = _entitiesLookupTable[entity];
if (obj2 != null)
{
return (char)obj2;
}
return '\0';
}
}
public sealed class HttpUtility
{
private static char[] s_entityEndingChars = new char[] { ';', '&' };
public static string HtmlDecode(string s)
{
if (s == null)
{
return null;
}
if (s.IndexOf('&') < 0)
{
return s;
}
StringBuilder sb = new StringBuilder();
StringWriter output = new StringWriter(sb);
HtmlDecode(s, output);
return sb.ToString();
}
public static void HtmlDecode(string s, TextWriter output)
{
if (s != null)
{
if (s.IndexOf('&') < 0)
{
output.Write(s);
}
else
{
int length = s.Length;
for (int i = 0; i < length; i++)
{
char ch = s[i];
if (ch == '&')
{
int num3 = s.IndexOfAny(s_entityEndingChars, i + 1);
if ((num3 > 0) && (s[num3] == ';'))
{
string entity = s.Substring(i + 1, (num3 - i) - 1);
if ((entity.Length > 1) && (entity[0] == '#'))
{
try
{
if ((entity[1] == 'x') || (entity[1] == 'X'))
{
ch = (char)int.Parse(entity.Substring(2), NumberStyles.AllowHexSpecifier);
}
else
{
ch = (char)int.Parse(entity.Substring(1));
}
i = num3;
}
catch (FormatException)
{
i++;
}
catch (ArgumentException)
{
i++;
}
}
else
{
i = num3;
char ch2 = HtmlEntities.Lookup(entity);
if (ch2 != '\0')
{
ch = ch2;
}
else
{
output.Write('&');
output.Write(entity);
output.Write(';');
goto Label_0103;
}
}
}
}
output.Write(ch);
Label_0103: ;
}
}
}
}
}
You could always port the code from Mono, sample VB code or sample Java code found with a simple web search.
I've used reflector in the past to find just the methods I need from the framework that aren't included and compile them directly into my compact.net project.

Categories

Resources