I am trying to create a program which can sort the number of results associated with any specified google search. I need a big table very fast so I thought about using a loop. Each time I try it though, the debugger crashes due to a "System.Windows.Markup.XamlParseException".
public long resultStat(string a)
{
var req = (HttpWebRequest)WebRequest.Create("https://www.google.ca/search?hl=fr&output=search&sclient=psy-ab&q=a" + a + "&btnK=");
using (req as IDisposable)
{
WebResponse rep = req.GetResponse();
Stream str = rep.GetResponseStream();
StreamReader rdr = new StreamReader(str);
string res = rdr.ReadToEnd();
rdr.Close();
//This is my code to get the number results (it works perfectly)
int index = res.IndexOf(">Environ");
int cond = 0;
string final = "";
try
{
while (res[++index] != '<')
{
if (cond-- == 0 && res[index] != '&')
{ final += res[index]; cond = 0; }
else if (res[index] == '&') cond = 5;
}
}
catch { return 0; }
string temp = "";
foreach (char i in final) if (i < 48 && i > 58) temp += i;
return Int64.Parse(temp);
}
}
This whole method is simply used in the main in a for loop such as :
public void main()
{
//Other code
for (int i = 0; i < 3; i++) resultStat(i.ToString()); // For example
//Other code
}
I know it's the problem because as soon as I comment the loop, or lower it to one rep, nothing goes wrong. I've tried:
HttpWebRequest().Abort();
HttpWebRequest().KeepAlive = false;
It didn't work
I don't think the away you are doing is the correct way to do this. The simple one i can tell you is use Lib curl c#. You can send in an array of urls and get response as an array. That would be perfect for what you require here. Here is a sample class code below that does the multitasking itself. You just send in the urls.
public class MultiHttp
{
public static string UserAgent = "Mozilla 5.0";
public static string Header = "Content-Type: application/x-www-form-urlencoded; charset=UTF-8";
private static string[] Result;
public static string[] MultiPost(string[] Url, string post, int timeOut)
{
Result = new string[post.Length];
try
{
Curl.GlobalInit((int)CURLinitFlag.CURL_GLOBAL_ALL);
Easy.WriteFunction wf = new Easy.WriteFunction(OnWriteData);
//Easy.HeaderFunction hf = new Easy.HeaderFunction(OnHeaderData);
Easy[] easy = new Easy[Url.Length];
Multi multi = new Multi();
for (int i = 0; i < Url.Length; i++)
{
if (Url[i] != null)
{
easy[i] = new Easy();
easy[i].SetOpt(CURLoption.CURLOPT_URL, Url[i]);
easy[i].SetOpt(CURLoption.CURLOPT_WRITEFUNCTION, wf);
easy[i].SetOpt(CURLoption.CURLOPT_WRITEDATA, i);
//easy[i].SetOpt(CURLoption.CURLOPT_HEADERFUNCTION, hf);
//easy[i].SetOpt(CURLoption.CURLOPT_HEADERDATA, i);
easy[i].SetOpt(CURLoption.CURLOPT_TIMEOUT, timeOut);
easy[i].SetOpt(CURLoption.CURLOPT_USERAGENT, UserAgent);
Slist sl = new Slist();
sl.Append(Header);
easy[i].SetOpt(CURLoption.CURLOPT_HTTPHEADER, sl);
easy[i].SetOpt(CURLoption.CURLOPT_POSTFIELDS, post);
easy[i].SetOpt(CURLoption.CURLOPT_FOLLOWLOCATION, true);
easy[i].SetOpt(CURLoption.CURLOPT_POST, true);
//easy[i].SetOpt(CURLoption.CURLOPT_NOBODY, true);
if (Url[i].Contains("https"))
{
easy[i].SetOpt(CURLoption.CURLOPT_SSL_VERIFYHOST, 1);
easy[i].SetOpt(CURLoption.CURLOPT_SSL_VERIFYPEER, 0);
}
multi.AddHandle(easy[i]);
}
}
int stillRunning = 1;
while (multi.Perform(ref stillRunning) == CURLMcode.CURLM_CALL_MULTI_PERFORM) ;
while (stillRunning != 0)
{
multi.FDSet();
int rc = multi.Select(1000); // one second
switch (rc)
{
case -1:
stillRunning = 0;
break;
case 0:
default:
{
while (multi.Perform(ref stillRunning) == CURLMcode.CURLM_CALL_MULTI_PERFORM) ;
break;
}
}
}
// various cleanups
multi.Cleanup();
for (int i = 0; i < easy.Length; i++)
{
easy[i].Cleanup();
}
Curl.GlobalCleanup();
}
catch (Exception)
{
//r = ex+"";
}
return Result;
}
public static Int32 OnWriteData(Byte[] buf, Int32 size, Int32 nmemb,
Object extraData)
{
int tmp = Convert.ToInt32(extraData.ToString()); ;
Result[tmp] += System.Text.Encoding.UTF8.GetString(buf);
return size * nmemb;
}
}
Call it like :
String[] url= new String[2];
url[1]="https://www.google.ca/search?hl=fr&output=search&sclient=psy-ab&q=a1&btnK=";
url[2]="https://www.google.ca/search?hl=fr&output=search&sclient=psy-ab&q=a2&btnK=";
string postString=""; // IF YOU DO NOT WANT TO POST ANYTHING YOU CAN DO THE SAME THING KEEP URL THE SAME SEND POST ARRAY AND CHANGE THE CLASS IT WORKS BOTH WAYS
String[] result = MultiHttp.MultiPost(url, postString, timeOut);
Its just a sample but will get you the working idea to sort out your problem.
Related
I'm reading continuously from a TcpClient streamreader.
The data coming from the stream is raw XML. There is no message framing. So there is now reliable method to know when the message is finished. Though I only have 3 XML messages coming from the server. But when they are coming is unknown. And I can't configure/program the server.
This is my code so far.
public void Start()
{
StreamReader reader = new StreamReader(_tcpClient.GetStream());
char[] chars = new char[Int16.MaxValue];
while (!_requestStop)
{
try
{
while ((reader.Read(chars, 0, chars.Length)) != 0)
{
string s = new string(chars);
s = removeEmptyChars(s);
if (s.IndexOf("<foo", StringComparison.OrdinalIgnoreCase) > 0 &&
s.IndexOf("</foo>", StringComparison.OrdinalIgnoreCase) > 0)
{
Console.WriteLine(s);
OnAlarmResponseComplete(new CustomEventArgs(s));
}
if (s.IndexOf("<bar", StringComparison.OrdinalIgnoreCase) > 0 &&
s.IndexOf("</bar>", StringComparison.OrdinalIgnoreCase) > 0)
{
Console.WriteLine(s);
OnAckComplete(new CustomEventArgs(s));
}
}
}
catch (Exception e)
{
Console.WriteLine(e.Message);
//break;
}
}
reader.Close();
Console.WriteLine("Stopping TcpReader thread!");
}
Then in my main thread I'm processing the events. I'm adding them to a list.
Where I process the list.
When I'm debugging my application, I will be receiving 10 foo and 10 bar messages. And in my lists I have only 1 foo and 1 bar message stored.
Are the eventhandlers to slow to process this? Or am I missing something?
Here is the code you should use to cover all kinds of input issues (foo or bar received partially, foo and bar received together, etc..)
I can't say I approve using string parsing to handle XML content, but anyways.
private static string ProcessAndTrimFooBar(string s, out bool foundAny)
{
foundAny = false;
int fooStart = s.IndexOf("<foo", StringComparison.OrdinalIgnoreCase);
int fooEnd = s.IndexOf("</foo>", StringComparison.OrdinalIgnoreCase);
int barStart = s.IndexOf("<bar", StringComparison.OrdinalIgnoreCase);
int barEnd = s.IndexOf("</bar>", StringComparison.OrdinalIgnoreCase);
bool fooExists = fooStart >= 0 && fooEnd >= 0;
bool barExists = barStart >= 0 && barEnd >= 0;
if ((fooExists && !barExists) || (fooExists && barExists && fooStart < barStart))
{
string fooNodeContent = s.Substring(fooStart, fooEnd - fooStart + 6);
s = s.Substring(fooEnd + 6);
Console.WriteLine("Received <foo>: {0}", fooNodeContent);
OnAlarmResponseComplete(new CustomEventArgs(fooNodeContent));
foundAny = true;
}
if ((barExists && !fooExists) || (barExists && fooExists && barStart < fooStart))
{
string barNodeContent = s.Substring(barStart, barEnd - barStart + 6);
s = s.Substring(barEnd + 6);
Console.WriteLine("Received <bar>: {0}", barNodeContent);
OnAckComplete(new CustomEventArgs(barNodeContent));
foundAny = true;
}
return s;
}
public static void Start()
{
StreamReader reader = new StreamReader(_tcpClient.GetStream());
char[] chars = new char[Int16.MaxValue];
while (!_requestStop)
{
try
{
int currentOffset = 0;
while ((reader.Read(chars, currentOffset, chars.Length - currentOffset)) != 0)
{
string s = new string(chars).TrimEnd('\0');
bool foundAny;
do
{
s = ProcessAndTrimFooBar(s, out foundAny);
} while (foundAny);
chars = s.PadRight(Int16.MaxValue, '\0').ToCharArray();
currentOffset = s.Length;
}
}
catch (Exception e)
{
Console.WriteLine(e.Message);
//break;
}
}
reader.Close();
Console.WriteLine("Stopping TcpReader thread!");
}
I get this error. Later I searched and found out the reason of illegal characters in my XML and its solution. But I don't have the access to edit any of these files. My job is to read and fetch the tag value, attribute value and similar stuff. SO I can't replace the binary characters with escapes like '\x01' with . Also I tried to include CheckCharacters =false in XMLreader settings. It doesn't take this. Still it is throwing the same error.
Is it not possible to fix in XMLreader? I read about XMLtextReader. It can skip the exception. But already I have coded for all my features using XMLreader. It would be good if I can find a solution for this. Otherwise I would have to change all my code.
My code:
private void button1_Click(object sender, EventArgs e)
{
int i = 0;
var filenames = System.IO.Directory
.EnumerateFiles(textBox1.Text, "*.xml", System.IO.SearchOption.AllDirectories)
.Select(System.IO.Path.GetFullPath);
foreach (var f in filenames)
{
var resolver = new XmlUrlOverrideResolver();
resolver.DtdFileMap[#"X1.DTD"] = #"\\location\X1.DTD";
resolver.DtdFileMap[#"R2.DTD"] = #"\\location\X2.DTD";
resolver.DtdFileMap[#"R5.DTD"] = #"\\location\R5.DTD";
XmlReaderSettings settings = new XmlReaderSettings();
settings.DtdProcessing = DtdProcessing.Parse;
settings.XmlResolver = resolver;
XmlReader doc = XmlReader.Create(f, settings);
while (doc.Read())
{
if ((doc.NodeType == XmlNodeType.Element) && (doc.Name == "ap"))
{
if (doc.HasAttributes)
{
String fin = doc.GetAttribute("ap");
if (fin == "no")
{
String[] array = new String[10000];
array[i] = (f);
File.AppendAllText(#"\\location\NAPP.txt", array[i] + Environment.NewLine);
i++;
}
else
{
String[] abs = new String[10000];
abs[i] = (f);
File.AppendAllText(#"\\location\APP.txt", abs[i] + Environment.NewLine);
i++;
}
}
}
}
}
MessageBox.Show("Done");
}
This is a very simple example of character "filter" that will replae the 0x06 character with a space:
public class MyStreamReader : StreamReader {
public MyStreamReader(string path)
: base(path) {
}
public override int Read(char[] buffer, int index, int count) {
int res = base.Read(buffer, index, count);
for (int i = 0; i < res; i++) {
if (buffer[i] == 0x06) {
buffer[i] = ' ';
}
}
return res;
}
}
You use it this way:
using (var sr = new MyStreamReader(f)) {
var doc = XmlReader.Create(sr, settings);
Note that it's very simple because it's replacing a character (the 0x06) with another character of the same "length" (the space). If you wanted to replace a character with a "sequence" of characters (to escape it), it would get more complex (not impossible, 30 minutes of work difficult)
(I have checked and it seems the XmlTextReader only uses that method and not the Read() method)
As always, when a programmer tells you 30 minutes, it means 0 minutes or 2 hours :-)
This is the "more complex" ReplacingStreamReader:
/// <summary>
/// Only the Read methods are supported!
/// </summary>
public class ReplacingStreamReader : StreamReader
{
public ReplacingStreamReader(string path)
: base(path)
{
}
public Func<char, string> ReplaceWith { get; set; }
protected char[] RemainingChars { get; set; }
protected int RemainingCharsIndex { get; set; }
public override int Read()
{
int ch;
if (RemainingChars != null)
{
ch = RemainingChars[RemainingCharsIndex];
RemainingCharsIndex++;
if (RemainingCharsIndex == RemainingChars.Length)
{
RemainingCharsIndex = 0;
RemainingChars = null;
}
}
else
{
ch = base.Read();
if (ch != -1)
{
string replace = ReplaceWith((char)ch);
if (replace == null)
{
// Do nothing
}
else if (replace.Length == 1)
{
ch = replace[0];
}
else
{
ch = replace[0];
RemainingChars = replace.ToCharArray(1, replace.Length - 1);
RemainingCharsIndex = 0;
}
}
}
return ch;
}
public override int Read(char[] buffer, int index, int count)
{
int res = 0;
// We leave error handling to the StreamReader :-)
// We handle only "working" parameters
if (RemainingChars != null && buffer != null && index >= 0 && count > 0 && index + count <= buffer.Length)
{
int remainingCharsCount = RemainingChars.Length - RemainingCharsIndex;
res = Math.Min(remainingCharsCount, count);
Array.Copy(RemainingChars, RemainingCharsIndex, buffer, index, res);
RemainingCharsIndex += res;
if (RemainingCharsIndex == RemainingChars.Length)
{
RemainingCharsIndex = 0;
RemainingChars = null;
}
if (res == count)
{
return res;
}
index += res;
count -= res;
}
while (true)
{
List<char> sb = null;
int res2 = base.Read(buffer, index, count);
if (res2 == 0 || ReplaceWith == null)
{
return res;
}
int j = 0;
for (int i = 0; i < res2; i++)
{
char ch = buffer[index + i];
string replace = ReplaceWith(ch);
if (sb != null)
{
if (replace == null)
{
sb.Add(ch);
}
else
{
sb.AddRange(replace);
}
}
else if (replace == null)
{
buffer[j] = ch;
j++;
}
else if (replace.Length == 1)
{
buffer[j] = replace[0];
j++;
}
else if (replace.Length == 0)
{
// We do not advance
}
else
{
sb = new List<char>();
sb.AddRange(replace);
}
}
res2 = j;
if (sb != null)
{
int res3 = Math.Min(sb.Count, count - res2);
sb.CopyTo(0, buffer, index + res2, res3);
if (res3 < sb.Count)
{
RemainingChars = new char[sb.Count - res3];
RemainingCharsIndex = 0;
sb.CopyTo(res3, RemainingChars, 0, RemainingChars.Length);
}
res += res3;
}
else
{
res2 = j;
// Can't happen if sb != null (at least a character must
// have been added)
if (res2 == 0)
{
continue;
}
}
res += res2;
return res;
}
}
}
Use it like:
using (var sr = new ReplacingStreamReader(f))
{
sr.ReplaceWith = x =>
{
return x == 0x6 ? " " : null;
// return x == '.' ? " " : null; // Replace all . with
};
var doc = XmlReader.Create(sr, settings);
Be aware that the ReplacingStreamReader doesn't "know" which part of the xml it is modifying, so rarely a "blind" replace is ok :-) Other than this limitation, you can replace any character with any string (null in the ReplaceWith means "keep the current character", equivalent to x.ToString() in the example given. Returning string.Empty is valid, means remove the current character).
The class is quite interesting: it keeps a char[] RemainingChars with the chars that have been read (and filtered by ReplaceWith) but that haven't been returned by a Read() method because the passed buffer was too much small (the ReplaceWith method could "enlarge" the read string, making it too much big for the buffer!). Note that sb is a List<char> instead of a StringBuilder. Probably using one or the other would be nearly equivalent, code-wise.
You could first read the content into a string replace (escape) the content, and then load it into a XmlReader:
foreach (var f in filenames) {
string text;
using (StreamReader s = new StreamReader(f,Encoding.UTF8)) {
text = s.ReadToEnd();
}
text = text.Replace("\x01",#""); //replace the content
//load some settings
var resolver = new XmlUrlOverrideResolver();
resolver.DtdFileMap[#"X1.DTD"] = #"\\location\X1.DTD";
resolver.DtdFileMap[#"R2.DTD"] = #"\\location\X2.DTD";
resolver.DtdFileMap[#"R5.DTD"] = #"\\location\R5.DTD";
XmlReaderSettings settings = new XmlReaderSettings();
settings.DtdProcessing = DtdProcessing.Parse;
settings.XmlResolver = resolver;
XmlReader doc = XmlReader.Create(text, settings);
//perform processing task
//...
}
Edit: See my Solution below...
I had the following Problem to solve:
We receive Files (mostly adress-Information) from different sources, these can be in Windows Standard with CR/LF ('\r''\n') as Line Break or UNIX with LF ('\n').
When reading text in using the StreamReader.ReadLine() method, this is no Problem because it handles both cases equally.
The Problem occurs when you have a CR or a LF somewhere in the File that is not supposed to be there.
This happens for example if you Export a EXCEL-File with Cells that contain LineBreaks within the Cell to .CSV or other Flat-Files.
Now you have a File that for example has the following structure:
FirstName;LastName;Street;HouseNumber;PostalCode;City;Country'\r''\n'
Jane;Doe;co James Doe'\n'TestStreet;5;TestCity;TestCountry'\r''\n'
John;Hancock;Teststreet;1;4586;TestCity;TestCounty'\r''\n'
Now the StreamReader.ReadLine() Method reads the First Line as:
FirstName;LastName;Street;HouseNumber;PostalCode;City;Country
Which is fine but the seccond Line will be:
Jane;Doe;co James Doe
This will either break your Code or you will have false Results, as the following Line will be:
TestStreet;5;TestCity;TestCountry
So we usualy ran the File trough a tool that checks if there are loose '\n' or '\r' arround and delete them.
But this step is easy to Forget and so I tried to implement a ReadLine() method of my own. The requirement was that it would be able to use one or two LineBreak characters and those characters could be defined freely by the consuming logic.
This is the Class that I came up with:
public class ReadFile
{
private FileStream file;
private StreamReader reader;
private string fileLocation;
private Encoding fileEncoding;
private char lineBreak1;
private char lineBreak2;
private bool useSeccondLineBreak;
private bool streamCreated = false;
private bool endOfStream;
public bool EndOfStream
{
get { return endOfStream; }
set { endOfStream = value; }
}
public ReadFile(string FileLocation, Encoding FileEncoding, char LineBreak1, char LineBreak2, bool UseSeccondLineBreak)
{
fileLocation = FileLocation;
fileEncoding = FileEncoding;
lineBreak1 = LineBreak1;
lineBreak2 = LineBreak2;
useSeccondLineBreak = UseSeccondLineBreak;
}
public string ReadLine()
{
if (streamCreated == false)
{
file = new FileStream(fileLocation, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
reader = new StreamReader(file, fileEncoding);
streamCreated = true;
}
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1];
char lastChar = new char();
char currentChar = new char();
bool first = true;
while (reader.EndOfStream != true)
{
if (useSeccondLineBreak == true)
{
reader.Read(buffer, 0, 1);
lastChar = currentChar;
if (currentChar == lineBreak1 && buffer[0] == lineBreak2)
{
break;
}
else
{
currentChar = buffer[0];
}
if (first == false)
{
builder.Append(lastChar);
}
else
{
first = false;
}
}
else
{
reader.Read(buffer, 0, 1);
if (buffer[0] == lineBreak1)
{
break;
}
else
{
currentChar = buffer[0];
}
builder.Append(currentChar);
}
}
if (reader.EndOfStream == true)
{
EndOfStream = true;
}
return builder.ToString();
}
public void Close()
{
if (streamCreated == true)
{
reader.Close();
file.Close();
}
}
}
This code works fine, it does what it is supposed to do but compared to the original StreamReader.ReadLine() method, it is ~3 Times slower. As we work with large row-Counts the difference is not only messured but also reflected in real world Performance.
(for 700'000 Rows it takes ~ 5 Seconds to read all Lines, extract a Chunk and write it to a new File, with my method it takes ~15 Seconds on my system)
I tried different aproaches with bigger buffers but so far I wasn't able to increase Performance.
What I would be interessted in:
Any suggestions how I could improve the performance of this code to get closer to the original Performance of StreamReader.ReadLine()?
Solution:
This now takes ~6 Seconds (compared to ~5 Sec using the Default 'StreamReader.ReadLine()' ) for 700'000 Rows to do the same things as the code above does.
Thanks Jim Mischel for pointing me in the right direction!
public class ReadFile
{
private FileStream file;
private StreamReader reader;
private string fileLocation;
private Encoding fileEncoding;
private char lineBreak1;
private char lineBreak2;
private bool useSeccondLineBreak;
const int BufferSize = 8192;
int bufferedCount;
char[] rest = new char[BufferSize];
int position = 0;
char lastChar;
bool useLastChar;
private bool streamCreated = false;
private bool endOfStream;
public bool EndOfStream
{
get { return endOfStream; }
set { endOfStream = value; }
}
public ReadFile(string FileLocation, Encoding FileEncoding, char LineBreak1, char LineBreak2, bool UseSeccondLineBreak)
{
fileLocation = FileLocation;
fileEncoding = FileEncoding;
lineBreak1 = LineBreak1;
lineBreak2 = LineBreak2;
useSeccondLineBreak = UseSeccondLineBreak;
}
private int readInBuffer()
{
return reader.Read(rest, 0, BufferSize);
}
public string ReadLine()
{
StringBuilder builder = new StringBuilder();
bool lineFound = false;
if (streamCreated == false)
{
file = new FileStream(fileLocation, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 8192);
reader = new StreamReader(file, fileEncoding);
streamCreated = true;
bufferedCount = readInBuffer();
}
while (lineFound == false && EndOfStream != true)
{
if (position < bufferedCount)
{
for (int i = position; i < BufferSize; i++)
{
if (useLastChar == true)
{
useLastChar = false;
if (rest[i] == lineBreak2)
{
count++;
position = i + 1;
lineFound = true;
break;
}
else
{
builder.Append(lastChar);
}
}
if (rest[i] == lineBreak1)
{
if (useSeccondLineBreak == true)
{
if (i + 1 <= BufferSize - 1)
{
if (rest[i + 1] == lineBreak2)
{
position = i + 2;
lineFound = true;
break;
}
else
{
builder.Append(rest[i]);
}
}
else
{
useLastChar = true;
lastChar = rest[i];
}
}
else
{
position = i + 1;
lineFound = true;
break;
}
}
else
{
builder.Append(rest[i]);
}
position = i + 1;
}
}
else
{
bufferedCount = readInBuffer();
position = 0;
}
}
if (reader.EndOfStream == true && position == bufferedCount)
{
EndOfStream = true;
}
return builder.ToString();
}
public void Close()
{
if (streamCreated == true)
{
reader.Close();
file.Close();
}
}
}
The way to speed this up would be to have it read more than one character at a time. For example, create a 4 kilobyte buffer, read data into that buffer, and then go character-by-character. If you copy character-by-character to a StringBuilder, it's pretty easy.
The code below shows how to parse out lines in a loop. You'd have to split this up so that it can maintain state between calls, but it should give you the idea.
const int BufferSize = 4096;
const string newline = "\r\n";
using (var strm = new StreamReader(....))
{
int newlineIndex = 0;
var buffer = new char[BufferSize];
StringBuilder sb = new StringBuilder();
int charsInBuffer = 0;
int bufferIndex = 0;
char lastChar = (char)-1;
while (!(strm.EndOfStream && bufferIndex >= charsInBuffer))
{
if (bufferIndex > charsInBuffer)
{
charsInBuffer = strm.Read(buffer, 0, buffer.Length);
if (charsInBuffer == 0)
{
// nothing read. Must be at end of stream.
break;
}
bufferIndex = 0;
}
if (buffer[bufferIndex] == newline[newlineIndex])
{
++newlineIndex;
if (newlineIndex == newline.Length)
{
// found a line
Console.WriteLine(sb.ToString());
newlineIndex = 0;
sb = new StringBuilder();
}
}
else
{
if (newlineIndex > 0)
{
// copy matched newline characters
sb.Append(newline.Substring(0, newlineIndex));
newlineIndex = 0;
}
sb.Append(buffer[bufferIndex]);
}
++bufferIndex;
}
// Might be a line left, without a newline
if (newlineIndex > 0)
{
sb.Append(newline.Substring(0, newlineIndex));
}
if (sb.Length > 0)
{
Console.WriteLine(sb.ToString());
}
}
You could optimize this a bit by keeping track of the starting position so that when you find a line you create a string from buffer[start] to buffer[current], without creating a StringBuilder. Instead you call the String(char[], int32, int32) constructor. That's a little tricky to handle when you cross a buffer boundary. Probably would want to handle crossing the buffer boundary as a special case and use a StringBuilder for temporary storage in that case.
I wouldn't bother with that optimization, though, until after I got this first version working.
I try to rewrite pct2rgb.py script into C# (this script gets 8-bit TIFF image and changes the bit depth of it into 24-bit). It worked when its output was stored on the HDD. I want to change the script a little. It should return Dataset on-the-fly w/o storing it on the disk. It works... Almost... The problem is that the returned Dataset contains monochromatic data... I don't really know why. Maybe it's caused by the change of a driver from GTiff to MEM (Driver gTiffDriver = Gdal.GetDriverByName("MEM");). But I don't know how to use GTiff driver and not to store the data on the HDD...
Here's my class:
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using OSGeo.GDAL;
using OSGeo.OGR;
using OSGeo.OSR;
using Driver = OSGeo.GDAL.Driver;
using Encoder = System.Drawing.Imaging.Encoder;
using PixelFormat = System.Drawing.Imaging.PixelFormat;
namespace RasterLoader
{
public class TiffConverter
{
private string _format = "GTiff";
private string _srcFileName = null;
private int _outBands = 3;
private int _bandNumber = 1;
private string[] _args;
public TiffConverter(string[] args)
{
_args = args;
}
public Dataset Convert8To24Bit()
{
Gdal.AllRegister();
string[] argv = Gdal.GeneralCmdLineProcessor(_args, 0);
int i = 1;
while (i < argv.Count())
{
string arg = argv.ElementAt(i);
switch (arg)
{
case "-of":
i++;
_format = argv[i];
break;
case "-b":
i++;
_bandNumber = int.Parse(argv[i]);
break;
case "-rgba":
_outBands = 4;
break;
default:
if (string.IsNullOrEmpty(_srcFileName))
{
_srcFileName = argv[i];
}
else
{
Usage();
}
break;
}
i++;
}
string tmpFileName = _srcFileName + ".bak";
// open source file
Dataset srcDS = Gdal.Open(_srcFileName, Access.GA_ReadOnly);
if (srcDS == null)
{
throw new Exception("Unable to open " + _srcFileName + ".");
}
Band srcBand = srcDS.GetRasterBand(_bandNumber);
// ensure we recognise the driver
Driver dstDriver = Gdal.GetDriverByName(_format);
if (dstDriver == null)
{
throw new Exception("\"" + _format + "\" not registered.");
}
// build color table
int[][] lookup = new int[4][];
lookup[0] = Enumerable.Range(0, 256).ToArray();
lookup[1] = Enumerable.Range(0, 256).ToArray();
lookup[2] = Enumerable.Range(0, 256).ToArray();
lookup[3] = new int[256];
for (i = 0; i < 256; i++)
{
lookup[3][i] = 255;
}
ColorTable ct = srcBand.GetRasterColorTable();
if (ct != null)
{
for (i = 0; i < Math.Min(256, ct.GetCount()); i++)
{
ColorEntry entry = ct.GetColorEntry(i);
for (int j = 0; j < 4; j++)
{
switch (j)
{
case 0:
lookup[j][i] = entry.c1;
break;
case 1:
lookup[j][i] = entry.c2;
break;
case 2:
lookup[j][i] = entry.c3;
break;
case 3:
lookup[j][i] = entry.c4;
break;
}
}
}
}
// create the working file
string tifFileName = string.Empty;
if (_format.Equals("GTiff", StringComparison.OrdinalIgnoreCase))
{
tifFileName = tmpFileName;
}
else
{
tifFileName = Path.Combine(Directory.GetParent(tmpFileName).Name, "temp.gif");
}
// Driver gTiffDriver = Gdal.GetDriverByName("GTiff");
Driver gTiffDriver = Gdal.GetDriverByName("MEM");
Dataset tifDS = gTiffDriver.Create(tifFileName, srcDS.RasterXSize, srcDS.RasterYSize, _outBands, DataType.GDT_Byte,
new string[] {});
// we should copy projection information and so forth at this point
tifDS.SetProjection(srcDS.GetProjection());
double[] geotransform = new double[6];
srcDS.GetGeoTransform(geotransform);
tifDS.SetGeoTransform(geotransform);
if (srcDS.GetGCPCount() > 0)
{
tifDS.SetGCPs(srcDS.GetGCPs(), srcDS.GetGCPProjection());
}
// do the processing one scanline at a time
for (int iY = 0; iY < srcDS.RasterYSize; iY++)
{
byte[] srcData = new byte[srcDS.RasterXSize*1];
srcBand.ReadRaster(0, iY, srcDS.RasterXSize, 1, srcData, srcDS.RasterXSize, 1, 0, 0);
for (int iBand = 0; iBand < _outBands; iBand++)
{
int[] bandLookup = lookup[iBand];
int[] dstData = new int[srcData.Count()];
for (int index = 0; index < srcData.Count(); index++)
{
byte b = srcData[index];
dstData[index] = bandLookup[b];
}
tifDS.GetRasterBand(iBand + 1).WriteRaster(0, iY, srcDS.RasterXSize, 1, dstData,
srcDS.RasterXSize, 1, 0, 0);
}
}
return tifDS;
}
private void Usage()
{
Console.WriteLine("Usage: pct2rgb.py [-of format] [-b <band>] [-rgba] source_file dest_file");
throw new Exception("Bad arguments.");
}
}
Ok. I found a solution. My problem was I tried to convert all the files (even in a good format). Then ct variable (ColorTable) was null.
Solution:
if (ct != null)
{
// same code here
}
else
{
return srcDS;
}
What C# template engine
that uses 'pure' HTML having only text and markers
sans any control flow like if, while, loop or expressions,
separating html from control code ?
Below is the example phone book list code,
expressing how this should be done:
string html=#"
<html><head><title>#title</title></head>
<body>
<table>
<tr>
<td> id</td> <td> name</td> <td> sex</td> <td>phones</td>
</tr><!--#contacts:-->
<tr>
<td>#id</td> <td>#name</td> <td>#sex</td>
<td>
<!--#phones:-->#phone <br/>
<!--:#phones-->
</td>
</tr><!--:#contacts-->
</table>
</body>
</html>";
var contacts = from c in db.contacts select c;
Marker m = new Marker(html);
Filler t = m.Mark("title");
t.Set("Phone book");
Filler c = m.Mark("contacts", "id,name,sex");
// **foreach** expressed in code, not in html
foreach(var contact in contacts) {
int id = contact.id;
c.Add(id, contact.name, contact.sex);
Filler p = c.Mark("phones", "phone");
var phones = from ph in db.phones
where ph.id == id
select new {ph.phone};
if (phones.Any()) {
foreach(var ph in phones) {
p.Add(ph);
}
} else {
fp.Clear();
}
}
Console.Out.WriteLine(m.Get());
Use this code:
Templet.cs
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace templaten.com.Templaten
{
public class tRange
{
public int head, toe;
public tRange(int _head, int _toe)
{
head = _head;
toe = _toe;
}
}
public enum AType
{
VALUE = 0,
NAME = 1,
OPEN = 2,
CLOSE = 3,
GROUP = 4
}
public class Atom
{
private AType kin;
private string tag;
private object data;
private List<Atom> bag;
public Atom(string _tag = "",
AType _kin = AType.VALUE,
object _data = null)
{
tag = _tag;
if (String.IsNullOrEmpty(_tag))
_kin = AType.GROUP;
kin = _kin;
if (_kin == AType.GROUP)
bag = new List<Atom>();
else
bag = null;
data = _data;
}
public AType Kin
{
get { return kin; }
}
public string Tag
{
get { return tag; }
set { tag = value; }
}
public List<Atom> Bag
{
get { return bag; }
}
public object Data
{
get { return data; }
set { data = value; }
}
public int Add(string _tag = "",
AType _kin = AType.VALUE,
object _data = null)
{
if (bag != null)
{
bag.Add(new Atom(_tag, _kin, _data));
return bag.Count - 1;
}
else
{
return -1;
}
}
}
public class Templet
{
private string content;
string namepat = "\\w+";
string justName = "(\\w+)";
string namePre = "#";
string namePost = "";
string comment0 = "\\<!--\\s*";
string comment1 = "\\s*--\\>";
private Atom tokens; // parsed contents
private Dictionary<string, int> iNames; // name index
private Dictionary<string, tRange> iGroups; // groups index
private Atom buffer; // output buffer
private Dictionary<string, int> _iname; // output name index
private Dictionary<string, tRange> _igroup; // output index
public Templet(string Content = null)
{
Init(Content);
}
private int[] mark(string[] names, string group)
{
if (names == null || names.Length < 1) return null;
tRange t = new tRange(0, buffer.Bag.Count - 1);
if (group != null)
{
if (!_igroup.ContainsKey(group)) return null;
t = _igroup[group];
}
int[] marks = new int[names.Length];
for (int i = 0; i < marks.Length; i++)
marks[i] = -1;
for (int i = t.head; i <= t.toe; i++)
{
if (buffer.Bag[i].Kin == AType.NAME)
{
for (int j = 0; j < names.Length; j++)
{
if (String.Compare(
names[j],
buffer.Bag[i].Tag,
true) == 0)
{
marks[j] = i;
break;
}
}
}
}
return marks;
}
public Filler Mark(string group, string names)
{
Filler f = new Filler(this, names);
f.di = mark(f.names, group);
f.Group = group;
tRange t = null;
if (_igroup.ContainsKey(group)) t = _igroup[group];
f.Range = t;
return f;
}
public Filler Mark(string names)
{
Filler f = new Filler(this, names);
f.di = mark(f.names, null);
f.Group = "";
f.Range = null;
return f;
}
public void Set(int[] locations, object[] x)
{
int j = Math.Min(x.Length, locations.Length);
for (int i = 0; i < j; i++)
{
int l = locations[i];
if ((l >= 0) && (buffer.Bag[l] != null))
buffer.Bag[l].Data = x[i];
}
}
public void New(string group, int seq = 0)
{
// place new group copied from old group just below it
if (!( iGroups.ContainsKey(group)
&& _igroup.ContainsKey(group)
&& seq > 0)) return;
tRange newT = null;
tRange t = iGroups[group];
int beginRange = _igroup[group].toe + 1;
for (int i = t.head; i <= t.toe; i++)
{
buffer.Bag.Insert(beginRange,
new Atom(tokens.Bag[i].Tag,
tokens.Bag[i].Kin,
tokens.Bag[i].Data));
beginRange++;
}
newT = new tRange(t.toe + 1, t.toe + (t.toe - t.head + 1));
// rename past group
string pastGroup = group + "_" + seq;
t = _igroup[group];
buffer.Bag[t.head].Tag = pastGroup;
buffer.Bag[t.toe].Tag = pastGroup;
_igroup[pastGroup] = t;
// change group indexes
_igroup[group] = newT;
}
public void ReMark(Filler f, string group)
{
if (!_igroup.ContainsKey(group)) return;
Map(buffer, _iname, _igroup);
f.di = mark(f.names, group);
f.Range = _igroup[group];
}
private static void Indexing(string aname,
AType kin,
int i,
Dictionary<string, int> dd,
Dictionary<string, tRange> gg)
{
switch (kin)
{
case AType.NAME: // index all names
dd[aname] = i;
break;
case AType.OPEN: // index all groups
if (!gg.ContainsKey(aname))
gg[aname] = new tRange(i, -1);
else
gg[aname].head = i;
break;
case AType.CLOSE:
if (!gg.ContainsKey(aname))
gg[aname] = new tRange(-1, i);
else
gg[aname].toe = i;
break;
default:
break;
}
}
private static void Map(Atom oo,
Dictionary<string, int> dd,
Dictionary<string, tRange> gg)
{
for (int i = 0; i < oo.Bag.Count; i++)
{
string aname = oo.Bag[i].Tag;
Indexing(oo.Bag[i].Tag, oo.Bag[i].Kin, i, dd, gg);
}
}
public void Init(string Content = null)
{
content = Content;
tokens = new Atom("", AType.GROUP);
iNames = new Dictionary<string, int>();
iGroups = new Dictionary<string, tRange>();
// parse content into tokens
string namePattern = namePre + namepat + namePost;
string patterns =
"(?<var>" + namePattern + ")|" +
"(?<head>" + comment0 + namePattern + ":" + comment1 + ")|" +
"(?<toe>" + comment0 + ":" + namePattern + comment1 + ")";
Regex jn = new Regex(justName, RegexOptions.Compiled);
Regex r = new Regex(patterns, RegexOptions.Compiled);
MatchCollection ms = r.Matches(content);
int pre = 0;
foreach (Match m in ms)
{
tokens.Add(content.Substring(pre, m.Index - pre));
int idx = -1;
if (m.Groups.Count >= 3)
{
string aname = "";
MatchCollection x = jn.Matches(m.Value);
if (x.Count > 0 && x[0].Groups.Count > 1)
aname = x[0].Groups[1].ToString();
AType t = AType.VALUE;
if (m.Groups[1].Length > 0) t = AType.NAME;
if (m.Groups[2].Length > 0) t = AType.OPEN;
if (m.Groups[3].Length > 0) t = AType.CLOSE;
if (aname.Length > 0)
{
tokens.Add(aname, t);
idx = tokens.Bag.Count - 1;
}
Indexing(aname, t, idx, iNames, iGroups);
}
pre = m.Index + m.Length;
}
if (pre < content.Length)
tokens.Add(content.Substring(pre, content.Length - pre));
// copy tokens into buffer
buffer = new Atom("", AType.GROUP);
for (int i = 0; i < tokens.Bag.Count; i++)
buffer.Add(tokens.Bag[i].Tag, tokens.Bag[i].Kin);
// initialize index of output names
_iname = new Dictionary<string, int>();
foreach (string k in iNames.Keys)
_iname[k] = iNames[k];
// initialize index of output groups
_igroup = new Dictionary<string, tRange>();
foreach (string k in iGroups.Keys)
{
tRange t = iGroups[k];
_igroup[k] = new tRange(t.head, t.toe);
}
}
public string Get()
{
StringBuilder sb = new StringBuilder("");
for (int i = 0; i < buffer.Bag.Count; i++)
{
switch (buffer.Bag[i].Kin)
{
case AType.VALUE:
sb.Append(buffer.Bag[i].Tag);
break;
case AType.NAME:
sb.Append(buffer.Bag[i].Data);
break;
case AType.OPEN:
case AType.CLOSE:
break;
default: break;
}
}
return sb.ToString();
}
}
public class Filler
{
private Templet t = null;
public int[] di;
public string[] names;
public string Group { get; set; }
public tRange Range { get; set; }
private int seq = 0;
public Filler(Templet tl, string markers = null)
{
t = tl;
if (markers != null)
names = markers.Split(new char[] { ',' },
StringSplitOptions.RemoveEmptyEntries);
else
names = null;
}
public void init(int length)
{
di = new int[length];
for (int i = 0; i < length; i++)
di[i] = -1;
seq = 0;
Group = "";
Range = null;
}
// clear contents inside marked object or group
public void Clear()
{
object[] x = new object[di.Length];
for (int i = 0; i < di.Length; i++)
x[i] = null;
t.Set(di, x);
}
// set value for marked object,
// or add row to group and set value to columns
public void Set(params object[] x)
{
t.Set(di, x);
}
public void Add(params object[] x)
{
if (Group.Length > 0)
{
t.New(Group, seq);
++seq;
t.ReMark(this, Group);
}
t.Set(di, x);
}
}
}
Testing program
Program.cs
Templet m = new Templet(html);
Filler f= m.Mark("title");
f.Set("Phone book");
Filler fcontacts = m.Mark("contacts", "id,name,sex,phone");
fcontacts.Add(1, "Akhmad", "M", "123456");
fcontacts.Add(2, "Barry", "M", "234567");
fcontacts.Add(1, "Charles", "M", "345678");
Console.Out.WriteLine(m.Get());
Still can't do nested loop- yet.
Just use ASP.NET. Whether you use webforms or MVC, it's super easy to have C# in your .cs files, and HTML in your .aspx files.
As with anything in programming, it's 99% up to you to do things right. Flexible UI engines aren't going to enforce that you follow good coding practices.
In principle most any template engine you choose can separate HTML from control logic with the proper architecture. using an MVC (Or MVVM) pattern, if you construct your model in such a way that the controller contains the if/then logic instead of the view you can eliminate it from the view.
That said, the syntax you use is very close to Razor syntax which is easily available for ASP.NET MVC through NuGet packages.
I totally hear you. I built SharpFusion, which has some other stuff in it but if you look for the template.cs file you will see the handler that parses a HTML file and simply replaces out tokens with values that you've made in c#.
Because no XML parsing is done like ASP.NET the framework loads much faster than even an MVC site.
Another alternative is ServiceStack.