When exporting my data from sql to excel, it starts with the second line and not the first. I think I've narrowed down the problem to the streamwriter section of code but can't seem to work out where it's going wrong!
This is the code;
public static void ToCsv3(IDataReader myReader, string fileName, bool includeHeaderAsFirstRow)
{
const string Separator = ",";
Stream s = File.Create(fileName + ".txt");
StreamWriter streamWriter = new StreamWriter(s, Encoding.Unicode);
StringBuilder sb = null;
if (includeHeaderAsFirstRow)
{
sb = new StringBuilder();
for (int index = 0; index < myReader.FieldCount; index++)
{
if (myReader.GetName(index) != null)
sb.Append(myReader.GetName(index));
if (index < myReader.FieldCount - 1)
sb.Append(Separator);
}
streamWriter.WriteLine(sb.ToString());
}
int j = 0;
while (myReader.Read())
{
sb = new StringBuilder();
for (int index = 0; index < myReader.FieldCount - 1; index++)
{
if (!myReader.IsDBNull(index))
{
string value = myReader.GetValue(index).ToString();
if (myReader.GetFieldType(index) == typeof(String))
{
if (value.IndexOf("\"") >= 0)
value = value.Replace("\"", "\"\"");
if (value.IndexOf(Separator) >= 0)
value = "\"" + value + "\"";
}
if (j != 0)
{
if (index == 0)
{
sb.Append(Environment.NewLine);
}
}
sb.Append(value);
j = j + 1;
}
if (index < myReader.FieldCount - 1)
sb.Append(Separator);
}
if (!myReader.IsDBNull(myReader.FieldCount - 1))
sb.Append(myReader.GetValue(myReader.FieldCount).ToString().Replace(Separator, " "));
streamWriter.Write(sb.ToString());
}
myReader.Close();
streamWriter.Close();
}
I'd rather decompose the solution into creating CSV and saving it to the file:
public static IEnumerable<String> ToCsv(IDataReader reader,
Boolean includeHeaderAsFirstRow,
Char separator = ',',
Char quotation = '"') {
if (null == reader)
throw new ArgumentNullException("reader");
String qt = quotation.ToString();
StringBuilder Sb = new StringBuilder();
if (includeHeaderAsFirstRow) {
for (int i = 0; i < reader.FieldCount; ++i) {
if (i > 0)
Sb.Append(separator);
String name = reader.GetName(i);
if (name.Contains(separator) || name.Contains(quotation))
name = qt + name.Replace(qt, qt + qt) + qt;
Sb.Append(name);
}
yield return Sb.ToString();
}
while (reader.Read()) {
Sb.Clear();
for (int i = 0; i < reader.FieldCount; ++i) {
if (i > 0)
Sb.Append(separator);
if (!reader.IsDBNull(i)) {
String item = Convert.ToString(reader[i]);
if (item.Contains(separator) || item.Contains(quotation))
item = qt + item.Replace(qt, qt + qt) + qt;
Sb.Append(item);
}
}
yield return Sb.ToString();
}
}
public static void CsvToFile(String fileName,
IDataReader reader,
Char separator = ',',
Char quotation = '"') {
if (String.IsNullOrEmpty(Path.GetExtension(fileName)))
fileName += ".txt"; // ".csv" looks better here
File.WriteAllLines(fileName, ToCsv(reader, separator, quotation));
}
Related
So I've finished Huffman compression algorithm, but it will work only if a .txt file doesn't contain , / ;
So I want to ignore only these symbols
What I've got:
Dictionary of a character and its code, separated by comma. Each pair separated by ;
/ at the end of a dictionary (because next symbol is number of added zeros)
Example:
c,1;a,00;t,01;/3
Of course, when I type in file something like "I love dogs, cats", I've got problems:) Like this one: System.FormatException: "String must be exactly one character long."
Here's my code:
Dictionary<char, string> forDecoding = new Dictionary<char, string>();
using (sr = new StreamReader(fileName))
{
string line = sr.ReadToEnd();
string[] lines = line.Split('/');
string[] dict = lines[0].Split(';');
string[] item = null;
foreach (var v in lines[0])
{
encoded.Add(v);
}
count = Convert.ToInt32(lines[1]);
for (int i = 0; i < dict.Length - 1; i++)
{
item = dict[i].Split(',');
forDecoding.Add(Convert.ToChar(item[0]), item[1]);
}
}
Is there any way to modify it in order to see that comma inside like another symbol of input information that has to be encoded?
Try the following:
Dictionary<char, string> forDecoding = new Dictionary<char, string>();
...
private void LoadEncodedData(string filename)
{
//remove existing items
forDecoding.Clear();
using (System.IO.StreamReader sr = new System.IO.StreamReader(filename))
{
int fileLength = 0;
char previousChar = char.MinValue;
char secondPreviousChar = char.MinValue;
char dictKey = char.MinValue;
string dictVal = string.Empty;
bool isDictVal = false;
bool isNextCharEOF = false;
bool isNumberOfZeros = false;
int numberOfAddedZeros = 0;
string numberOfZerosStr = string.Empty;
while (!sr.EndOfStream)
{
//read char and move position
char currentChar = (char)sr.Read();
//read next char without moving position
char nextChar = (char)sr.Peek();
//65535 = EOF
if (nextChar == 65535)
{
//set val
isNextCharEOF = true;
}
if (!isNextCharEOF && !isDictVal && nextChar == ',')
{
//set value
dictKey = currentChar;
}
else if (!isDictVal && previousChar == ',' && currentChar != ',')
{
//start saving chars for dictionary value
dictVal = currentChar.ToString();
//set value
isDictVal = true;
}
else if (isDictVal && currentChar == ';')
{
System.Diagnostics.Debug.WriteLine("dictKey: '" + dictKey.ToString() + "' dictVal: '" + dictVal + "'");
//add to dictionary
forDecoding.Add(dictKey, dictVal);
//re-initialize
dictVal = string.Empty;
//set value
isDictVal = false;
}
else if (isDictVal)
{
//append
dictVal += currentChar.ToString();
}
else if (!isDictVal && secondPreviousChar == ';' && previousChar == '/' && currentChar != ',')
{
if (!isNextCharEOF)
{
//set value
isNumberOfZeros = true;
numberOfZerosStr = currentChar.ToString();
}
else
{
//set value
numberOfZerosStr = currentChar.ToString();
numberOfAddedZeros = 0;
Int32.TryParse(numberOfZerosStr, out numberOfAddedZeros);
System.Diagnostics.Debug.WriteLine(" numberOfAddedZeros: " + numberOfAddedZeros + System.Environment.NewLine);
//set value
isNumberOfZeros = false;
numberOfZerosStr = string.Empty;
}
}
else if (isNumberOfZeros && (isNextCharEOF || nextChar != ','))
{
//append
numberOfZerosStr += currentChar;
numberOfAddedZeros = 0;
Int32.TryParse(numberOfZerosStr, out numberOfAddedZeros);
System.Diagnostics.Debug.WriteLine(" numberOfAddedZeros: " + numberOfAddedZeros + System.Environment.NewLine);
//set value
isNumberOfZeros = false;
numberOfZerosStr = string.Empty;
}
else if (isNumberOfZeros)
{
//append
numberOfZerosStr += currentChar;
}
//set value
secondPreviousChar = previousChar;
previousChar = currentChar;
fileLength += 1; //increment
}
}
}
Example data: c,1;/,00;t,01;/3
I have to find subtext in text without using builtin function of string.
public static void Main(string[] args)
{
string subtext = "polly";
string text = "polly put the katle on,polly put the katle on,polly put the katle on,we all have tea";
int i, j, found;
int strLen, wordLen;
strLen = text.Length;
wordLen = subtext.Length;
for (i = 0; i < strLen - wordLen; i++)
{
found = 1;
for (j = 0; j < wordLen; j++)
{
if (text[i + j] != subtext[j])
{
found = 0;
break;
}
}
if (found == 1)
{
Console.WriteLine(" found at index:", subtext, i);
Console.ReadLine();
}
}
}
I am not sure how long you would like to search, your current code seems to find all indexes (or at least that seems to be the intent)
Some things you could change however is instead of always starting the loop, you could validate the if the char at position i matches the first char of the subtext, and if not continue.
When you want to write the data to the console, don't forget to add the spaceholders for your arguments, like:
Console.WriteLine("found {0} at index: {1}", subtext, i);
For the rest, I guess your current implementation is okay, but you could add some validations, like ensuring that both texts are available, and if subtext is longer than the text, simply return -1 directly.
For a simple find of first index, I wrote this one up, it still looks pretty similar to yours
private static int FindIn( string text, string sub ) {
if (string.IsNullOrWhiteSpace( text ) || string.IsNullOrWhiteSpace( sub ) ) {
return string.IsNullOrWhiteSpace( sub ) ? 0 : -1;
}
if (text.Length < sub.Length) {
return -1;
}
for (int i = 0; i < text.Length - sub.Length; i++) {
if (text[i] != sub[0]) {
continue;
}
var matched = true;
for (int j = 1; j < sub.Length && i + j < text.Length; j++) {
if (text[i+j] != sub[j]) {
matched = false;
break;
}
}
if (matched) {
return i;
}
}
return -1;
}
Which you can play around with here
There are a lot of pattern-matching algorithms in this book, i will leave here c# implementation of Knuth-Morris-Pratt algorithm.
static int[] GetPrefix(string s)
{
int[] result = new int[s.Length];
result[0] = 0;
int index = 0;
for (int i = 1; i < s.Length; i++)
{
while (index >= 0 && s[index] != s[i]) { index--; }
index++;
result[i] = index;
}
return result;
}
static int FindSubstring(string pattern, string text)
{
int res = -1;
int[] pf = GetPrefix(pattern);
int index = 0;
for (int i = 0; i < text.Length; i++)
{
while (index > 0 && pattern[index] != text[i]) { index = pf[index - 1]; }
if (pattern[index] == text[i]) index++;
if (index == pattern.Length)
{
return res = i - index + 1;
}
}
return res;
}
If you are looking for all occurance of the subtect in the text you can use the following code:
public static void Main(string[] args)
{
string subtext = "polly";
string text = "polly put the katle on,polly put the katle on,polly put the katle on,we all have tea";
int index = 0;
int startPosition = 0;
bool found = false;
while (index < text.Length - 1)
{
if (subtext[0] == text[index])
{
startPosition = index;
index++;
for (int j = 1; j <= subtext.Length - 1; j++)
{
if (subtext[j] != text[index])
{
found = false;
break;
}
else
{
found = true;
}
index++;
}
}
if (found)
{
Console.WriteLine("{0} found at index: {1}", subtext, startPosition);
found = false;
}
index++;
}
Console.ReadLine();
}
If you are looking only for the first occurance add break in the "if (found)" condition
I want to search some lines containing some specific data and store these data in a list, how can I do this? I have written this code for my project, but this code can only find and store the first line containing my wanted data because all these lines are started with the same structure, for an example, my data is repeated in lines 100, 250, 400, 660, and all these lines are stated with "|PROD |OIL |" .
double[] oil_prdc = new double[10];
double[] water_prdc = new double[10];
double[] gas_prdc = new double[10];
double[] water_injc = new double[10];
double[] gas_injc = new double[10];
int length_time = 5;
string[][] characters = new string[2391][];
string [] Charss;
string[][]counter = new string[20][];
while ((line = file.ReadLine()) != null)
{
for (int i = 0; i < length_time; i++)
{
total_production.Add(null);
total_production[i] = new List<_5_points>();
while (line.Contains("|PROD |OIL |"))
{
Charss = line.Split('|');
for (int j = 0; j < Charss.Length; j++)
{
if (j == 9)
{
oil_prdc[i] = 1000 * Convert.ToDouble(Charss[j]);
}
else if (j == 10)
{
water_prdc[i] = Convert.ToDouble(Charss[j]);
}
else if (j == 11)
{
gas_prdc[i] = 1000 * Convert.ToDouble(Charss[j]);
}
}
count++;
if (count > Charss.Length) break;
}
while (line.Contains(" |WINJ |WAT "))
{
Charss = line.Split('|');
for (int jj = 0; jj < Charss.Length; jj++)
{
if (jj == 8)
{
water_injc[i] = 1000 * Convert.ToDouble(Charss[jj]);
}
}
count++;
if (count > Charss.Length) break;
}
while (line.Contains(" |GINJ |Passive "))
{
Charss = line.Split('|');
for (int ij = 0; ij < Charss.Length; ij++)
{
if (ij == 9)
{
gas_injc[i] = 1000 * Convert.ToDouble(Charss[ij]);
}
}
count++;
if (count > Charss.Length) break;
}
_5_points temp=new _5_points{OIL_PRD =oil_prdc[i],GAS_PRD=gas_prdc[i],WATER_PRD=water_prdc[i],WATER_INJ=water_injc[i],GAS_INJ=gas_injc[i]};
total_production[i].Add(temp);
}
}
Example of how to read a file:
try
{
// Create an instance of StreamReader to read from a file.
// The using statement also closes the StreamReader.
using (StreamReader sr = new StreamReader("TestFile.txt"))
{
string line;
// Read and display lines from the file until the end of
// the file is reached.
while ((line = sr.ReadLine()) != null)
{
Console.WriteLine(line);
}
}
}
catch (Exception e)
{
// Let the user know what went wrong.
Console.WriteLine("The file could not be read:");
Console.WriteLine(e.Message);
}
Search for specific text in each "line":
public static string getBetween(string strSource, string strStart, string strEnd)
{
int Start, End;
if (strSource.Contains(strStart) && strSource.Contains(strEnd))
{
Start = strSource.IndexOf(strStart, 0) + strStart.Length;
End = strSource.IndexOf(strEnd, Start);
return strSource.Substring(Start, End - Start);
}
else
{
return "";
}
}
How to use it:
string text = "This is an example string and my data is here";
string data = getBetween(text, "my", "is");
I need a help with iText in C#. I'm trying to extract a table from a PDF file and save this into a new CSV file, keeping the values in the correct places. For this, I thought the solution was to create a two-dimensional array to organize the data.
Extracting all information from PDF with iText, I saw it was possible to get some numerical data that seemed to be the position of a piece of text on the page and I organized my array based these indexes. It didn’t work, the text was completely dispersed in various different cells. Now, I want to know what this values means, because they don't follow a "correct" order and I want to know if is possible to organize the future table with this.
I'm using ";" as delimiter cell.
For testing, I'm using this PDF
http://www.americana.sp.gov.br/americanaV5/download/contasPublicas/Despesa_Categoria_Economica_2014.pdf
Here's my code:
protected void Button2_Click(object sender, EventArgs e)
{
try
{
TextBox2.Text = "";
byte[] conteudo = download(TextBox1.Text);
if (conteudo != null)
{
PdfReader leitorp = new PdfReader(conteudo);
ITextExtractionStrategy estrategia = new SimpleTextExtractionStrategy();
List<Celula> celulas = new List<Celula>();
int i, j;
for (i = 1; i <= leitorp.NumberOfPages; i++)
{
//Total and crude extraction of all information from text in PDF via iText, separate lines in an array of strings.
string[] linhas = (Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, leitorp.GetPageContent(i)))).Split('\n');
for (j = 1; j < linhas.Length; j++)
{
if (linhas[j].Length > 2)
{
if (linhas[j].Substring(0, 2).Equals("BT"))
{
string[] campos = linhas[j].Split(' ');
Celula umacelula = new Celula();
umacelula.coluna = float.Parse(campos[1]);
umacelula.linha = float.Parse(campos[2]);
linhadodebug = j;
int t1 = linhas[j].IndexOf('(');
int t2 = linhas[j].LastIndexOf(')');
umacelula.conteudo = System.Text.RegularExpressions.Regex.Replace((linhas[j].Substring(linhas[j].IndexOf('(') + 1, (linhas[j].LastIndexOf(')') - 1 - linhas[j].IndexOf('(')))), #"\s\s+", "");
celulas.Add(umacelula);
}
}
}
}
leitorp.Close();
string[] totallinhas = new string[celulas.Count];
string[] totalcolunas = new string[celulas.Count];
for (i = 0; i < celulas.Count; i++)
{
totallinhas[i] = celulas[i].linha.ToString();
totalcolunas[i] = celulas[i].coluna.ToString();
}
totallinhas = totallinhas.Distinct().ToArray();
totalcolunas = totalcolunas.Distinct().ToArray();
Array.Sort(totallinhas);
Array.Reverse(totallinhas);
Array.Sort(totalcolunas);
Array.Reverse(totalcolunas);
string[,] matriz = new string[totallinhas.Length + 1, totalcolunas.Length + 1];
for (i = 1; i < totallinhas.Length; i++)
{
matriz[i, 0] = totallinhas[i - 1].ToString();
}
for (i = 1; i < totalcolunas.Length; i++)
{
matriz[0, i] = totalcolunas[i - 1].ToString();
}
int z;
for (i = 0; i < celulas.Count(); i++)
{
for (j = 1; j < matriz.GetLength(0); j++)
{
for (z = 1; z < matriz.GetLength(1); z++)
{
if ((celulas[i].linha.ToString().Equals(matriz[j, 0])) && (celulas[i].coluna.ToString().Equals(matriz[0, z])))
{
matriz[j, z] = celulas[i].conteudo.ToString();
}
}
}
}
StringWriter texto = new StringWriter();
for (i = 0; i < matriz.GetLength(0); i++)
{
for (j = 0; j < matriz.GetLength(1); j++)
{
texto.Write(matriz[i, j] + ";");
}
texto.WriteLine();
}
Response.ContentType = "text/plain";
Response.AddHeader("content-disposition", "attachment;filename=" + string.Format("teste-{0}.csv", string.Format("{0:ddMMyyyy}", DateTime.Today)));
Response.Clear();
using (StreamWriter writer = new StreamWriter(Response.OutputStream, Encoding.UTF8))
{
writer.Write(texto.ToString());
}
Response.End();
}
}
catch (Exception E)
{
TextBox2.Text = "Erro Button2_Click: " + E.Message + " # " + linhadodebug.ToString();
}
}
And here, the struct of celula (cell) and method to download the file:
public struct Celula
{
public float coluna;
public float linha;
public string conteudo;
public Celula(float coluna, float linha, string conteudo)
{
this.coluna = coluna;
this.linha = linha;
this.conteudo = conteudo;
}
public Celula(Celula celula)
{
this.coluna = celula.coluna;
this.linha = celula.linha;
this.conteudo = celula.conteudo;
}
}
protected byte[] download(string url)
{
try
{
WebRequest endereco = HttpWebRequest.Create(url);
Stream leitor = endereco.GetResponse().GetResponseStream();
MemoryStream memoria = new MemoryStream();
byte[] conteudo = null;
int count = 0;
do
{
byte[] buffer = new byte[1024];
count = leitor.Read(buffer, 0, 1024);
memoria.Write(buffer, 0, count);
}
while (leitor.CanRead && count > 0);
// Converte da memória direto para bytes
conteudo = memoria.ToArray();
if (conteudo != null)
{
return conteudo;
}
else
{
TextBox2.Text = "Error: download null.";
return null;
}
}
catch (Exception E)
{
TextBox2.Text = "Error download: " + E.Message;
return null;
}
}
This is a non-profit project. I hope you can help me. Thank you!
I was wondering if someone could help. I am trying to change a string in the below format:
goeirjew98rut34ktljre9t30t4j3der
to be outputted as below:
geo_irje_w
98r_ut34_k
tlj_re9t_3
0t4_j3de_r
So insert an underscore after the third and seventh characters and insert a new line after the eighth.
After doing some reading I thought the best way to do this might be via a regular expression such as the example at the below link:
http://msdn.microsoft.com/en-us/library/xwewhkd1(v=vs.110).aspx
However I am not sure how to modify this for my needs. Any help what so ever would be greatly appreciated.
Thanks
It can be easily solved by a regular expression and a Replace, like this:
var s = "goeirjew98rut34ktljre9t30t4j3der";
Regex.Replace(s, #"(\w{3})(\w{4})(\w{1})", "$1_$2_$3\n").Dump();
This is definitely something that is best solved with a quick loop:
string s = "goeirjew98rut34ktljre9t30t4j3der";
var outputs = new List<string>();
for (var i = 0; i < s.Length; i += 8)
{
string sub = s.Substring(i, Math.Min(8, s.Length - i));
if(sub.Length > 3)
sub = sub.Insert(3, "_");
if (sub.Length > 8)
sub = sub.Insert(8, "_");
outputs.Add(sub);
}
I think you can not insert char in string by this rule, but I hope this code be useful for you.
public string Decode(string str)
{
var state = 0;
var c = 0;
var newStr = string.Empty;
foreach (var ch in str)
{
c++;
newStr += ch;
if (state == 0 && c == 3)
{
newStr += "_";
state = 1;
c = 0;
}
if (state == 1 && c == 4)
{
newStr += "_";
state = 2;
c = 0;
}
if (state == 2 && c == 1)
{
newStr += "\r\n";
state = 0;
c = 0;
}
}
return newStr;
}
I would make it so u can change everything on the fly and use stringextentions
class Program
{
static void Main(string[] args)
{
string str = "goeirjew98rut34ktljre9t30t4j3der";
var parts = str.SplitInParts(8); //Split in different parts
foreach (var part in parts)
{
string formattedString = part.MultiInsert("_", 2, 6); //Insert your char on all position u want
Console.WriteLine(formattedString);
}
Console.ReadKey();
}
}
static class StringExtensions
{
public static IEnumerable<String> SplitInParts(this String s, Int32 partLength)
{
if (s == null)
throw new ArgumentNullException("s");
if (partLength <= 0)
throw new ArgumentException("Part length has to be positive.", "partLength");
for (var i = 0; i < s.Length; i += partLength)
yield return s.Substring(i, Math.Min(partLength, s.Length - i));
}
public static string MultiInsert(this string str, string insertChar, params int[] positions)
{
StringBuilder sb = new StringBuilder(str.Length + (positions.Length * insertChar.Length));
var posLookup = new HashSet<int>(positions);
for (int i = 0; i < str.Length; i++)
{
sb.Append(str[i]);
if (posLookup.Contains(i))
sb.Append(insertChar);
}
return sb.ToString();
}
}