I am trying to scrape data from the webpage. However, I am having a trouble scraping all of data in the table. I need to switch pages to get all the data and I am willing to get an output with DataGridTable. I am having a trouble figuring out how to do this even though there is a change with number of pages they have in the website. I would like to add information automatically on a data grid table pages by pages. My input(Website) is only showing 25 items. Thats why I have 25 items in DataGridTable. I would like to justify a "number of pages" from "go to end page button"'s element. So that my program knows how many pages are there to scrape from the website. but, if there's a different way, I wanna know thank you.
This is my code for now.
DataTable dt = new DataTable();
var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
foreach (var row in header.FindElements(By.TagName("tr")))
{
//Configure Number of Col and row
int cellIndex = 0;
string[] arr = new string[32];
//Get Cell Data
foreach (var cell in row.FindElements(By.TagName("th")))
{
// Check the header cell for a checkbox child. If no
// such child exists, add the column.
var headerCheckboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (headerCheckboxes.Count == 0)
{
//Number of Col Data Load
if (cellIndex <= 29)
{
arr[cellIndex] = cell.Text;
dt.Columns.Add(cell.Text);
}
else
cellIndex++;
}
}
Console.WriteLine(arr);
}
var table = driver.FindElement(By.CssSelector("#gridComponent"));
//Get Row value
foreach (var row in table.FindElements(By.TagName("tr")))
{
//Configure Number of Col and row
int cellIndex = 0;
// Use a list instead of an array
List<string> arr = new List<string>();
//Get Cell Data
foreach (var cell in row.FindElements(By.TagName("td")))
{
// Skip the first column in the row by checking
// if the cell index is 0.
if (cellIndex != 0)
{
string cellValue = "";
Console.WriteLine(cell);
var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (checkboxes.Count > 0)
{
bool isChecked = false;
isChecked = checkboxes[0].Selected;
cellValue = isChecked.ToString();
}
else
{
cellValue = cell.Text;
}
arr.Add(cellValue);
}
cellIndex++;
}
dt.Rows.Add(arr.ToArray());
}
dataGridView1.DataSource = dt;
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > ul > li:nth-child(3)")).Click();
}
This is the table that I am trying to scrape from.
This is the code for the following element that is shown picture above.
<span class="k-icon k-i-arrow-end-right"></span>
Thank you so much.
You may want to consider the index information "1 - 25 out of 64 items", since it is a good indicator of the total number of pages.
Batch = 1 - 25 i.e. 25 items per page
Total items = 64
No. of pages = roundup (64 / 25)
PS: A better option, without any computations, maybe to get the "data-page" attribute of the last page button.
I Finally got the answer for this.
private List<List<string>> GetRecords(IWebElement table)
{
List<List<string>> rows = new List<List<string>>(); ;
//Get Row value
foreach (var row in table.FindElements(By.TagName("tr")))
{
//Configure Number of Col and row
int cellIndex = 0;
// Use a list instead of an array
List<string> cols = new List<string>();
//Get Cell Data
foreach (var cell in row.FindElements(By.TagName("td")))
{
// Skip the first column in the row by checking
// if the cell index is 0.
if (cellIndex != 0)
{
string cellValue = "";
Console.WriteLine(cell);
var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (checkboxes.Count > 0)
{
bool isChecked = false;
isChecked = checkboxes[0].Selected;
cellValue = isChecked.ToString();
}
else
{
cellValue = cell.Text;
}
cols.Add(cellValue);
}
cellIndex++;
}
rows.Add(cols);
}
return rows;
}
private void button1_Click(object sender, EventArgs e)
{
//Configure to Hide CMD
var chromeDriverService = ChromeDriverService.CreateDefaultService();
chromeDriverService.HideCommandPromptWindow = true;
//Configure to Hide Chrome
ChromeOptions option = new ChromeOptions();
option.AddArgument("--headless");
//HIDING CHROME UN-COMMNET THE SECOND ONE TO SHOW
//IWebDriver driver = new ChromeDriver(chromeDriverService, option);
IWebDriver driver = new ChromeDriver();
driver.Url = "**************";
driver.Manage().Window.Maximize();
driver.SwitchTo().DefaultContent();
//Log-in
driver.FindElement(By.Id("username")).SendKeys("*****");
driver.FindElement(By.Id("password")).SendKeys("******" + OpenQA.Selenium.Keys.Enter);
//Entering Access Code
driver.FindElement(By.Id("password")).SendKeys("*******");
driver.FindElement(By.Id("accesscode")).SendKeys("********" + OpenQA.Selenium.Keys.Enter);
//go to CustomerList
driver.Navigate().GoToUrl("***********");
driver.Navigate().GoToUrl("*****************");
//Wait till load 3 seconds
waitOnPage(2);
DataTable dt = new DataTable();
var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
foreach (var row in header.FindElements(By.TagName("tr")))
{
//Configure Number of Col and row
int cellIndex = 0;
string[] arr = new string[32];
//Get Cell Data
foreach (var cell in row.FindElements(By.TagName("th")))
{
// Check the header cell for a checkbox child. If no
// such child exists, add the column.
var headerCheckboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
if (headerCheckboxes.Count == 0)
{
//Number of Col Data Load
if (cellIndex <= 29)
{
arr[cellIndex] = cell.Text;
dt.Columns.Add(cell.Text);
}
else
cellIndex++;
}
}
Console.WriteLine(arr);
}
var table = driver.FindElement(By.CssSelector("#gridComponent"));
List<List<string>> records = GetRecords(table);
// Supposing you want the footer information
var lastPageStr = table.FindElement(By.ClassName("k-pager-last")).GetAttribute("data-page");
var lastPage = Convert.ToInt16(lastPageStr);
// You can select other info lik this
// class="k-link k-pager-nav" data-page="1"
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > ul > li:nth-child(3)")).Click();
// Cycle over the pages
for (int p = 0; p < (lastPage - 1); p++)
{
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > a:nth-child(4) > span")).Click();
waitOnPage(2);
var rows = GetRecords(table);
records.AddRange(rows);
}
// Add all rows to DT
//dt.Rows.Add(records[4].ToArray());
foreach(var row in records)
{
dt.Rows.Add(row.ToArray());
}
dataGridView1.DataSource = dt;
}
Related
I am trying to fill a DataTable with values from dynamically created controls. The idea is to have three columns in the DataTable, and fill them one after the other. First fill column 1, then 2, then 3. However, I am struggling to do this effectively.
What is happening is that the DataTable is being filled, but the Data in columns 1, 2 and 3 is not side by side, but instead being filled in different rows. I know this is because I am adding a row with each loop, but I have tried some other things that did not work. Could you help me out?
DataTable tvpIngredients = new DataTable();
tvpIngredients.Columns.Add("Quantity", typeof(string));
tvpIngredients.Columns.Add("Measure", typeof(string));
tvpIngredients.Columns.Add("Ingredient", typeof(string));
foreach (Control ctrlQtt in quantity.Controls)
{
if (ctrlQtt is TextBox)
{
DataRow drNew = tvpIngredients.NewRow();
TextBox quantity = (TextBox)ctrlQtt;
drNew["Quantity"] = quantity.Text;
tvpIngredients.Rows.Add(drNew);
}
}
foreach (Control ctrlMsr in measure.Controls)
{
if (ctrlMsr is DropDownList)
{
DataRow drNew = tvpIngredients.NewRow();
DropDownList measure = (DropDownList)ctrlMsr;
drNew["Measure"] = measure.SelectedValue.ToString();
tvpIngredients.Rows.Add(drNew);
}
}
foreach (Control ctrlIng in ingredient.Controls)
{
if (ctrlIng is TextBox)
{
DataRow drNew = tvpIngredients.NewRow();
TextBox ingredient = (TextBox)ctrlIng;
drNew["Ingredient"] = ingredient.Text;
tvpIngredients.Rows.Add(drNew);
}
}
Thank you!
You already added rows. You need to modify the row column instead.
DataTable tvpIngredients = new DataTable();
tvpIngredients.Columns.Add("Quantity", typeof(string));
tvpIngredients.Columns.Add("Measure", typeof(string));
tvpIngredients.Columns.Add("Ingredient", typeof(string));
foreach (Control ctrlQtt in quantity.Controls)
{
if (ctrlQtt is TextBox)
{
DataRow drNew = tvpIngredients.NewRow();
TextBox quantity = (TextBox)ctrlQtt;
drNew["Quantity"] = quantity.Text;
tvpIngredients.Rows.Add(drNew);
}
}
for (int i = 1; i < tvpIngredients.Rows.Count; i++)
{
if (ctrlMsr is DropDownList)
{
DataRow drNew = tvpIngredients.NewRow();
DropDownList measure = (DropDownList)ctrlMsr;
drNew["Measure"] = measure.SelectedValue.ToString();
tvpIngredients.Rows[i][1].ToString() = drNew;
}
}
for (int i = 1; i < tvpIngredients.Rows.Count; i++)
{
if (ctrlIng is TextBox)
{
DataRow drNew = tvpIngredients.NewRow();
TextBox ingredient = (TextBox)ctrlIng;
drNew["Ingredient"] = ingredient.Text;
tvpIngredients.Rows[i][2].ToString() = drNew;
}
}
To solve a problem like this you need to think about the way you model your data.
Looking at the code here I can guess that you have a Form let's call it Recipe that has at least 3 controls quantity, measure, and ingredients. These controls can contain an arbitrary number of items. It looks like they would be of equal length given that they are each related to one item in your recipe.
You could do this:
if(quantity.Controls.Count != measure.Controls.Count
|| quantity.Controls.Count != ingredient.Controls.Count)
{
throw new Exception("Invalid ingredients list");
}
var quantities = new Control[quantity.Controls.Count];
var measurements = new Control[measure.Controls.Count];
var ingredients = new Control[ingredient.Controls.Count];
quantity.Controls.CopyTo(quantities, 0);
measure.Controls.CopyTo(measurements , 0);
ingredient.Controls.CopyTo(ingredients , 0);
for(var i = 0; i < quantity.Length; ++i)
{
// Make your new row
// index the arrays and fill the row data in
}
There is probably a more elegant solution to the whole problem if you spend some time thinking through how you model it.
I'm loading a csv file into a DataGridView through WindowsForm control and it is automatically setting all columns to strings so they are sorted alphabetically.
The columns are
Product description (String)
Item Code (String)
Current Count (Integer)
On Order (String)
When Current Count column is sorted, it go in this order 1,2,22,3,35,36,4,40 and so on.
Also these columns and rows are not hard coded, I am reading the file and loading it into a DataGridView control.
How can I make so that table's row are sorted numerically relatively to Current Count column's values.
This is the code used to input the csv file into the DataGridView
private void Form1_Load(object sender, EventArgs e)
{
this.Size = new Size(800, 650);
dataGridView.Size = new Size(440, 550);
dataGridView.Location = new Point(15, 15);
dataGridView.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.AllCells;
string[] dataFile = System.IO.File.ReadAllLines(path);
//File.Copy("C:\\StockFile\\stocklist.csv", "C:\\StockFile\\stocklistNew.csv", true);
string[] dataCollection = null;
int x = 0;
foreach (string text_line in dataFile)
{
dataCollection = text_line.Split(',');
if(x ==0)
{
for(int i = 0; i <= dataCollection.Count() -1; i++)
{
dataTable.Columns.Add(dataCollection[i]) ;
}
x++;
}
else
{
dataTable.Rows.Add(dataCollection);
}
}
dataGridView.DataSource = dataTable;
this.Controls.Add(dataGridView);
dataGridView.Columns["Item Code"].ReadOnly = true;
dataGridView.Columns["Product Description"].ReadOnly = true;
dataGridView.Columns["On Order"].ReadOnly = true;
}
You must set the column type when creating it.
if (dataCollection[i] == "Count") // or "Current Count"
dataTable.Columns.Add(dataCollection[i], typeof(int));
else
dataTable.Columns.Add(dataCollection[i]); // typeof(string) by default
Without explicitly specifying it, the type will be string.
I have this code that copy selected rows from one grid to another
private void btnAddEmployee_Click(object sender, EventArgs e)
{
LayoutControl lc = new LayoutControl();
lc.Dock = DockStyle.Top;
LookUpEdit userShift = new LookUpEdit();
userShift.Properties.TextEditStyle = TextEditStyles.DisableTextEditor;
userShift.Properties.DataSource = paint.GetShiftTime();
userShift.Properties.DisplayMember = "ShiftTime";
userShift.Properties.ValueMember = "id";
userShift.Properties.ShowHeader = false;
var date = DateTime.Now;
if (8 < date.Hour && date.Hour < 16)
{
userShift.EditValue = 1;
}
else if (16 < date.Hour && date.Hour < 24)
{
userShift.EditValue = 2;
}
else
{
userShift.EditValue = 3;
}
lc.AddItem(Resources.workingHours, userShift).TextVisible = true;
lc.Height = 50;
this.Controls.Add(lc);
this.Dock = DockStyle.Top;
int[] selectedRows = gridView4.GetSelectedRows();
for(int n=0;n< selectedRows.Length;n++)
//foreach (int index in selectedRows)
{
if (DevExpress.XtraEditors.XtraDialog.Show(lc, Resources.options, MessageBoxButtons.OKCancel) == DialogResult.OK)
{
//Prevent duplicate data
for (int i = 0; i < gridView5.RowCount; i++)
{
if (gridView4.GetRowCellValue(gridView4.FocusedRowHandle, "Matricule").ToString() == gridView5.GetRowCellValue(i, "Matricule").ToString())
{
XtraMessageBox.Show(Resources.employeeAlreadyAdded, Resources.error, MessageBoxButtons.OK, MessageBoxIcon.Warning);
return;
}
}
DataRow r = EmplDT.NewRow();
r[0] = gridView4.GetRowCellValue(gridView4.FocusedRowHandle, "Matricule").ToString();
r[1] = gridView4.GetRowCellValue(gridView4.FocusedRowHandle, "Employé").ToString();
r[2] = userShift.Text;
r[3] = userShift.EditValue;
r[4] = txtDate.EditValue;
EmplDT.Rows.Add(r);
this is my code to create Columns in gridview 5
DataTable EmplDT = new DataTable();
void CreateEmployeeTable()
{
EmplDT.Columns.Add("Matricule");
EmplDT.Columns.Add("Employé");
EmplDT.Columns.Add("Heure");
EmplDT.Columns.Add("idShiftTime", typeof(Int32));
EmplDT.Columns.Add("Date", typeof(DateTime));
gridControl5.DataSource = EmplDT;
gridView5.Columns["idShiftTime"].Visible = false;
gridView5.Columns["Date"].Visible = false;
}
i have two problems in this code:
the first one is when i run the code it only add the first record and then i get error message of duplicate .
the second one i want to show layout control only the first time.
thanks in advance and sorry for my english.
Looping thru selected rows from a devexpress gridview and getting a row can be much simpler like this
int[] selectedRows = gridView4.GetSelectedRows();
for (int i = 0; i < selectedRows.Length; i++)
{
// Get a DataRow and fill it with all values from the this selected row
// This is where you went wrong, you kept using only the first selected row
DataRow rowGridView4 = (gridView4.GetRow(selectedRows[i]) as DataRowView).Row;
// Do a check for doubles here
DataRow[] doubles = EmplDT.Select("Matricule = '" + rowGridView4[0].ToString() +"'");
if (doubles.Length > 0)
{
XtraMessageBox.Show(Resources.employeeAlreadyAdded, Resources.error, MessageBoxButtons.OK, MessageBoxIcon.Warning);
return;
}
// fix for error "This row already belongs to another table"
DataRox row = EmplDT.NewRow();
row[0] = rowGridView4[0];
row[1] = rowGridView4[1];
row[2] = userShift.Text;
row[3] = userShift.EditValue;
row[4] = txtDate.EditValue;
EmplDT.Rows.Add(row);
}
Please note that with testing for doubles at this place will cause all records to be copied until a duplicate is found. So after your error message there might be some records copied and some not.
Is that how you intended this ?
I would leave out the error message and just skip duplicate records. You can still show a message with howmany records where copied if you want.
int[] selectedRows = gridView4.GetSelectedRows();
for (int i = 0; i < selectedRows.Length; i++)
{
// Get a DataRow and fill it with all values from the this selected row
// This is where you went wrong, you kept using only the first selected row
DataRow rowGridView4 = (gridView4.GetRow(selectedRows[i]) as DataRowView).Row;
// Do a check for doubles here
DataRow[] doubles = EmplDT.Select("Matricule = '" + rowGridView4[0].ToString() + "'");
if (doubles.Length == 0)
{
// fix for error "This row already belongs to another table"
DataRox row = EmplDT.NewRow();
row[0] = rowGridView4[0];
row[1] = rowGridView4[1];
row[2] = userShift.Text;
row[3] = userShift.EditValue;
row[4] = txtDate.EditValue;
EmplDT.Rows.Add(row);
}
}
I have a datagrid DGV. That gridview has a column of "File Name" and is populated by the name of the files you've selected in an openfildialog. After performing the calculations I was putting the results in a second datagrid DGV2 which you'll see I've commented out below as I'd like to instead put them on a second column next to their corresponding "File Name" on DGV and just use one gridview. However this is just taking the last calculation and duplicating it on each row rather than the individual calculations (as they should all be diff)
So it should look like:
File1 4.5
File2 3.5
Instead its just doing
File1 3.5
File2 3.5
I know I'm causing it, I've done something wrong here I'm just not sure how to fix it.
private void btnCalculate_Click(object sender, EventArgs e)
{
foreach (DataGridViewRow row in DGV_Hidden.Rows)
{
FileInfo info = new FileInfo();
{
var lines = File.ReadAllLines(row.Cells["colfilelocation"].Value.ToString());
var data = lines.Where(line => (!line.Contains(Data_Start_Point_Identifier) && !line.Contains(FSD__Line_Identifier) && !line.EndsWith("0.00"))).ToList();
if (data.Count > 1)
{
var line = data[0];
var firstsplit = data[1].Split(splitter);
info.startvalue = Convert.ToDouble(firstsplit[0]);
var secondsplit = data[data.Count - 1].Split(splitter);
info.endvalue = Convert.ToDouble(secondsplit[0]);
}
info.finalnum = info.startvalue - info.endvalue;
}
//DGV2.Rows.Add(info.finalnum);
for (int i = 0; i < DGV.Rows.Count; i++)
{
DGV.Rows[i].Cells["colfiledata"].Value = info.finalnum;
}
}
}
ok,
As you said you have similar data /number of rows, You just need to set the value while looping through your hidden grid. use the row index of the looping variable to get the correct row.
private void btnCalculate_Click(object sender, EventArgs e)
{
foreach (DataGridViewRow row in DGV_Hidden.Rows)
{
FileInfo info = new FileInfo();
{
var lines = File.ReadAllLines(row.Cells["colfilelocation"].Value.ToString());
var data = lines.Where(line => (!line.Contains(Data_Start_Point_Identifier) && !line.Contains(FSD__Line_Identifier) && !line.EndsWith("0.00"))).ToList();
if (data.Count > 1)
{
var line = data[0];
var firstsplit = data[1].Split(splitter);
info.startvalue = Convert.ToDouble(firstsplit[0]);
var secondsplit = data[data.Count - 1].Split(splitter);
info.endvalue = Convert.ToDouble(secondsplit[0]);
}
info.finalnum = info.startvalue - info.endvalue;
}
//set your value here
DGV.Rows[row.Index].Cells["colfiledata"].Value = info.finalnum;
}
}
I am using EPPlus.
The excel I am uploading has column headers in row number 2 . And from row 4 onward it has the data which may vary up to 2k records.
The way I am doing it , it takes a lot of time for reading 2k records and putting to a list .
using (var excel = new ExcelPackage(hpf.InputStream))
{
var ws = excel.Workbook.Worksheets["Sheet1"];
//Read the file into memory
for (int rw = 4; rw <= ws.Dimension.End.Row; rw++)
{
if (!ws.Cells[rw, 1, rw, 24].All(c => c.Value == null))
{
int headerRow = 2;
GroupMembershipUploadInput gm = new GroupMembershipUploadInput();
for (int col = ws.Dimension.Start.Column; col <= ws.Dimension.End.Column; col++)
{
var s = ws.Cells[rw, col].Value;
if (ws.Cells[headerRow, col].Value.ToString().Equals("Existing Constituent Master Id"))
{
gm.cnst_mstr_id = (ws.Cells[rw, col].Value ?? (Object)"").ToString();
}
else if (ws.Cells[headerRow, col].Value.ToString().Equals("Prefix of the constituent(Mr, Mrs etc)"))
{
gm.cnst_prefix_nm = (ws.Cells[rw, col].Value ?? (Object)"").ToString();
}
}
lgl.GroupMembershipUploadInputList.Add(gm);
}
}
GroupMembershipUploadInputList is the list of objects of type GroupMembershipUploadInput that I am adding the excel values to after reading from the cell wise.
Can it be done faster ? What am I missing here ?
Please help to improve the performance.
You are making a lot iterations there, for each row, you visit each column twice. I assume that you only need those two values per row and if so the following code would reduce time drastically:
using (var excel = new ExcelPackage(hpf.InputStream))
{
var ws = excel.Workbook.Worksheets["Sheet1"];
int headerRow = 2;
// hold the colum index based on the value in the header
int col_cnst_mstr_id = 2;
int col_cnst_prefix_nm = 4;
// loop once over the columns to fetch the column index
for (int col = ws.Dimension.Start.Column; col <= ws.Dimension.End.Column; col++)
{
if ("Existing Constituent Master Id".Equals(ws.Cells[headerRow, col].Value))
{
col_cnst_mstr_id = col;
}
if ("Prefix of the constituent(Mr, Mrs etc)".Equals(ws.Cells[headerRow, col].Value))
{
col_cnst_prefix_nm = col;
}
}
//Read the file into memory
// loop over all rows
for (int rw = 4; rw <= ws.Dimension.End.Row; rw++)
{
// check if both values are not null
if (ws.Cells[rw, col_cnst_mstr_id].Value != null &&
ws.Cells[rw, col_cnst_prefix_nm].Value != null)
{
// the correct cell will be selcted based on the column index
var gm = new GroupMembershipUploadInput
{
cnst_mstr_id = (string) ws.Cells[rw, col_cnst_mstr_id].Value ?? String.Empty,
cnst_prefix_nm = (string) ws.Cells[rw, col_cnst_prefix_nm].Value ?? String.Empty
};
lgl.GroupMembershipUploadInputList.Add(gm);
}
}
}
I removed the inner column loop and moved it to the start of the method. There it is used to just get the columnindex for each field you're interested in. The expensive null check can now also be reduced. To fetch the value, all that is now needed is a simple index lookup in the row.