I'm trying to download multiple files with several threads. The program uses a BFS algorithm to reach all the files given a particular url: http://www.police.am/Hanraqve/ The problem is that the same file can be downloaded multiple times as several threads are released. I'm thinking of a way to synchronize the download process so that each file is downloaded once only with the help of Mutexes or Semaphores. Any idea or actual code would be very much appreciated. Here is my initial code
/// <summary>
/// Breadth-first crawl starting at <c>root</c>: folder URLs (trailing '/') are
/// expanded into their child links, file URLs are downloaded into
/// <c>destinationFolderPath</c> and recorded in <c>files</c>.
/// NOTE(review): <c>visitedNodes</c> is checked (Contains) and updated (Add) in
/// two separate, non-atomic steps. If several tasks run this loop concurrently,
/// two of them can both pass the Contains check for the same URL and download it
/// twice — this is the duplicate-download problem described above. Replace
/// visitedNodes with a ConcurrentDictionary&lt;string, byte&gt; and use TryAdd as a
/// single atomic check-and-claim (confirm the declared type before changing it).
/// </summary>
public static async Task Download()
{
    nodes.Enqueue(root);
    while (nodes.Count() != 0)
    {
        String currentNode = "";
        if (nodes.TryDequeue(out currentNode))
        {
            if (!visitedNodes.Contains(currentNode))
            {
                visitedNodes.Add(currentNode);
                if (isFolder(currentNode))
                {
                    // Listing page: enqueue every link it contains for later visits.
                    List<String> urls = GetUrlsFromHtml(currentNode);
                    foreach (String url in urls)
                    {
                        nodes.Enqueue(url);
                    }
                }
                else
                {
                    // File URL: everything after the last '/' is the file name.
                    string fileName = currentNode.Remove(0, currentNode.LastIndexOf('/') + 1);
                    // Path.Combine instead of manual concatenation; the original
                    // #"\" was a paste-mangled verbatim string @"\" and did not compile.
                    string localPath = Path.Combine(destinationFolderPath, fileName);
                    using (WebClient webClient = new WebClient())
                    {
                        await webClient.DownloadFileTaskAsync(new Uri(currentNode), localPath);
                        files.Enqueue(localPath);
                    }
                }
            }
        }
    }
    //cts.Cancel();
}
/// <summary>
/// Loads the HTML page at <paramref name="url"/> and returns the URLs of every
/// anchor whose href begins with a digit 1-9 (the listing's entries are named
/// that way; hrefs are relative, so each is appended to the page URL).
/// </summary>
public static List<String> GetUrlsFromHtml(string url)
{
    HtmlWeb hw = new HtmlWeb();
    HtmlDocument doc = hw.Load(url);
    List<String> urls = new List<String>();
    // "//a[@href]" is the correct XPath (the pasted "#href" was a mangled '@').
    // HtmlAgilityPack's SelectNodes returns null when nothing matches, so guard it.
    HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors == null)
    {
        return urls;
    }
    foreach (HtmlNode htmlNode in anchors)
    {
        string hrefValue = htmlNode.Attributes["href"].Value;
        // Guard empty hrefs before indexing the first character.
        if (!string.IsNullOrEmpty(hrefValue) && hrefValue[0] >= '1' && hrefValue[0] <= '9')
        {
            urls.Add(url + hrefValue);
        }
    }
    return urls;
}
/// <summary>
/// A URL is treated as a folder (listing page) when it ends with a slash;
/// anything else is assumed to be a downloadable file.
/// </summary>
public static bool isFolder(string url) => url.EndsWith("/");
}
}
Check the URLs you're storing in visited; they may be different but still go to the same page.
http://foo.com?a=bar
http://foo.bar?b=foo
Related
In my code I recover the file, extract the text, manipulate it and write the modified string in the file, I have not had any problems to date, the file I had to manipulate today weighed over 2GB, with over 1 million lines
/// <summary>
/// Reads the whole file into memory, transforms it with Manipulate, then
/// rewrites the file with the transformed text.
/// WARNING(review): ReadToEnd materialises the entire file as one string, which
/// throws OutOfMemoryException on multi-GB inputs — process line-by-line for
/// large files (see the streaming version below).
/// </summary>
public static void ModifyFile(string directory, string filename)
{
    string input = string.Empty;
    using (StreamReader reader = new StreamReader(directory + filename))
    {
        input = reader.ReadToEnd();
    }
    string output = Manipulate(input);
    // Truncate the target file before WriteFile appends to it. The original
    // pasted line interpolated a stray placeholder instead of the filename,
    // so it truncated the wrong path.
    File.WriteAllText(directory + filename, string.Empty);
    WriteFile(directory, filename, output);
}
/// <summary>
/// Appends <paramref name="output"/> to the file directory+filename.
/// Disposal via 'using' flushes and closes the stream.
/// </summary>
private static void WriteFile(string directory, string filename, string output)
{
    using (var writer = new StreamWriter(directory + filename, true))
    {
        writer.Write(output);
    }
}
/// <summary>
/// Splits <paramref name="input"/> into lines and renumbers the 9-character
/// field at offset 22 of every line whose 2-character marker at offset 121 is
/// "NA". The second-to-last line is renumbered unconditionally and emitted
/// without a trailing newline; the final line is discarded.
/// </summary>
private static string Manipulate(string input)
{
    var counter = 1;
    StringBuilder output = new StringBuilder();
    string[] subs = input.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
    // BUG FIX: the original compared each line BY VALUE against the last and
    // second-to-last lines, so any body line whose text happened to equal one
    // of them was misclassified — and a single-line input indexed subs[-1].
    // Classify by position instead.
    for (int i = 0; i < subs.Length; i++)
    {
        string line = subs[i];
        if (i < subs.Length - 2)
        {
            // Guard short lines so Substring(121, 2) cannot throw.
            if (line.Length >= 123 && line.Substring(121, 2).Equals("NA"))
            {
                output.Append(ManipulateStringElement(line, counter, 22)
                    .Replace("\r\n", "\n").Replace("\r", "\n").Replace("\n", "\r\n"));
                output.Append("\n");
                counter++;
            }
        }
        else if (i == subs.Length - 2)
        {
            // Trailer line: renumbered but written without a newline.
            output.Append(ManipulateStringElement(line, counter, 22)
                .Replace("\r\n", "\n").Replace("\r", "\n").Replace("\n", "\r\n"));
        }
        // i == subs.Length - 1: final line intentionally discarded.
    }
    return output.ToString();
}
/// <summary>
/// Replaces the 9-character field beginning at <paramref name="start"/> with
/// the zero-padded value of <paramref name="counter"/>.
/// </summary>
private static string ManipulateStringElement(string item, int counter, int start)
{
    // BUG FIX: the original used string.Replace, which substitutes EVERY
    // occurrence of the extracted 9-character substring anywhere in the line,
    // not just the field at 'start'. Rebuild the string positionally instead.
    return item.Substring(0, start) + GenerateProgressive(counter) + item.Substring(start + 9);
}
/// <summary>
/// Formats <paramref name="counter"/> as a 9-digit zero-padded string
/// (e.g. 1 -> "000000001").
/// </summary>
private static string GenerateProgressive(int counter)
{
    // "D9" avoids the interpolated-string + PadLeft double allocation and,
    // unlike PadLeft, places the sign correctly should a negative ever reach here.
    return counter.ToString("D9");
}
But while running reader.ReadToEnd() I get "OutOfMemoryException" error, which makes me think the file is too big
The application targets .NET Framework 4.6.1, and the operating system is 64-bit (I had read that this could be a factor).
You need to do this in a streaming fashion in order to reduce memory consumption.
Open an input and an output file at the same time, and immediately output the result of a single line from Manipulate(). Ensure it ends with your custom newline character.
Finally replace the original file with the new one.
/// <summary>
/// Streaming rewrite: reads the source one line at a time, writes each
/// transformed line to a ".new" sibling file, then swaps the files. Memory use
/// stays flat regardless of file size (no ReadToEnd).
/// </summary>
public static void ModifyFile(string directory, string filename)
{
    string inputFile = Path.Combine(directory, filename);
    string outputFile = Path.Combine(directory, filename + ".new");
    using (var reader = new StreamReader(inputFile))
    // BUG FIX: the original declared a second variable named 'reader' here,
    // which does not compile and shadowed the intended 'writer'.
    using (var writer = new StreamWriter(outputFile, true))
    {
        string input;
        while ((input = reader.ReadLine()) != null)
        {
            string output = Manipulate(input);
            writer.Write(output);
        }
    }
    // File.Move(src, dst, overwrite) only exists on .NET Core 3.0+ / .NET 5+;
    // the question targets .NET Framework 4.6.1, so delete-then-move instead.
    File.Delete(inputFile);
    File.Move(outputFile, inputFile);
}
You may also want to do this using async code, which could improve responsiveness.
I note that you are also retrieving the last two lines of the file. I suggest you do this separately, using this answer for example.
There are also other performance improvements you can make. For example:
/// <summary>9-digit zero-padded rendering of <paramref name="counter"/>.</summary>
private static string GenerateProgressive(int counter) => counter.ToString("D9");
as well as:
/// <summary>
/// Replaces the 9-character field at <paramref name="start"/> with the
/// zero-padded counter, leaving the rest of the line untouched.
/// </summary>
private static string ManipulateStringElement(string item, int counter, int start)
{
    // BUG FIX: the previous version ignored 'start' entirely — it prepended the
    // counter and dropped the first 9 characters of the line, regardless of
    // where the field actually was. Rebuild around the requested offset.
    return item.Substring(0, start) + GenerateProgressive(counter) + item.Substring(start + 9);
}
I have following code:
using System;
using System.Collections.Generic;
using System.IO;
using VirusTotalNET;
using VirusTotalNET.Objects;
using System.Linq;
using System.Security.Permissions;
namespace VirusTotalNETClient
{
/// <summary>
/// Walks C:\test, asks VirusTotal for a report on every readable file, and
/// appends any positive detections to C:\OnlineScanner\OnlineScannerLog.txt.
/// </summary>
class Program
{
    private const string ScanUrl = "http://www.google.com/";

    static void Main(string[] args)
    {
        // SECURITY(review): the API key is hardcoded in source — move it to
        // configuration or an environment variable and revoke this one.
        VirusTotal virusTotal = new VirusTotal("5d8684f50946c2bdeaf5c4fd966f61f3661de808e9d7324b99788d6f4fb7ad57");
        // Use HTTPS instead of HTTP.
        virusTotal.UseTLS = true;
        // Folder for the program's working files and output log.
        string folderName = "C:\\OnlineScanner";
        System.IO.Directory.CreateDirectory(folderName);
        // BUG FIX: the original passed "C:\test", in which \t is a TAB escape —
        // it must be a verbatim string.
        var paths = Traverse(@"C:\test");
        File.WriteAllLines("C:\\OnlineScanner\\test.txt", paths);
        // BUG FIX: read back the file that was just written; the original read
        // C:\test.txt, a different path from the one written above.
        foreach (string line in File.ReadLines("C:\\OnlineScanner\\test.txt"))
        {
            try
            {
                // Define what file you want to analyse.
                FileInfo fileInfo = new FileInfo(line);
                // Check if the file has been scanned before.
                FileReport fileReport = virusTotal.GetFileReport(fileInfo);
                bool hasFileBeenScannedBefore = fileReport.ResponseCode == ReportResponseCode.Present;
                if (hasFileBeenScannedBefore)
                {
                    // The results are embedded inside the report.
                    int detekce = fileReport.Positives;
                    if (detekce >= 1)
                    {
                        // append: true — the original overwrote the log on every hit.
                        using (var writer = new StreamWriter("C:\\OnlineScanner\\OnlineScannerLog.txt", true))
                        {
                            writer.WriteLine(line);
                            writer.WriteLine("URL to test: " + fileReport.Permalink);
                            writer.WriteLine("Detect ratio: " + fileReport.Positives + "/54");
                            writer.WriteLine("Message: " + fileReport.VerboseMsg);
                            writer.WriteLine();
                            writer.WriteLine();
                        }
                    }
                }
                else
                {
                    // Not scanned yet: submit it. Results are not available
                    // immediately after submission, so only record the scan —
                    // the original read fileReport.Positives here, i.e. data
                    // from the report that was just found to be absent.
                    ScanResult fileResult = virusTotal.ScanFile(fileInfo);
                    using (var writer = new StreamWriter("C:\\OnlineScanner\\OnlineScannerLog.txt", true))
                    {
                        writer.WriteLine(line);
                        writer.WriteLine("Submitted for scanning: " + fileResult.Permalink);
                        writer.WriteLine();
                    }
                }
            }
            catch (IOException)
            {
                // Locked system files such as C:\hiberfil.sys cannot be opened
                // (the crash reported above) — skip them and keep going.
            }
            catch (UnauthorizedAccessException)
            {
                // Likewise for files we lack permission to read.
            }
            // Public VirusTotal API rate limit: 4 requests per minute.
            System.Threading.Thread.Sleep(16000);
        }
    }

    /// <summary>
    /// Lazily enumerates every file under <paramref name="rootDirectory"/>,
    /// silently skipping directories that deny access.
    /// </summary>
    private static IEnumerable<string> Traverse(string rootDirectory)
    {
        IEnumerable<string> files = Enumerable.Empty<string>();
        IEnumerable<string> directories = Enumerable.Empty<string>();
        try
        {
            // The test for UnauthorizedAccessException.
            var permission = new FileIOPermission(FileIOPermissionAccess.PathDiscovery, rootDirectory);
            permission.Demand();
            files = Directory.GetFiles(rootDirectory);
            directories = Directory.GetDirectories(rootDirectory);
        }
        catch
        {
            // Ignore folder (access denied); both sequences stay empty.
        }
        foreach (var file in files)
        {
            yield return file;
        }
        // Recursive descent into the accessible subdirectories.
        foreach (var result in directories.SelectMany(Traverse))
        {
            yield return result;
        }
    }
}
}
This code runs for some time (around 15 seconds), but then the program crashes.
The error is
System.IO.IOException, process can't access to file C:\hiberfil.sys.
http://upnisito.cz/images/2016_12/319crasherrror.png
Do you have any idea how to solve it?
I have a program that needs to download ~200 XML pages of ~250 lines. It does this by grabbing a list of ID's from a main page, and then it then iterates through each ID, inserting it into a URL and using a StreamWriter to write the XML page to a file. The methods look like this:
/// <summary>
/// Downloads one XML document per ID returned by GetId and writes each one to
/// "path\{id}.xml". Any failure is reported to the user via a message box.
/// </summary>
private static void XMLUpdate(string path)
{
    try
    {
        // One WebClient reused for all downloads, disposed when done — the
        // original was never disposed.
        using (var client = new WebClient())
        {
            foreach (var node in GetId())
            {
                // Fetch the XML payload for this ID as a string.
                var s = client.DownloadString("https://example" + node + "/xml");
                // 'using' guarantees the file is flushed and closed even if a
                // later download throws (the original only called Close() on
                // the success path).
                using (var file = new StreamWriter(Path.Combine(path, node + ".xml")))
                {
                    file.WriteLine(s);
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
// Fetches the index document at https://example/xml and collects the text of the
// <id> element found under each successive wrapper element named "n0", "n1", ...
// Returns the ids in document order as an array.
// NOTE(review): the interplay between Read() and ReadToFollowing() below is
// order-sensitive; it relies on the feed really using sequential n0..nK element
// names — confirm against the feed schema before changing it.
private static string[] GetId()
{
    var idList = new List<string>();
    var request = WebRequest.Create("https://example/xml");
    // i tracks which "n<i>" wrapper element is expected next.
    var i = 0;
    using (var response = request.GetResponse())
    using (var xmlReader = XmlReader.Create(response.GetResponseStream()))
    {
        while (xmlReader.Read())
        {
            xmlReader.ReadToFollowing("n" + i);
            //go through each of the n nodes in the xmldocument and get the name and id
            // If ReadToFollowing did not land on the expected element, fall back
            // to the outer Read() loop and try again from the next node.
            if (xmlReader.NodeType != XmlNodeType.Element || xmlReader.Name != "n" + i) continue;
            xmlReader.ReadToFollowing("id");
            if (xmlReader.NodeType == XmlNodeType.Element && xmlReader.Name == "id")
            {
                // Reads the element's text content and advances past it.
                var id = xmlReader.ReadElementContentAsString();
                idList.Add(id);
            }
            i += 1;
        }
    }
    var IDs = idList.ToArray();
    return IDs;
}
At the moment the program takes a long time to download everything I need. Is there something I can do to speed up the process?
Yes, you can run things in parallel, by using Parallel.ForEach:
Parallel.ForEach
( GetId()
, new ParallelOptions() { MaxDegreeOfParallelism = 32 } // run 32 in parallel
, node =>
{
//stores xml text into a string
var client = new WebClient();
var s = client.DownloadString("https://example" + node + "/xml");
// assign to the output
var file = new StreamWriter(path + "\\" + node + ".xml");
file.WriteLine(s);
file.Close();
}
);
You can tweak the MaxDegreeOfParallelism as you want and your service can handle.
What I'm trying to do is to upload a website using FTP in C# (C Sharp). So I need to upload all files and folders within a folder, keeping its structure. I'm using this FTP class: http://www.codeproject.com/Tips/443588/Simple-Csharp-FTP-Class for the actual uploading.
I have come to the conclusion that I need to write a recursive method that goes through every sub-directory of the main directory and upload all files and folders in it. This should make an exact copy of my folder to the FTP. Problem is... I have no clue how to write a method like that. I have written recursive methods before but I'm new to the FTP part.
This is what I have so far:
/// <summary>
/// Mirrors the local tree at <paramref name="directoryPath"/> onto the FTP
/// server: uploads the files in this directory, then creates and descends into
/// each subdirectory. BUG FIX: despite its name, the original never recursed,
/// so everything below the first level was silently skipped.
/// </summary>
private void recursiveDirectory(string directoryPath)
{
    // Directory.GetFiles/GetDirectories never return null (they throw on
    // error), so the original null guard was dead code.
    string[] filePaths = Directory.GetFiles(directoryPath, "*.*");
    string[] subDirectories = Directory.GetDirectories(directoryPath);
    foreach (string file in filePaths)
    {
        ftpClient.upload(Path.GetDirectoryName(directoryPath), file);
    }
    foreach (string directory in subDirectories)
    {
        ftpClient.createDirectory(directory);
        // Recurse so nested files and folders are uploaded too.
        recursiveDirectory(directory);
    }
}
But its far from done and I don't know how to continue. I'm sure more than me needs to know this! Thanks in advance :)
Ohh and... It would be nice if it reported its progress too :) (I'm using a progress bar)
EDIT:
It might have been unclear... How do I upload a directory including all sub-directories and files with FTP?
Problem Solved! :)
Alright, so I managed to write the method myself. If anyone needs it, feel free to copy:
/// <summary>
/// Uploads every file in <paramref name="dirPath"/> to
/// <paramref name="uploadPath"/> on the server, then recreates and descends
/// into each subdirectory so the remote tree mirrors the local one.
/// </summary>
private void recursiveDirectory(string dirPath, string uploadPath)
{
    // Files first, in this directory.
    foreach (string localFile in Directory.GetFiles(dirPath, "*.*"))
    {
        ftpClient.upload(uploadPath + "/" + Path.GetFileName(localFile), localFile);
    }
    // Then each child directory: create its remote twin and recurse into it.
    foreach (string childDir in Directory.GetDirectories(dirPath))
    {
        string remoteChild = uploadPath + "/" + Path.GetFileName(childDir);
        ftpClient.createDirectory(remoteChild);
        recursiveDirectory(childDir, remoteChild);
    }
}
It works very well :)
I wrote an FTP classe and also wrapped it in a WinForms user control. You can see my code in the article An FtpClient Class and WinForm Control.
I wrote a reusable class to upload entire directory to an ftp site on windows server, the program also renames the old version of that folder (i use it to upload my windows service program to the server).
maybe someone needs this:
/// <summary>
/// Minimal FTP helper that mirrors a local directory tree onto an FTP server,
/// first renaming any previous remote copy of the tree with a timestamp.
/// </summary>
class MyFtpClient
{
    protected string FtpUser { get; set; }
    protected string FtpPass { get; set; }
    protected string FtpServerUrl { get; set; }
    protected string DirPathToUpload { get; set; }
    // Name of the last path segment of DirPathToUpload; used as the root
    // directory name on the server.
    protected string BaseDirectory { get; set; }

    public MyFtpClient(string ftpuser, string ftppass, string ftpserverurl, string dirpathtoupload)
    {
        this.FtpPass = ftppass;
        this.FtpUser = ftpuser;
        this.FtpServerUrl = ftpserverurl;
        this.DirPathToUpload = dirpathtoupload;
        var spllitedpath = dirpathtoupload.Split('\\').ToArray();
        // last index must be the "base" directory on the server
        this.BaseDirectory = spllitedpath[spllitedpath.Length - 1];
    }

    /// <summary>
    /// Renames the previous remote copy (if any), recreates the base folder,
    /// and uploads the whole local tree beneath it.
    /// </summary>
    public void UploadDirectory()
    {
        // rename the old folder version (if it exists)
        RenameDir(BaseDirectory);
        // create the parent folder on the server
        CreateDir(BaseDirectory);
        // upload the files in the root of the path
        UploadAllFolderFiles(DirPathToUpload, BaseDirectory);
        // walk every subdirectory of the tree
        foreach (string dirPath in Directory.GetDirectories(DirPathToUpload, "*",
            SearchOption.AllDirectories))
        {
            // Remote path = the local path from the base directory onwards.
            string relativePath = dirPath.Substring(dirPath.IndexOf(BaseDirectory));
            CreateDir(relativePath);
            Console.WriteLine(relativePath);
            // BUG FIX: the original statement was missing its closing ')' and
            // ';' and did not compile.
            UploadAllFolderFiles(dirPath, relativePath);
        }
    }

    // Uploads every file directly inside localpath to remotepath on the server.
    private void UploadAllFolderFiles(string localpath, string remotepath)
    {
        string[] files = Directory.GetFiles(localpath);
        // get only the filenames and concat to remote path
        foreach (string file in files)
        {
            // full remote path
            var fullremotepath = remotepath + "\\" + Path.GetFileName(file);
            // local path
            var fulllocalpath = Path.GetFullPath(file);
            // upload to server
            Upload(fulllocalpath, fullremotepath);
        }
    }

    /// <summary>Creates a directory on the server; true on PathnameCreated.</summary>
    public bool CreateDir(string dirname)
    {
        try
        {
            WebRequest request = WebRequest.Create("ftp://" + FtpServerUrl + "/" + dirname);
            request.Method = WebRequestMethods.Ftp.MakeDirectory;
            // null = no proxy, consistent with Upload/RenameDir (the original
            // mixed 'new WebProxy()' here with null elsewhere).
            request.Proxy = null;
            request.Credentials = new NetworkCredential(FtpUser, FtpPass);
            using (var resp = (FtpWebResponse)request.GetResponse())
            {
                return resp.StatusCode == FtpStatusCode.PathnameCreated;
            }
        }
        catch
        {
            // Best-effort: directory may already exist or be unreachable.
            return false;
        }
    }

    /// <summary>Uploads one file; backslashes in the target are normalized to '/'.</summary>
    public void Upload(string filepath, string targetpath)
    {
        using (WebClient client = new WebClient())
        {
            client.Credentials = new NetworkCredential(FtpUser, FtpPass);
            client.Proxy = null;
            // The pasted #"\" was a mangled verbatim string @"\".
            var fixedpath = targetpath.Replace(@"\", "/");
            client.UploadFile("ftp://" + FtpServerUrl + "/" + fixedpath, WebRequestMethods.Ftp.UploadFile, filepath);
        }
    }

    /// <summary>
    /// Renames the remote folder to a yyyyMMddHHmmss timestamp; true on
    /// FileActionOK.
    /// </summary>
    public bool RenameDir(string dirname)
    {
        var serverUri = "ftp://" + FtpServerUrl + "/" + dirname;
        try
        {
            FtpWebRequest request = (FtpWebRequest)WebRequest.Create(serverUri);
            request.Method = WebRequestMethods.Ftp.Rename;
            request.Proxy = null;
            request.Credentials = new NetworkCredential(FtpUser, FtpPass);
            // change the name of the old folder to a timestamp
            request.RenameTo = DateTime.Now.ToString("yyyyMMddHHmmss");
            // BUG FIX: the original called request.GetResponse() twice (once
            // into an unused variable, once in the using), issuing the FTP
            // command a second time on a request that had already completed.
            using (var resp = (FtpWebResponse)request.GetResponse())
            {
                return resp.StatusCode == FtpStatusCode.FileActionOK;
            }
        }
        catch
        {
            // Best-effort: the folder may simply not exist yet.
            return false;
        }
    }
}
Create an instance of that class:
/// <summary>
/// Example driver: uploads the given local folder to the FTP server and waits
/// for a key press before exiting.
/// </summary>
static void Main(string[] args)
{
    // BUG FIX: the original fused the type and variable name ("MyFtpClientftp"),
    // which does not compile, and used '#' where a verbatim '@' string is needed.
    MyFtpClient ftp = new MyFtpClient(ftpuser, ftppass, ftpServerUrl, @"C:\Users\xxxxxxxxxxx");
    ftp.UploadDirectory();
    Console.WriteLine("DONE");
    Console.ReadLine();
}
Unless you're doing this for fun or self-improvement, use a commercial module. I can recommend one from Chilkat, but I'm sure there are others.
Note: I'm pretty sure this does answer the stated problem, What I'm trying to do is to upload a website using FTP in C# (C Sharp).
I have a listview which I show video results from YouTube. Everything works fine but one thing I noticed is that the way it works seems to be a bit slow and it might be due to my code. Are there any suggestions on how I can make this better? Maybe loading the images directly from the url instead of using a webclient?
I am adding the listview items in a loop from video feeds returned from a query using the YouTube API. The piece of code which I think is slowing it down is this:
Feed<Video> videoFeed = request.Get<Video>(query);
int i = 0;
// One WebClient reused for every thumbnail and disposed afterwards — the
// original allocated a new client per video and never disposed any of them.
using (WebClient wc = new WebClient())
{
    foreach (Video entry in videoFeed.Entries)
    {
        string[] info = printVideoEntry(entry).Split(',');
        wc.DownloadFile("http://img.youtube.com/vi/" + info[0] + "/hqdefault.jpg", info[0] + ".jpg");
        string[] row1 = { "", info[0], info[1] };
        ListViewItem item = new ListViewItem(row1, i);
        YoutubeList.Items.Add(item);
        imageListSmall.Images.Add(Bitmap.FromFile(info[0] + ".jpg"));
        imageListLarge.Images.Add(Bitmap.FromFile(info[0] + ".jpg"));
        // BUG FIX: i was never incremented, so every row used image index 0
        // and always showed the first thumbnail.
        i++;
    }
}
/// <summary>
/// Packs a video's id and title into one comma-separated string
/// ("VideoId,Title") so both values can travel as a single argument.
/// </summary>
public static string printVideoEntry(Video video) => $"{video.VideoId},{video.Title}";
As you can see, I use a WebClient that downloads the images so I can use them in my list view. It works, but what I'm concerned about is speed — any suggestions? Maybe a different control altogether?
Ok, I hope this is the code you were looking for, I can't test it since I don't have dll that you are using but I think it's OK.
// Kick off the thumbnail downloads on a background thread, then fill the list
// view rows immediately so the UI stays responsive while images arrive.
Feed<Video> videoFeed = request.Get<Video>(query);
Thread imageThread = new Thread(new ParameterizedThreadStart(GetImages));
imageThread.Start(videoFeed);
int rowIndex = 0;
foreach (Video entry in videoFeed.Entries)
{
    string[] parts = printVideoEntry(entry).Split(',');
    string[] columns = { "", parts[0], parts[1] };
    ListViewItem listItem = new ListViewItem(columns, rowIndex++);
    YoutubeList.Items.Add(listItem);
}
}
/// <summary>
/// Background-thread worker: downloads the hqdefault thumbnail for every video
/// in the feed and hands each saved filename to the UI thread via ImageAdd.
/// </summary>
void GetImages(object arg)
{
    // BUG FIX: "Feed<Video> videoFeed = Feed<Video> arg;" is not valid C# —
    // the ParameterizedThreadStart argument must be cast back explicitly.
    Feed<Video> videoFeed = (Feed<Video>)arg;
    foreach (Video entry in videoFeed.Entries)
    {
        string[] info = printVideoEntry(entry).Split(',');
        // Dispose the WebClient after each download (the original leaked one
        // per video).
        using (WebClient wc = new WebClient())
        {
            wc.DownloadFile("http://img.youtube.com/vi/" + info[0] + "/hqdefault.jpg", info[0] + ".jpg");
        }
        ImageAdd(info[0] + ".jpg");
    }
}
// Delegate used to marshal image additions onto the UI thread via Control.Invoke.
delegate void imageAdder(string imgName);

/// <summary>
/// Runs on the UI thread: loads the downloaded thumbnail into both image lists
/// and repaints the list view. The caller already passes the full filename
/// including ".jpg" — the original appended a second ".jpg" here and tried to
/// open "xxx.jpg.jpg", a file that was never created.
/// </summary>
void AddImage(string imgName)
{
    imageListSmall.Images.Add(Bitmap.FromFile(imgName));
    imageListLarge.Images.Add(Bitmap.FromFile(imgName));
    listView1.Refresh();
}
/// <summary>
/// Thread-safe entry point for the download thread: marshals the AddImage call
/// onto the UI thread with Control.Invoke.
/// </summary>
void ImageAdd(string imgName)
{
    this.Invoke(new imageAdder(AddImage), imgName);
}
Try it and I will comment it if you have some problems.