I've been playing around with the HtmlAgilityPack for a while, but I've run into a problem regarding the creation of a new HtmlDocument. I have a simple program that gets the data of films on a particular list. Some of the information is retrieved from the list page itself, and the rest is retrieved from the linked page for each item.
The problem I'm having is that every time I wish to retrieve information from the linked page, I'm creating a new HtmlDocument. When I try to retrieve the complete list of films, the program just hangs on the console window. Here is my code:
namespace ConsoleApplication5
{
    /// <summary>
    /// Scrapes film data from the IMDb chart page at <see cref="StartingUrl"/> and,
    /// for every film found there, from the film's own page.
    /// </summary>
    public class Scraper
    {
        private readonly string _baseUrl = @"http://www.imdb.com";
        private readonly string _startingUrl = @"http://www.imdb.com/chart/top";
        private readonly HtmlWeb _webGet = new HtmlWeb();

        /// <summary>URL of the chart page that lists the films.</summary>
        public string StartingUrl
        {
            get { return _startingUrl; }
        }

        /// <summary>Prefix prepended to the relative links found on the chart page.</summary>
        public string BaseUrl
        {
            get { return _baseUrl; }
        }

        /// <summary>The HtmlAgilityPack web client used for every page load.</summary>
        public HtmlWeb WebGet
        {
            get { return _webGet; }
        }

        /// <summary>
        /// Loads the chart page and builds one <see cref="Film"/> per table row.
        /// </summary>
        /// <returns>
        /// The films found, in table order; an empty list when the table rows cannot be located.
        /// </returns>
        /// <remarks>
        /// Every row triggers one additional page load through
        /// <see cref="SingleFilmInformation"/>, so the run time grows with the number of rows.
        /// </remarks>
        public List<Film> GetFilmData()
        {
            var allFilmData = new List<Film>();
            var doc = WebGet.Load(StartingUrl);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var allFilmsInTable = doc.DocumentNode.SelectNodes("//div[@id='main']/table/tr");
            if (allFilmsInTable == null)
            {
                return allFilmData;
            }

            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            foreach (var line in allFilmsInTable)
            {
                // Rows without a previous sibling are skipped (presumably the header row).
                if (line.PreviousSibling == null)
                {
                    continue;
                }

                // The same anchor carries both the title and the link to the film page.
                var titleLink = line.SelectSingleNode(".//td/font/a");

                var film = new Film();
                film.Title = titleLink.InnerHtml;
                film.Url = BaseUrl + titleLink.Attributes["href"].Value;
                // The page uses '.' as decimal separator, so parse independently of the machine culture.
                film.Rating = Convert.ToDecimal(line.SelectSingleNode(".//td[@align='center']/font").InnerText, invariant);
                film.RankInTop250 = Convert.ToInt32(line.SelectSingleNode(".//td[@align='right']/font/b").InnerText.Replace(".", string.Empty), invariant);
                allFilmData.Add(SingleFilmInformation(film));
            }

            return allFilmData;
        }

        /// <summary>
        /// Loads the page at <c>film.Url</c> and fills in release year, director and genres.
        /// </summary>
        /// <param name="film">Film whose <c>Url</c> is already set; it is updated in place.</param>
        /// <returns>The same <paramref name="film"/> instance.</returns>
        public Film SingleFilmInformation(Film film)
        {
            var singleDoc = WebGet.Load(film.Url);
            var root = singleDoc.DocumentNode;

            film.ReleaseYear = Convert.ToInt32(root.SelectSingleNode("//h1[@class='header']/span/a").InnerText, System.Globalization.CultureInfo.InvariantCulture);
            film.Director = root.SelectSingleNode("//div[@itemprop='director']/a/span").InnerText;

            // A page without genre links yields null here; leave the genre list untouched in that case.
            var genres = root.SelectNodes("//div[@class='infobar']/a/span[@itemprop='genre']");
            if (genres != null)
            {
                foreach (var genre in genres)
                {
                    film.Genres.Add(genre.InnerText);
                }
            }

            return film;
        }
    }
}
Any help would be greatly appreciated.
Related
While using Visual Studio 2019 with my own source generator, I'm getting an exception inside Visual Studio and it prompts me to open a debugger.
This only happens while coding and not while building (which makes me think that my source generator is fine).
When trying to debug I see a blank page and the call stack does not have any information.
I know it must be linked to my generator but I don't know how. Any tips on how to debug would be greatly appreciated.
Full source code is available here : https://github.com/kYann/StrongTypeId/tree/master/src/StrongType.Generators
/// <summary>
/// Source generator that renders the <c>StrongTypeId.sbntxt</c> template once for every
/// record whose base type is <c>StrongTypeId&lt;&gt;</c>.
/// </summary>
[Generator]
public class StrongTypeIdGenerator : ISourceGenerator
{
    // Parsed once in Initialize and reused for every record in Execute.
    Template template;

    /// <summary>Parses the embedded template and registers the syntax receiver.</summary>
    public void Initialize(GeneratorInitializationContext context)
    {
        var file = "StrongTypeId.sbntxt";
        template = Template.Parse(EmbeddedResource.GetContent(file), file);
        context.RegisterForSyntaxNotifications(() => new StrongTypeIdReceiver());
    }

    /// <summary>Renders the template for one record.</summary>
    /// <param name="namespace">Value exposed to the template as <c>Namespace</c>.</param>
    /// <param name="className">Value exposed to the template as <c>ClassName</c>.</param>
    /// <returns>The rendered source text.</returns>
    private string GenerateStrongTypeId(string @namespace, string className)
    {
        var model = new
        {
            Namespace = @namespace,
            ClassName = className,
        };
        // apply the template; member names are passed through unchanged
        var output = template.Render(model, member => member.Name);
        return output;
    }

    /// <summary>
    /// Tells whether <paramref name="recordSymbol"/> derives directly from <c>StrongTypeId&lt;&gt;</c>,
    /// comparing assembly display name and metadata name of its base type.
    /// </summary>
    /// <returns>
    /// <c>true</c> on a match; <c>false</c> otherwise, including when the base type or its
    /// containing assembly is not available.
    /// </returns>
    public bool IsStrongTypeId(INamedTypeSymbol recordSymbol)
    {
        var strongTypeIdType = typeof(StrongTypeId<>);
        var originalBaseTypeDef = recordSymbol.BaseType?.OriginalDefinition;
        // ContainingAssembly can be null (for instance when the base type does not resolve
        // while the code is being edited); treat that as "not a strong type id" instead of
        // dereferencing null.
        var baseTypeAssembly = originalBaseTypeDef?.ContainingAssembly;
        if (baseTypeAssembly is null)
        {
            return false;
        }

        var isSameAssembly = baseTypeAssembly.ToDisplayString() == strongTypeIdType.Assembly.FullName;
        var isSameTypeName = strongTypeIdType.Name == originalBaseTypeDef.MetadataName;
        return isSameAssembly && isSameTypeName;
    }

    /// <summary>
    /// Adds one generated source file per distinct strong-type-id record collected by the receiver.
    /// </summary>
    public void Execute(GeneratorExecutionContext context)
    {
        if (context.SyntaxReceiver is not StrongTypeIdReceiver receiver)
            return;

        // AddSource rejects duplicate hint names, so remember what was already emitted.
        // The same record can show up more than once (several partial declarations).
        var emittedHints = new HashSet<string>();

        foreach (var rds in receiver.RecordDeclarations)
        {
            var model = context.Compilation.GetSemanticModel(rds.SyntaxTree);
            if (model.GetDeclaredSymbol(rds) is not INamedTypeSymbol recordSymbol)
                continue;
            if (!IsStrongTypeId(recordSymbol))
                continue;

            var ns = recordSymbol.ContainingNamespace.ToDisplayString();

            // Qualify the hint with the namespace so equally named records in different
            // namespaces do not collide.
            var hint = recordSymbol.ContainingNamespace.IsGlobalNamespace
                ? recordSymbol.Name
                : $"{ns}.{recordSymbol.Name}";
            if (!emittedHints.Add(hint))
                continue;

            var output = GenerateStrongTypeId(ns, recordSymbol.Name);
            // add the file
            context.AddSource($"{hint}.generated.cs", SourceText.From(output, Encoding.UTF8));
        }
    }

    /// <summary>Collects every record declaration that has a base list.</summary>
    private class StrongTypeIdReceiver : ISyntaxReceiver
    {
        public StrongTypeIdReceiver()
        {
            RecordDeclarations = new();
        }

        /// <summary>Record declarations seen so far, in visit order.</summary>
        public List<RecordDeclarationSyntax> RecordDeclarations { get; private set; }

        public void OnVisitSyntaxNode(SyntaxNode syntaxNode)
        {
            // Only records with a base list can derive from StrongTypeId<>.
            if (syntaxNode is RecordDeclarationSyntax rds &&
                rds.BaseList is not null)
            {
                this.RecordDeclarations.Add(rds);
            }
        }
    }
}
RoslynCodeAnalysisService has an aggressive caching strategy and was holding the previous source generator in memory.
Restarting Visual Studio did the trick.
Rookie here needing help. I'm trying to build a prototype with the neo4j .NET driver using Bolt. My aim with the prototype is building multiple methods for creation and searches in the db, but only one method to connect to the db - here I'm continuously having problems. I've Googled all weekend for examples, tutorials and traversed through the documentation and now I need your help.
Programs.cs
using System;
using DTUneo4jConsoleApp.Db;
namespace DTUneo4jConsoleApp
{
    /// <summary>
    /// Console entry point: creates a Person node from a <see cref="MyProperties"/> instance,
    /// reads it back and prints title and name of every returned record.
    /// </summary>
    public class Program
    {
        public static void Main(string[] args)
        {
            var person = new MyProperties();
            var db = new neo4jdb();

            // Create the node.
            db.Run($"CREATE (a:Person {{name:'{person.Name}', title:'{person.Title}'}})");

            // Read it back and print "<title> <name>" per record.
            var matches = db.Run($"MATCH (a:Person) WHERE a.name = '{person.Name}' RETURN a.name AS name, a.title AS title");
            foreach (var row in matches)
            {
                var title = row["title"].As<string>();
                var name = row["name"].As<string>();
                Console.WriteLine($"{title} {name}");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }

    /// <summary>Values interpolated into the Cypher statements built in <see cref="Program.Main"/>.</summary>
    public class MyProperties
    {
        /// <summary>Written to and matched against the node's <c>name</c> property.</summary>
        public string Name { get; set; }

        /// <summary>Written to the node's <c>title</c> property.</summary>
        public string Title { get; set; }
    }
}
db.cs
using Neo4j.Driver.V1;
namespace DTUneo4jConsoleApp.Db
{
    /// <summary>
    /// Single place that knows how to reach the database over Bolt.
    /// </summary>
    public class neo4jdb
    {
        private const string BoltUri = "bolt://localhost";
        private const string User = "user";
        private const string Password = "pass";

        /// <summary>
        /// Opens a driver and a session and disposes both again without running anything.
        /// Kept for existing callers; use <see cref="Run"/> to execute a statement.
        /// </summary>
        public static void Connection()
        {
            using (var driver = GraphDatabase.Driver(BoltUri, AuthTokens.Basic(User, Password)))
            using (var session = driver.Session())
            {
            }
        }

        /// <summary>
        /// Runs one Cypher statement in its own session and returns all records it produced.
        /// </summary>
        /// <param name="statement">The Cypher text to execute.</param>
        /// <returns>
        /// The records of the result, copied into a list before the session and the driver
        /// are disposed, so the caller can enumerate them afterwards.
        /// </returns>
        public System.Collections.Generic.List<IRecord> Run(string statement)
        {
            using (var driver = GraphDatabase.Driver(BoltUri, AuthTokens.Basic(User, Password)))
            using (var session = driver.Session())
            {
                // Materialise inside the using blocks: the result must be read while the session is open.
                return System.Linq.Enumerable.ToList(session.Run(statement));
            }
        }
    }
}
When I instantiate the class with neo4jdb session = new neo4jdb(); I don't get, e.g., the Run() method from the driver.
I hope someone can guide me in the right direction.
I am doing it like this:
/// <summary>
/// Runs all given statements inside one transaction on a freshly opened driver and session.
/// </summary>
/// <param name="statements">Statements to run, in order.</param>
/// <returns>One result per statement, in the same order as <paramref name="statements"/>.</returns>
public static List<IStatementResult> ExecuteCypher(List<Statement> statements)
{
    var collected = new List<IStatementResult>();

    using (var driver = GraphDatabase.Driver("bolt://localhost", AuthTokens.Basic("user", "pass")))
    using (var session = driver.Session())
    using (var tx = session.BeginTransaction())
    {
        foreach (var stmt in statements)
        {
            var outcome = tx.Run(stmt);
            collected.Add(outcome);
        }

        // Mark the transaction as successful once every statement has been run.
        tx.Success();
    }

    return collected;
}
usage:
var something = new MyProperties();

// Both statements go through ExecuteCypher in a single call: first the CREATE, then the MATCH.
var statements = new List<Statement>
{
    new Statement($"CREATE (a:Person {{name:'{something.Name}', title:'{something.Title}'}})"),
    new Statement($"MATCH (a:Person) WHERE a.name = '{something.Name}' RETURN a.name AS name, a.title AS title"),
};
var results = ExecuteCypher(statements);

// results holds one entry per statement; query whichever you need.
// Here only the last one (the MATCH) is printed.
foreach (var record in results.Last())
{
    var title = record["title"].As<string>();
    var name = record["name"].As<string>();
    Console.WriteLine($"{title} {name}");
}
In this way I can also create multiple records in a single transaction and get the result of all those as well.
For example, I have this code that creates a List:
/// <summary>
/// Loads the HTML file at <paramref name="filePath"/>, collects the inner text of every
/// <c>//a/b</c> node and stores the filtered result in <c>TextList</c>.
/// </summary>
/// <param name="filePath">Path of the HTML file to read (decoded with code page 65001).</param>
public static void CreateTextList(string filePath)
{
    var text = new List<string>();

    var htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.Load(filePath, System.Text.Encoding.GetEncoding(65001));

    if (htmlDoc.DocumentNode != null)
    {
        // SelectNodes returns null, not an empty collection, when nothing matches;
        // iterating it unguarded would throw.
        var nodes = htmlDoc.DocumentNode.SelectNodes("//a/b");
        if (nodes != null)
        {
            foreach (var node in nodes)
            {
                text.Add(node.InnerText);
            }
        }
    }

    TextList = Filters.filterNumbers(text);
}
And I have another two methods, each of which also creates another List.
But instead of creating 3 Lists I want to create one List of all 3 values.
At the top of form1 I added:
// Static list of plain strings.
private static List<string> test = new List<string>();

// Per-instance list of Name entries (First / Middle / Last).
private List<Name> list = new List<Name>();
In the bottom of form1 i added:
/// <summary>A name split into its three parts.</summary>
public class Name
{
    /// <summary>First part of the name.</summary>
    public string First
    {
        get;
        set;
    }

    /// <summary>Middle part of the name.</summary>
    public string Middle
    {
        get;
        set;
    }

    /// <summary>Last part of the name.</summary>
    public string Last
    {
        get;
        set;
    }
}
Now i want in the method CreateTextList to do something like this:
/// <summary>
/// Loads the HTML file at <paramref name="filePath"/>, collects the inner text of every
/// <c>//a/b</c> node and stores the filtered result in <c>TextList</c>.
/// </summary>
/// <param name="filePath">Path of the HTML file to read (decoded with code page 65001).</param>
public static void CreateTextList(string filePath)
{
    var text = new List<string>();

    var htmlDoc = new HtmlAgilityPack.HtmlDocument();
    htmlDoc.OptionFixNestedTags = true;
    htmlDoc.Load(filePath, System.Text.Encoding.GetEncoding(65001));

    if (htmlDoc.DocumentNode != null)
    {
        // SelectNodes returns null, not an empty collection, when nothing matches;
        // iterating it unguarded would throw.
        var nodes = htmlDoc.DocumentNode.SelectNodes("//a/b");
        if (nodes != null)
        {
            foreach (var node in nodes)
            {
                text.Add(node.InnerText);
                // NOTE: this line does not compile. test is declared as List<string>,
                // so it cannot accept a Name instance.
                test.Add(new Name { First = "Brian"});
            }
        }
    }

    TextList = Filters.filterNumbers(text);
}
What i added is:
test.Add(new Name { First = "Brian"});
But I'm getting these errors:
Error 3 The best overloaded method match for 'System.Collections.Generic.List.Add(string)' has some invalid arguments
Error 4 Argument 1: cannot convert from 'ScrollLabelTest.ListsExtractions.Name' to 'string'
First, how do I fix the errors, and second, how can I actually build the test List with the logic I want?
So it will take the value from each method and in the end I will have a List of First Middle Last, First Middle Last, First Middle Last...
Errors 3 & 4: You are trying to add an object (Name) to a List of string. That is not possible. Second, change
test.Add(new Name { First = "Brian"});
to
list.Add(new Name { First = "First", Last = "LastName", Middle = "MiddleName" });
And then you can use foreach at the end to loop through the list, like this:
// Visit every Name stored in list; read its First, Middle or Last property as needed.
foreach (var entry in list)
{
    // var firstname = entry.First;
}
While exploring Roslyn I put together a small app that should include a trace statement as the first statement in every method found in a Visual Studio Solution. My code is buggy and is only updating the first method.
The line that is not working as expected is flagged with a “TODO” comment. Please, advise.
I also welcome style recommendations that would create a more streamlined/readable solution.
Thanks in advance.
...
// Click handler: builds a MyWorkspace for the solution file, injects the trace
// statements and applies the resulting changes.
private void TraceBtn_Click(object sender, RoutedEventArgs e) {
    // Verbatim string: in a regular literal "\P" and "\T" are unrecognised escape
    // sequences and the path would not compile.
    var myWorkSpace = new MyWorkspace(@"...Visual Studio 2012\Projects\Tests.sln");
    myWorkSpace.InjectTrace();
    myWorkSpace.ApplyChanges();
}
...
using System;
using System.Linq;
using Roslyn.Compilers;
using Roslyn.Compilers.CSharp;
using Roslyn.Services;
namespace InjectTrace
{
    /// <summary>
    /// Loads a Visual Studio solution and inserts a trace statement as the first
    /// statement of the methods declared in its classes.
    /// </summary>
    public class MyWorkspace
    {
        private string solutionFile;

        /// <summary>Path of the solution file.</summary>
        /// <exception cref="ArgumentException">The value is null or empty.</exception>
        public string SolutionFile {
            get { return solutionFile; }
            set {
                if (string.IsNullOrEmpty(value)) throw new ArgumentException("Invalid Solution File");
                solutionFile = value;
            }
        }

        private IWorkspace loadedWorkSpace;

        /// <summary>Workspace obtained by loading <see cref="SolutionFile"/>.</summary>
        public IWorkspace LoadedWorkSpace { get { return loadedWorkSpace; } }

        /// <summary>Solution as it was when <see cref="InjectTrace"/> started.</summary>
        public ISolution CurrentSolution { get; private set; }

        /// <summary>Project most recently visited by <see cref="InjectTrace"/>.</summary>
        public IProject CurrentProject { get; private set; }

        /// <summary>Document most recently visited by <see cref="InjectTrace"/>.</summary>
        public IDocument CurrentDocument { get; private set; }

        /// <summary>Solution containing the rewritten documents.</summary>
        public ISolution NewSolution { get; private set; }

        /// <summary>Stores the path and loads the solution.</summary>
        /// <param name="solutionFile">Path of the solution file; must not be null or empty.</param>
        public MyWorkspace(string solutionFile) {
            this.SolutionFile = solutionFile;
            this.loadedWorkSpace = Workspace.LoadSolution(SolutionFile);
        }

        /// <summary>
        /// Builds <see cref="NewSolution"/> by inserting a trace statement into every method
        /// with a body that is a direct member of a class.
        /// </summary>
        /// <remarks>Processing stops after the third project (see the counter below).</remarks>
        public void InjectTrace()
        {
            int projectCtr = 0;

            this.CurrentSolution = this.LoadedWorkSpace.CurrentSolution;
            this.NewSolution = this.CurrentSolution;

            //For Each Project...
            foreach (var projectId in LoadedWorkSpace.CurrentSolution.ProjectIds)
            {
                CurrentProject = NewSolution.GetProject(projectId);

                //..for each Document in the Project..
                foreach (var docId in CurrentProject.DocumentIds)
                {
                    CurrentDocument = NewSolution.GetDocument(docId);
                    var docRoot = CurrentDocument.GetSyntaxRoot();

                    //..collect every method of every class; methods without a body
                    //  have nowhere to put a statement and are left alone.
                    var methods = docRoot.DescendantNodes()
                        .OfType<ClassDeclarationSyntax>()
                        .SelectMany(classDeclaration => classDeclaration.Members.OfType<MethodDeclarationSyntax>())
                        .Where(method => method.Body != null)
                        .ToList();

                    if (methods.Count == 0) continue;

                    // All replacements are applied in ONE ReplaceNodes call. Calling ReplaceNode
                    // once per method on the evolving root only updates the first method: the
                    // later original nodes are no longer found in the rebuilt tree.
                    var newDocRoot = docRoot.ReplaceNodes(methods, (original, rewritten) => InsertTrace(original));

                    var newDocument = CurrentDocument.UpdateSyntaxRoot(newDocRoot);
                    NewSolution = NewSolution.UpdateDocument(newDocument);
                }

                // Only the first three projects are processed.
                projectCtr++;
                if (projectCtr > 2) return;
            }
        }

        /// <summary>
        /// Returns a copy of <paramref name="currMethod"/> whose body starts with
        /// <c>System.Diagnostics.Trace.WriteLine("Tracing: 'Namespace.Method'");</c>.
        /// </summary>
        /// <param name="currMethod">Method to instrument.</param>
        /// <returns>
        /// The instrumented method, or <paramref name="currMethod"/> itself when it has no body.
        /// </returns>
        public MethodDeclarationSyntax InsertTrace(MethodDeclarationSyntax currMethod) {
            if (currMethod.Body == null) return currMethod;

            // Enclosing namespaces, outermost first, followed by the method name. This also
            // covers nested namespaces and code outside any namespace, where requiring
            // exactly one enclosing namespace would throw.
            var nameParts = currMethod.Ancestors()
                .OfType<NamespaceDeclarationSyntax>()
                .Reverse()
                .Select(ns => ns.Name.ToString())
                .ToList();
            nameParts.Add(currMethod.Identifier.ValueText);

            var traceText =
                "System.Diagnostics.Trace.WriteLine(\"Tracing: '" + string.Join(".", nameParts) + "'\");";
            var traceStatement = Syntax.ParseStatement(traceText);

            var bodyStatementsWithTrace = currMethod.Body.Statements.Insert(0, traceStatement);
            var newBody = currMethod.Body.Update(Syntax.Token(SyntaxKind.OpenBraceToken), bodyStatementsWithTrace,
                                                 Syntax.Token(SyntaxKind.CloseBraceToken));
            return currMethod.ReplaceNode(currMethod.Body, newBody);
        }

        /// <summary>Applies the difference between <see cref="CurrentSolution"/> and <see cref="NewSolution"/> to the workspace.</summary>
        public void ApplyChanges() {
            LoadedWorkSpace.ApplyChanges(CurrentSolution, NewSolution);
        }
    }
}
The root problem of your code is that newDocRoot = newDocRoot.ReplaceNode(currMethod, newMethod); rebuilds newDocRoot's internal representation of the code, so the subsequent currMethod elements won't be found in it and the following ReplaceNode calls will do nothing. It is a situation similar to modifying a collection within its foreach loop.
The solution is to gather all necessary changes and apply them at once with the ReplaceNodes method. This in fact naturally leads to a simplification of the code, because we do not need to keep track of all those counters. We simply store all the needed transformations and apply them to the whole document at once.
Working code after changes:
/// <summary>
/// Builds <c>NewSolution</c> by replacing every method that is a direct member of a class
/// with the result of <c>InsertTrace</c>, one document at a time.
/// </summary>
public void InjectTrace()
{
    this.CurrentSolution = this.LoadedWorkSpace.CurrentSolution;
    this.NewSolution = this.CurrentSolution;

    //For Each Project...
    foreach (var projectId in LoadedWorkSpace.CurrentSolution.ProjectIds)
    {
        CurrentProject = NewSolution.GetProject(projectId);

        //..for each Document in the Project..
        foreach (var docId in CurrentProject.DocumentIds)
        {
            // original method -> method with the trace statement inserted
            var dict = new Dictionary<CommonSyntaxNode, CommonSyntaxNode>();

            CurrentDocument = NewSolution.GetDocument(docId);
            var docRoot = CurrentDocument.GetSyntaxRoot();
            var classes = docRoot.DescendantNodes().OfType<ClassDeclarationSyntax>();

            //..for each Class in the Document..
            foreach (var classDeclaration in classes)
            {
                var methods = classDeclaration.Members.OfType<MethodDeclarationSyntax>();

                //..for each Member in the Class..
                foreach (var currMethod in methods)
                {
                    //..insert a Trace Statement
                    dict.Add(currMethod, InsertTrace(currMethod));
                }
            }

            // Apply all replacements for this document in one go, looking each
            // replacement up by its original node.
            if (dict.Count > 0)
            {
                var newDocRoot = docRoot.ReplaceNodes(dict.Keys, (n1, n2) => dict[n1]);
                var newDocument = CurrentDocument.UpdateSyntaxRoot(newDocRoot);
                NewSolution = NewSolution.UpdateDocument(newDocument);
            }
        }
    }
}
/// <summary>
/// Uploads an image file to <c>http://api.imgur.com/2/upload.xml</c> and reports the parsed
/// response through <see cref="UploadComplete"/>.
/// </summary>
public sealed class ImgurUpload
{
    /// <summary>Raised after a successful upload, with the links parsed from the response.</summary>
    public event EventHandler<UploadCompleteEventArgs> UploadComplete;

    /// <summary>
    /// Starts an asynchronous upload of the file at <paramref name="location"/>.
    /// </summary>
    /// <param name="location">Path of the image file to upload.</param>
    /// <param name="key">Value sent in the <c>key</c> form field.</param>
    /// <param name="name">Value sent in the <c>name</c> form field.</param>
    /// <param name="caption">Value sent in the <c>caption</c> form field.</param>
    /// <remarks>
    /// Failures, both while starting the upload and while it is running, are shown in a
    /// message box; <see cref="UploadComplete"/> is not raised in that case.
    /// </remarks>
    public void PostToImgur(string location, string key, string name = "", string caption = "")
    {
        WebClient webClient = null;
        try
        {
            NameValueCollection values = new NameValueCollection
            {
                { "image", ConvertToBase64(location) },
                { "key", key },
                { "name", name },
                { "caption", caption}
            };

            // The client must stay alive until the upload has finished, so it is disposed
            // in the completion handler rather than by a using block around the async call.
            webClient = new WebClient();
            var client = webClient;

            // Subscribe before starting so the completion cannot be missed.
            client.UploadValuesCompleted += ((sender, eventArgs) =>
            {
                try
                {
                    if (eventArgs.Cancelled)
                        return;

                    // Reading Result of a failed upload throws; report the error instead.
                    if (eventArgs.Error != null)
                    {
                        MessageBox.Show(eventArgs.Error.Message);
                        return;
                    }

                    XDocument result;
                    using (var stream = new MemoryStream(eventArgs.Result))
                    using (var reader = XmlReader.Create(stream))
                    {
                        result = XDocument.Load(reader);
                    }

                    // Copy the delegate so a concurrent unsubscribe cannot null it between check and call.
                    var handler = UploadComplete;
                    if (handler != null)
                        handler(this, new UploadCompleteEventArgs(result));
                }
                finally
                {
                    client.Dispose();
                }
            });

            client.UploadValuesAsync(new Uri("http://api.imgur.com/2/upload.xml"), "POST", values);
        }
        catch (Exception ex)
        {
            // The upload never started; nothing else will dispose the client.
            if (webClient != null)
                webClient.Dispose();
            MessageBox.Show(ex.Message);
        }
    }

    /// <summary>Reads the whole file and returns its content as a Base64 string.</summary>
    private string ConvertToBase64(string imageLocation)
    {
        // ReadAllBytes reads the complete file; a single Stream.Read call may return fewer bytes.
        return Convert.ToBase64String(File.ReadAllBytes(imageLocation));
    }
}
/// <summary>
/// Event data for a finished upload: the three links found in the first
/// <c>links</c> element of the response document.
/// </summary>
public class UploadCompleteEventArgs : EventArgs
{
    /// <summary>Text of <c>links/original</c>, or null when absent.</summary>
    public string Original { get; private set; }

    /// <summary>Text of <c>links/imgur_page</c>, or null when absent.</summary>
    public string ImgurPage { get; private set; }

    /// <summary>Text of <c>links/delete_page</c>, or null when absent.</summary>
    public string DeletePage { get; private set; }

    /// <summary>Extracts the links from <paramref name="xmlDoc"/>.</summary>
    /// <param name="xmlDoc">Parsed response document.</param>
    /// <exception cref="ArgumentNullException"><paramref name="xmlDoc"/> is null.</exception>
    public UploadCompleteEventArgs(XDocument xmlDoc)
    {
        if (xmlDoc == null)
            throw new ArgumentNullException("xmlDoc");

        // Look the element up once instead of re-running the query for every property.
        XElement links = xmlDoc.Descendants("links").FirstOrDefault();
        if (links == null)
            return; // no links in the response: leave all three properties null

        // The explicit string conversion yields null for a missing element instead of throwing.
        Original = (string)links.Element("original");
        ImgurPage = (string)links.Element("imgur_page");
        DeletePage = (string)links.Element("delete_page");
    }
}
Above is a class I wrote to upload an image to imgur using the Anonymous API. I have used the API in the past and have always found it to be considerably slower than the website uploader and slower than other .NET applications that use web requests to effectively send data to the website directly rather than using the API.
I posted the full class above as it may be something I'm doing (or not doing) that's causing the issue. I'd really appreciate it if anybody can identify the issue for me.
I did some fair testing earlier today and one result, for example, is as follows:
800kb image via the imgur website = 35 seconds
800kb image via using my class = 1minute 20 seconds
The one you are uploading is ~35% bigger because you're uploading it as a STRING.
Upload via bytes and it should be just as fast.