I have not used the HtmlAgilityPack often and I'm stuck on the following issue.
I'm checking to see if the browser supports WebP, if yes I then append a new parameter to the src of the image.
I have that working, but I cannot work out how to return the updated HTML, any help will be appreciated.
/// <summary>
/// Appends WebP conversion parameters to the <c>src</c> of every <c>img</c> element in the
/// supplied markup when <c>BrowserSupportsWebPHelper.WebPSupported()</c> reports support.
/// </summary>
/// <param name="htmlText">Markup that may contain <c>img</c> elements.</param>
/// <returns>
/// The original instance when WebP is unsupported or no image has a <c>src</c>;
/// otherwise the re-serialised markup with the updated attributes.
/// </returns>
public static HtmlString AppendWebPString(HtmlString htmlText)
{
    if (!BrowserSupportsWebPHelper.WebPSupported()) return htmlText;

    var h = new HtmlDocument();
    h.LoadHtml(htmlText.ToString());

    // "@src" is the XPath attribute test. The result is checked for null because
    // that is how a "no match" result is detected here.
    var images = h.DocumentNode.SelectNodes("//img[@src]");
    if (images == null) return htmlText;

    const string webP = "quality=80&format=webp";
    foreach (HtmlNode image in images)
    {
        string src = image.Attributes["src"].Value;
        if (string.IsNullOrEmpty(src)) continue;

        // Start a query string when the address has none, otherwise extend the existing one.
        char separator = src.IndexOf('?') >= 0 ? '&' : '?';
        image.SetAttributeValue("src", src + separator + webP);
    }

    // Serialise the modified node tree; ParsedText would hand back the text that was loaded.
    return new HtmlString(h.DocumentNode.OuterHtml);
}
Ok, I could not find anything that was built into the agility pack to do what I wanted.
I have managed to achieve what I was after using the code below
/// <summary>
/// Appends WebP conversion parameters to the <c>src</c> of every <c>img</c> element in the
/// supplied markup when <c>BrowserSupportsWebPHelper.WebPSupported()</c> reports support.
/// </summary>
/// <param name="htmlText">Markup that may contain <c>img</c> elements.</param>
/// <returns>
/// The original instance when nothing needs to change; otherwise the updated markup,
/// serialised from the modified document instead of patched by text replacement.
/// </returns>
public static HtmlString AppendWebPString(HtmlString htmlText)
{
    bool browserSupportsWebP = BrowserSupportsWebPHelper.WebPSupported();
    if (!browserSupportsWebP) return htmlText;

    var h = new HtmlDocument();
    h.LoadHtml(htmlText.ToString());

    // Query the document once; "@src" selects only images that carry a src attribute.
    var imageNodes = h.DocumentNode.SelectNodes("//img[@src]");
    if (imageNodes == null) return htmlText;

    const string webP = "quality=80&format=webp";
    foreach (HtmlNode image in imageNodes)
    {
        string currentSrc = image.Attributes["src"].Value;
        if (string.IsNullOrEmpty(currentSrc)) continue;

        // Keep the whole original address; only choose the right joining character.
        string joiner = currentSrc.Contains("?") ? "&" : "?";
        image.SetAttributeValue("src", currentSrc + joiner + webP);
    }

    // The node tree already holds the new attribute values, so serialise it directly.
    return new HtmlString(h.DocumentNode.OuterHtml);
}
I'm having a problem with a basic piece of code:
// Dispose the reader (and its file handle) once the loop is done.
using (var objReader = new StreamReader(ofDialog.FileName))
{
    string line;
    // Read every line exactly once. Calling ReadLine twice per iteration consumes two
    // lines and yields null at end of file, which is what new Uri(null) complains about.
    while ((line = objReader.ReadLine()) != null)
    {
        // A blank line is not a URI; skip it rather than let the Uri constructor throw.
        if (string.IsNullOrWhiteSpace(line)) continue;

        Helpers.returnMessage(line);
        // trim the url to root
        var uri = new Uri(line);
        var host = uri.Host;
    }
}
I'm loading a .txt file of URLs, then trying to trim each one to its root using uri.Host, but I'm getting the error:
Value cannot be null.Parameter name: UriString
But if i hard code a url in: var uri = new Uri("https://stackoverflow.com/questions/ask");
It works fine, it seems to be when i'm loading from a .txt file.
any help would be appreciated.
Complete code:
/// <summary>
/// Lets the user pick a text file of URLs, downloads each listed page and adds to
/// <c>listViewMain</c> every link that starts with "http" and does not contain the
/// host of the page it was found on. Any exception message is shown via
/// <c>Helpers.returnMessage</c>.
/// </summary>
private void btnInput_Click(object sender, EventArgs e)
{
    // ofDialog settings
    ofDialog.Filter = @"TXT Files|*.txt";
    ofDialog.Title = @"Select your source backlink file...";
    ofDialog.FileName = "URLs.txt";

    // is cancel pressed?
    if (ofDialog.ShowDialog() == DialogResult.Cancel)
        return;

    try
    {
        // Both objects hold OS resources, so release them deterministically.
        using (var objReader = new StreamReader(ofDialog.FileName))
        using (var wc = new WebClient())
        {
            string x;
            // Each line is read exactly once and reused below. A second ReadLine call
            // would skip a URL and return null at the end of the file.
            while ((x = objReader.ReadLine()) != null)
            {
                if (string.IsNullOrWhiteSpace(x)) continue;

                Helpers.returnMessage(x);

                // trim the url to root
                var uri = new Uri(x);
                var host = uri.Host;

                // extract urls here
                var html = wc.DownloadString(x);

                // 1. Find all anchor elements in the page
                var m1 = Regex.Matches(html, @"(<a.*?>.*?</a>)",
                    RegexOptions.Singleline);

                // 2. Loop over each match
                foreach (Match m in m1)
                {
                    var value = m.Groups[1].Value;

                    // 3. Get href attribute; anchors without one are ignored
                    var m2 = Regex.Match(value, @"href=\""(.*?)\""",
                        RegexOptions.Singleline);
                    if (!m2.Success)
                    {
                        continue;
                    }
                    string href = m2.Groups[1].Value;

                    // Keep absolute links whose address does not mention the source host.
                    if (href.StartsWith("http", StringComparison.Ordinal) && !href.Contains(host))
                    {
                        // add urls to the listview
                        listViewMain.Items.Add(new ListViewItem(href));
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Helpers.returnMessage(ex.Message);
    }
}
returnMessage() literally just returns a message box popup.
Try this:
// Wrap the reader so the file handle is released even if a line fails to parse.
using (var objReader = new StreamReader(ofDialog.FileName))
{
    while (objReader.Peek() >= 0)
    {
        // Read the line once and reuse the value for both the message and the Uri.
        string x = objReader.ReadLine();
        // Blank lines cannot be parsed as a URI, so skip them.
        if (string.IsNullOrWhiteSpace(x)) continue;

        Helpers.returnMessage(x);
        // trim the url to root
        var uri = new Uri(x);
        var host = uri.Host;
    }
}
I'm using the following url and I need to delete the ?$format=xml from the url. Is there a simple way to achieve this ?
// The query part to be removed is "?$format=xml".
Uri uri = new Uri("https://ldcorp:435/mtp/op/ota/ind/Customer/?$format=xml");
Maybe with simple string methods:
// Cut the textual URI at the first '?', which drops the whole query string.
string uriString = uri.ToString();
int indexOfQuestionMark = uriString.IndexOf('?');
if (indexOfQuestionMark >= 0)
{
    uri = new Uri(uriString.Substring(0, indexOfQuestionMark));
}
or with the Uri class itself and string.Format:
// Rebuild the address from its parts, leaving out the query:
// scheme + "://" + host[:port] + path. One argument per placeholder.
string pathWithoutQuery = String.Format("{0}{1}{2}{3}",
    uri.Scheme, Uri.SchemeDelimiter, uri.Authority, uri.AbsolutePath);
uri = new Uri(pathWithoutQuery);
how to get host domain from a string URL?
GetDomain has 1 input "URL", 1 Output "Domain"
Example1
INPUT: http://support.domain.com/default.aspx?id=12345
OUTPUT: support.domain.com
Example2
INPUT: http://www.domain.com/default.aspx?id=12345
OUTPUT: www.domain.com
Example3
INPUT: http://localhost/default.aspx?id=12345
OUTPUT: localhost
You can use Request object or Uri object to get host of url.
Using Request.Url
string host = Request.Url.Host;
Using Uri
// Parse the address and keep only its Host component.
var myUri = new Uri("http://www.contoso.com:8080/");
var host = myUri.Host; // host is "www.contoso.com"
Try like this;
Uri.GetLeftPart( UriPartial.Authority )
Defines the parts of a URI for the Uri.GetLeftPart method.
http://www.contoso.com/index.htm?date=today --> http://www.contoso.com
http://www.contoso.com/index.htm#main --> http://www.contoso.com
nntp://news.contoso.com/123456#contoso.com --> nntp://news.contoso.com
file://server/filename.ext --> file://server
// GetLeftPart(UriPartial.Authority) yields the scheme and authority segments only.
var uriAddress = new Uri("http://www.contoso.com/index.htm#search");
string leftPart = uriAddress.GetLeftPart(UriPartial.Authority);
Console.WriteLine("The path of this Uri is {0}", leftPart);
Demo
Use Uri class and use Host property
// Host is the server name without scheme, path or query.
Uri url = new Uri(@"http://support.domain.com/default.aspx?id=12345");
Console.WriteLine(url.Host);
try following statement
Uri myuri = new Uri(System.Web.HttpContext.Current.Request.Url.AbsoluteUri);
string pathQuery = myuri.PathAndQuery;
// Take scheme + authority directly. Removing pathQuery with string.Replace would also
// strip every other occurrence of it (for a path of "/", all slashes in the address).
string hostName = myuri.GetLeftPart(UriPartial.Authority);
Example1
Input : http://localhost:4366/Default.aspx?id=notlogin
Output : http://localhost:4366
Example2
Input : http://support.domain.com/default.aspx?id=12345
Output: http://support.domain.com
The best and most reliable way to do it is to use the Uri.Authority property.
Load and use Uri like so :
// Example input; substitute the string that holds your URL.
string url = "http://support.domain.com/default.aspx?id=12345";
Uri NewUri;
// TryCreate reports failure through its return value, so invalid text is simply skipped.
if (Uri.TryCreate(url, UriKind.Absolute, out NewUri))
{
    Console.WriteLine(NewUri.Authority);
}
Input : http://support.domain.com/default.aspx?id=12345
Output : support.domain.com
Input : http://www.domain.com/default.aspx?id=12345
output : www.domain.com
Input : http://localhost/default.aspx?id=12345
Output : localhost
If you want to manipulate Url, using Uri object is the good way to do it.
https://msdn.microsoft.com/en-us/library/system.uri(v=vs.110).aspx
// INPUT
string input = "https://stackoverflow.com/questions/";
// Matches the scheme, the "//" and everything up to and including the next '/'.
Match match = Regex.Match(input, @"(http:|https:)\/\/(.*?)\/");
// OUTPUT: "https://stackoverflow.com/"
string output = match.Value;
Try this
Console.WriteLine(GetDomain.GetDomainFromUrl("http://support.domain.com/default.aspx?id=12345"));
It will output support.domain.com
Or try
Uri.GetLeftPart( UriPartial.Authority )
You should construct your string as URI object and Authority property returns what you need.
/// <summary>
/// Downloads a site icon into <paramref name="folder"/> under the application's local
/// folder and saves it as <paramref name="name"/>.png.
/// </summary>
/// <param name="URL">Address of the page the icon belongs to; used when the icon address is relative.</param>
/// <param name="MetaIcon">Icon address, either complete or relative (e.g. "/icon.png").</param>
/// <param name="folder">Sub-folder appended to the local folder path.</param>
/// <param name="name">File name without extension.</param>
/// <returns>
/// The path of the saved file. On failure the exception text is returned instead,
/// so callers must not assume the result is always a valid path.
/// </returns>
public static string DownloadImage(string URL, string MetaIcon,string folder,string name)
{
    try
    {
        string LocalState = Windows.Storage.ApplicationData.Current.LocalFolder.Path;
        string storesIcons = Directory.CreateDirectory(LocalState + folder).FullName;
        string path = Path.Combine(storesIcons, name + ".png");

        // WebClient is disposable; release it as soon as the download finishes.
        using (WebClient oClient = new WebClient())
        {
            // The icon address is not a complete URL, e.g. "/icon.png"
            if (!TextBoxEvent.IsValidURL(MetaIcon))
            {
                Uri uri = new Uri(URL);
                // Make sure exactly one '/' separates the host from the relative path.
                string separator = MetaIcon.StartsWith("/", StringComparison.Ordinal) ? string.Empty : "/";
                string iconUrl = "https://" + uri.Host + separator + MetaIcon;
                oClient.DownloadFile(new Uri(iconUrl), path);
            }
            // The icon address is complete, e.g. https://www.mercadolibre.com/icon.png
            else
            {
                oClient.DownloadFile(new Uri(MetaIcon), path);
            }
        }
        return path;
    }
    catch (Exception ex)
    {
        return ex.ToString();
    }
}
Here's a solution that will work for all kinds of URLs.
/// <summary>
/// Extracts the host part of a URL given as plain text, without a leading
/// "http://" or "https://" scheme and without a leading "www." label.
/// </summary>
/// <param name="url">URL text, with or without a scheme.</param>
/// <returns>The host (plus port, if present); an empty string for null or blank input.</returns>
public string GetDomainFromUrl(string url)
{
    if (string.IsNullOrWhiteSpace(url)) return string.Empty;

    string rest = url.Trim();

    // Strip the prefixes only at the start. A global Replace would also mangle hosts
    // such as "mywww.site.com" and URLs embedded in the query string.
    foreach (string scheme in new[] { "https://", "http://" })
    {
        if (rest.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
        {
            rest = rest.Substring(scheme.Length);
            break;
        }
    }
    if (rest.StartsWith("www.", StringComparison.OrdinalIgnoreCase))
    {
        rest = rest.Substring(4);
    }

    // The host ends at the first path, query or fragment delimiter.
    int end = rest.IndexOfAny(new[] { '/', '?', '#' });
    return end >= 0 ? rest.Substring(0, end) : rest;
}
it will take only domain name (www.bla.com -> bla)
no Uri required
/// <summary>
/// Returns the label that sits immediately before the last dot of a host name,
/// e.g. "www.bla.com" gives "bla".
/// </summary>
/// <param name="s">Host name text.</param>
/// <returns>
/// The label before the last dot; <paramref name="s"/> itself when it contains no dot;
/// an empty string for null or empty input.
/// </returns>
static string GetDomainNameOnly(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return "";
    }

    int lastDot = s.LastIndexOf('.');
    if (lastDot < 0)
    {
        return s;
    }

    // Only search backwards when there is something before the last dot; asking
    // LastIndexOf to start at index -1 throws for input such as ".com".
    int previousDot = lastDot > 0 ? s.LastIndexOf('.', lastDot - 1) : -1;
    return s.Substring(previousDot + 1, lastDot - previousDot - 1);
}
WWW is an alias, so you don't need it if you want a domain.
Here is my little function to get the real domain from a string:
/// <summary>
/// Keeps the last two dot-separated labels of the given text; text with two or
/// fewer labels is returned unchanged.
/// </summary>
private string GetDomain(string url)
{
    string[] labels = url.Split('.');
    int count = labels.Length;

    // Nothing to trim when there are at most two labels.
    if (count <= 2)
    {
        return url;
    }

    return labels[count - 2] + "." + labels[count - 1];
}
How to remove a query string by Key from a Url?
I have the below method which works fine but just wondering is there any better/shorter way? or a built-in .NET method which can do it more efficiently?
/// <summary>
/// Rebuilds <paramref name="url"/> without the query-string pairs whose name equals
/// <paramref name="key"/>. A URL without '?' is returned as is.
/// </summary>
public static string RemoveQueryStringByKey(string url, string key)
{
    int queryStart = url.IndexOf("?");
    if (queryStart == -1)
    {
        return url;
    }

    // Everything before the '?' is kept verbatim; pairs are appended back one by one.
    string rebuilt = url.Substring(0, queryStart);
    string separator = "?";

    foreach (string pair in url.Substring(queryStart + 1).Split('&'))
    {
        string name = pair.Split('=')[0];
        if (name != key)
        {
            rebuilt += separator;
            rebuilt += pair;
            // The first kept pair follows '?', every later one follows '&'.
            separator = "&";
        }
    }

    return rebuilt;
}
For example I can call it like:
// Removes the "xpid" pair and prints the remaining URL.
Console.WriteLine(RemoveQueryStringByKey(@"http://www.domain.com/uk_pa/PostDetail.aspx?hello=hi&xpid=4578", "xpid"));
Hope the question is clear.
Thanks,
This works well:
/// <summary>
/// Removes the query-string parameter named <paramref name="key"/> from an absolute URL.
/// </summary>
/// <param name="url">Absolute URL; the <see cref="Uri"/> constructor throws for anything else.</param>
/// <param name="key">Name of the parameter to remove.</param>
/// <returns>
/// The URL up to its path, followed by the remaining parameters as serialised by the
/// parsed collection, followed by the original fragment (if any).
/// </returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url);

    // this gets all the query string key value pairs as a collection
    var newQueryString = HttpUtility.ParseQueryString(uri.Query);

    // this removes the key if exists
    newQueryString.Remove(key);

    // this gets the page path from root without QueryString
    string pagePathWithoutQueryString = uri.GetLeftPart(UriPartial.Path);

    string result = newQueryString.Count > 0
        ? String.Format("{0}?{1}", pagePathWithoutQueryString, newQueryString)
        : pagePathWithoutQueryString;

    // GetLeftPart(Path) stops before the query, so re-attach the "#fragment" explicitly.
    return result + uri.Fragment;
}
an example:
RemoveQueryStringByKey("https://www.google.co.uk/search?#hl=en&output=search&sclient=psy-ab&q=cookie", "q");
and returns:
https://www.google.co.uk/search?#hl=en&output=search&sclient=psy-ab
// Parse the raw query text, drop the "xpid" entry and serialise what is left.
System.Collections.Specialized.NameValueCollection parsed =
    System.Web.HttpUtility.ParseQueryString("hello=hi&xpid=4578");
parsed.Remove("xpid");
string newQuerystring = parsed.ToString();
This still works in .NET 5.
There's a useful class called UriBuilder in the System namespace. We can use it along with a couple of extension methods to do the following:
// Parse the address, then drop "key1" through the Uri extension method.
Uri original = new Uri("http://example.com?key1=value1&key2=value2");
Uri u = original.DropQueryItem("key1");
Or like this:
// Same result through the builder: edit the query on a UriBuilder, then read back the Uri.
var b = new UriBuilder(new Uri("http://example.com?key1=value1&key2=value2"));
b.RemoveQueryItem("key1");
Uri u = b.Uri;
The extension methods:
using System;
using System.Collections.Specialized;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>Query-string helpers for <see cref="Uri"/>.</summary>
public static class UriExtensions
{
    /// <summary>
    /// Builds a new <see cref="Uri"/> from <paramref name="u"/> with the query item
    /// named <paramref name="key"/> removed via <c>RemoveQueryItem</c>.
    /// </summary>
    public static Uri DropQueryItem(this Uri u, string key)
    {
        var builder = new UriBuilder(u);
        builder.RemoveQueryItem(key);

        Uri result = builder.Uri;
        return result;
    }
}
/// <summary>
/// Extension methods that edit the query string of a <see cref="UriBuilder"/>.
/// Keys and values are handled as raw text; no URL encoding or decoding is applied.
/// </summary>
public static class UriBuilderExtensions
{
    // key   = one or more characters other than '&' and '='
    // value = everything after an optional '=' up to the next '&'
    private const string _ParseQueryPattern = @"(?<key>[^&=]+)={0,1}(?<value>[^&]*)";

    // Created once by the type initializer, so no lazy-initialisation race is possible.
    private static readonly Regex ParseQueryRegex =
        new Regex(_ParseQueryPattern, RegexOptions.Compiled | RegexOptions.Singleline);

    /// <summary>Adds the query item <paramref name="key"/>, or replaces its value when it already exists.</summary>
    public static void SetQueryItem(this UriBuilder b, string key, string value)
    {
        NameValueCollection parms = ParseQueryString(b.Query);
        parms[key] = value;
        b.Query = RenderQuery(parms);
    }

    /// <summary>Removes the query item <paramref name="key"/>; does nothing when it is absent.</summary>
    public static void RemoveQueryItem(this UriBuilder b, string key)
    {
        NameValueCollection parms = ParseQueryString(b.Query);
        parms.Remove(key);
        b.Query = RenderQuery(parms);
    }

    // Serialises the collection as "k1=v1&k2=v2" without a leading '?'.
    private static string RenderQuery(NameValueCollection parms)
    {
        string[] rendered = new string[parms.Count];
        for (int i = 0; i < parms.Count; i++)
        {
            string key = parms.Keys[i];
            rendered[i] = key + "=" + parms[key];
        }
        return string.Join("&", rendered);
    }

    /// <summary>
    /// Splits a query string (with or without a leading '?') into name/value pairs.
    /// Values of a repeated key are joined with ','.
    /// </summary>
    /// <param name="query">Query text; null or empty yields an empty collection.</param>
    /// <param name="caseSensitive">Whether key lookup distinguishes case.</param>
    public static NameValueCollection ParseQueryString(string query, bool caseSensitive = true)
    {
        NameValueCollection pairs = new NameValueCollection(caseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase);
        if (string.IsNullOrEmpty(query))
        {
            return pairs;
        }

        string q = query.Trim().TrimStart(new char[] {'?'});
        foreach (Match m in ParseQueryRegex.Matches(q))
        {
            string key = m.Groups["key"].Value;
            string value = m.Groups["value"].Value;
            string existing = pairs[key];
            pairs[key] = existing != null ? existing + "," + value : value;
        }
        return pairs;
    }
}
I know this is a rather old question, but everything I read felt a bit complicated.
/// <summary>
/// Returns a copy of <paramref name="originalUri"/> whose query no longer contains
/// the parameter named <paramref name="paramKey"/>.
/// </summary>
public Uri GetUriWithoutQueryParam( Uri originalUri, string paramKey ) {
    // Parse the query, drop the unwanted entry, then write the remainder into a builder.
    NameValueCollection remaining = HttpUtility.ParseQueryString( originalUri.Query );
    remaining.Remove( paramKey );

    UriBuilder builder = new UriBuilder( originalUri );
    builder.Query = remaining.ToString();
    return builder.Uri;
}
We can also do it using regex
string queryString = "Default.aspx?Agent=10&Language=2"; //Request.QueryString.ToString();
string parameterToRemove = "Language"; //parameter which we want to remove
// Escape the name so any regex metacharacters in it are matched literally.
string escapedName = Regex.Escape(parameterToRemove);

// Single pass using a lookbehind (verbatim strings keep the \s and \? escapes intact).
// Per the original note, this form will not work in JavaScript.
string regex = string.Format(@"(&{0}=[^&\s]+|(?<=\?){0}=[^&\s]+&?)", escapedName);
string finalQS = Regex.Replace(queryString, regex, "");

// Lookbehind-free variant in two passes; the same idea can be ported to JavaScript.
string regex1 = string.Format(@"(&{0}=[^&\s]+)", escapedName);
string regex2 = string.Format(@"(\?{0}=[^&\s]+&?)", escapedName);
// The second pattern consumes the '?', so it is put back by the replacement.
string finalQS2 = Regex.Replace(Regex.Replace(queryString, regex1, ""), regex2, "?");
https://regexr.com/3i9vj
How about this:
/// <summary>
/// Removes every query-string pair whose name is exactly <paramref name="key"/>.
/// </summary>
/// <param name="url">URL text; returned unchanged when null, empty or without a '?'.</param>
/// <param name="key">Parameter name to remove (ordinal, case-sensitive match).</param>
/// <returns>The URL with the matching pairs removed; path and fragment are kept verbatim.</returns>
string RemoveQueryStringByKey(string url, string key)
{
    if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(key)) return url;

    int queryStart = url.IndexOf('?');
    if (queryStart < 0) return url;

    // The query ends at the fragment, if there is one.
    int fragmentStart = url.IndexOf('#', queryStart);
    int queryEnd = fragmentStart < 0 ? url.Length : fragmentStart;
    string fragment = url.Substring(queryEnd);
    string query = url.Substring(queryStart + 1, queryEnd - queryStart - 1);

    string kept = string.Empty;
    foreach (string pair in query.Split('&'))
    {
        // Compare the whole parameter name, so "x" does not match "xy" or a value.
        int equalsAt = pair.IndexOf('=');
        string name = equalsAt < 0 ? pair : pair.Substring(0, equalsAt);
        if (name == key) continue;

        kept += (kept.Length == 0 ? "?" : "&") + pair;
    }

    return url.Substring(0, queryStart) + kept + fragment;
}
I found a way without using Regex:
/// <summary>
/// Removes every query-string pair named <paramref name="sKey"/> from
/// <paramref name="sURL"/> using plain string operations (no Regex).
/// </summary>
/// <param name="sURL">URL text; returned unchanged when null, empty or without a '?'.</param>
/// <param name="sKey">Parameter name to remove (exact, case-sensitive match).</param>
/// <returns>The URL without the matching pairs; path and fragment are preserved.</returns>
private string RemoveQueryStringByKey(string sURL, string sKey) {
    if (string.IsNullOrEmpty(sURL) || string.IsNullOrEmpty(sKey)) return (sURL);

    int iQuestion = sURL.IndexOf('?');
    if (iQuestion == -1) return (sURL);

    // Test each IndexOf result before doing arithmetic on it; adding an offset first
    // turns the "-1 = not found" marker into some other value.
    int iHash = sURL.IndexOf('#', iQuestion);
    if (iHash == -1) iHash = sURL.Length;

    string[] aPairs = sURL.Substring(iQuestion + 1, iHash - iQuestion - 1).Split('&');
    var sbOutput = new System.Text.StringBuilder(sURL.Substring(0, iQuestion));
    bool bFirst = true;

    for (int i = 0; i < aPairs.Length; i++) {
        // Compare the name in front of '=' only, never the value.
        string sName = aPairs[i].Split('=')[0];
        if (sName == sKey) continue;

        sbOutput.Append(bFirst ? '?' : '&').Append(aPairs[i]);
        bFirst = false;
    }

    sbOutput.Append(sURL.Substring(iHash));
    return (sbOutput.ToString());
}
I did try this with adding another field at the end, and it works fine for that too.
I think the shortest way (which I believe produces a valid URL in all cases, assuming the URL was valid to begin with) is to use this regex, where getRidOf is the name of the parameter you are trying to remove and the replacement is a zero-length string (""):
(?<=[?&])getRidOf=[^&]*(&|$)
or maybe even
\bgetRidOf=[^&]*(&|$)
while possibly not the absolute prettiest URLs, I think they are all valid:
INPUT OUTPUT
----------- ------------
blah.com/blah.php?getRidOf=d.co&blah=foo blah.com/blah.php?blah=foo
blah.com/blah.php?f=0&getRidOf=d.co&blah=foo blah.com/blah.php?f=0&blah=foo
blah.com/blah.php?hello=true&getRidOf=d.co blah.com/blah.php?hello=true&
blah.com/blah.php?getRidOf=d.co blah.com/blah.php?
and it's a simple regex replace:
' Matches "getRidOf=<value>" when preceded by '?' or '&', plus the '&' that follows it (if any).
Dim pattern As String = "(?<=[?&])getRidOf=[^&]*(&|$)"
Dim RegexObj As New Regex(pattern)
RegexObj.Replace("source.url.com/find.htm?replace=true&getRidOf=PLEASE!!!", "")
...should result in the string:
"source.url.com/find.htm?replace=true&"
...which seems to be valid for an ASP.Net application, while replace does equal true (not true& or anything like that)
I'll try to adapt it if you have a case where it won't work :)
/// <summary>
/// Removes every query-string pair named <paramref name="sKey"/> from <paramref name="sURL"/>.
/// </summary>
/// <param name="sURL">URL text; returned unchanged when null, empty or without a '?'.</param>
/// <param name="sKey">Parameter name to remove (exact, case-sensitive match).</param>
/// <returns>
/// The URL without the matching pairs; the original string when nothing matched.
/// </returns>
public static string RemoveQueryStringByKey(string sURL, string sKey)
{
    if (string.IsNullOrEmpty(sURL) || string.IsNullOrEmpty(sKey)) return (sURL);

    int iQuestion = sURL.IndexOf('?');
    if (iQuestion == -1) return (sURL);

    // Keep the fragment out of the pair list so it survives the rebuild.
    int iHash = sURL.IndexOf('#', iQuestion);
    string sFragment = iHash == -1 ? string.Empty : sURL.Substring(iHash);
    int iQueryEnd = iHash == -1 ? sURL.Length : iHash;

    // Skip the '?' itself; otherwise the first pair would read "?name=value".
    string[] allPairs = sURL.Substring(iQuestion + 1, iQueryEnd - iQuestion - 1).Split('&');

    List<string> keptPairs = new List<string>();
    foreach (string OneParamPair in allPairs)
    {
        // Match on the full parameter name, not on any substring of the pair.
        int iEquals = OneParamPair.IndexOf('=');
        string sName = iEquals == -1 ? OneParamPair : OneParamPair.Substring(0, iEquals);
        if (sName != sKey)
        {
            keptPairs.Add(OneParamPair);
        }
    }

    // Nothing matched: hand back the input untouched.
    if (keptPairs.Count == allPairs.Length) return (sURL);

    string sOutput = sURL.Substring(0, iQuestion);
    if (keptPairs.Count > 0)
    {
        sOutput += "?" + string.Join("&", keptPairs.ToArray());
    }
    return (sOutput + sFragment);
}
Run the code below before deleting a key from your QueryString.
// Look up the non-public instance property "IsReadOnly" on NameValueCollection via reflection.
const BindingFlags nonPublicInstance = BindingFlags.NonPublic | BindingFlags.Instance;
PropertyInfo isreadonly = typeof(System.Collections.Specialized.NameValueCollection)
    .GetProperty("IsReadOnly", nonPublicInstance);

var requestQuery = this.Request.QueryString;
// make collection editable
isreadonly.SetValue(requestQuery, false, null);
// remove
requestQuery.Remove("yourKey");
Sorry this is a bit dirty but should work in older framework
/// <summary>
/// Removes the parameter named <paramref name="keyName"/> (case-insensitive) from a URL
/// or from a bare query string.
/// </summary>
/// <param name="rawUrl">A URL, or just "a=1&amp;b=2" style query text; null or empty is returned as is.</param>
/// <param name="keyName">Parameter name to remove.</param>
/// <returns>The input without the matching pairs; a '?' left without pairs is removed.</returns>
public String RemoveQueryString( String rawUrl , String keyName)
{
    if (String.IsNullOrEmpty(rawUrl) || String.IsNullOrEmpty(keyName)) return rawUrl;

    // Set the fragment aside so it is not treated as part of the last value.
    int hashAt = rawUrl.IndexOf('#');
    String fragment = hashAt < 0 ? String.Empty : rawUrl.Substring(hashAt);
    String withoutFragment = hashAt < 0 ? rawUrl : rawUrl.Substring(0, hashAt);

    // Split the path from the query first. Splitting the whole URL on '&' glues the
    // first parameter to the path, so it could never be matched.
    int questionAt = withoutFragment.IndexOf('?');
    String prefix = questionAt < 0 ? String.Empty : withoutFragment.Substring(0, questionAt);
    String query = withoutFragment.Substring(questionAt + 1);

    String marker = keyName + "=";
    var currentURL_Split = query.Split('&')
        .Where(o => o.Length > 0 && !o.StartsWith(marker, StringComparison.OrdinalIgnoreCase))
        .ToList();

    String New_RemovedKey = String.Join("&", currentURL_Split.ToArray());
    if (questionAt >= 0 && New_RemovedKey.Length > 0)
    {
        New_RemovedKey = "?" + New_RemovedKey;
    }
    return prefix + New_RemovedKey + fragment;
}
Here is my solution:
I've added some extra input validation.
/// <summary>
/// Tries to remove the query-string parameter named <paramref name="key"/> from
/// <paramref name="url"/> in place.
/// </summary>
/// <param name="url">URL to edit; only modified when the method returns true.</param>
/// <param name="key">Parameter name to remove (case-insensitive match).</param>
/// <returns>
/// True when at least one pair was removed; false for null/empty arguments, a URL that
/// is not well formed, a URL without a query, or a key that is not present.
/// </returns>
public static bool TryRemoveQueryStringByKey(ref string url, string key)
{
    if (string.IsNullOrEmpty(url) ||
        string.IsNullOrEmpty(key) ||
        Uri.IsWellFormedUriString(url, UriKind.RelativeOrAbsolute) == false)
    {
        return false;
    }

    int queryStart = url.IndexOf('?');
    if (queryStart < 0)
    {
        return false;
    }

    int fragmentStart = url.IndexOf('#', queryStart);
    int queryEnd = fragmentStart < 0 ? url.Length : fragmentStart;
    string query = url.Substring(queryStart + 1, queryEnd - queryStart - 1);

    // Rebuild the query from the pairs that stay. This keeps '?' and '&' correct
    // whichever position the removed pair had, and uses one comparison rule throughout.
    string kept = string.Empty;
    bool removed = false;
    foreach (string pair in query.Split('&'))
    {
        int equalsAt = pair.IndexOf('=');
        string name = equalsAt < 0 ? pair : pair.Substring(0, equalsAt);
        if (string.Equals(name, key, StringComparison.OrdinalIgnoreCase))
        {
            removed = true;
            continue;
        }
        kept += (kept.Length == 0 ? "?" : "&") + pair;
    }

    if (!removed)
    {
        return false;
    }

    url = url.Substring(0, queryStart) + kept + url.Substring(queryEnd);
    return true;
}
Some unit testing examples:
// Key is the last of several parameters.
string url1 = "http://www.gmail.com?a=1&cookie=cookieValue";
Assert.IsTrue(TryRemoveQueryStringByKey(ref url1,"cookie")); //OUTPUT: "http://www.gmail.com?a=1"
// Key is the only parameter.
string url2 = "http://www.gmail.com?cookie=cookieValue";
Assert.IsTrue(TryRemoveQueryStringByKey(ref url2,"cookie")); //OUTPUT: "http://www.gmail.com"
// Key is present with an empty value (url3 is the variable under test here).
string url3 = "http://www.gmail.com?cookie=";
Assert.IsTrue(TryRemoveQueryStringByKey(ref url3,"cookie")); //OUTPUT: "http://www.gmail.com"
Here's a full solution that works with >= 0 params specified, and any form of URL:
/// <summary>
/// Single-parameter convenience overload: wraps <paramref name="paramToRemove"/> in a
/// one-element list and forwards to <c>StripQueryStringParams</c>.
/// </summary>
public static string StripQueryStringParam(string url, string paramToRemove)
{
    var single = new List<string>();
    single.Add(paramToRemove);
    return StripQueryStringParams(url, single);
}
/// <summary>
/// Given a URL in any format, return URL with specified query string params removed if they exist
/// </summary>
/// <param name="url">URL text, optionally with a query string and a "#fragment".</param>
/// <param name="paramsToRemove">Parameter names to remove; null or empty leaves the URL untouched.</param>
/// <returns>
/// The original string when nothing was removed; otherwise the part before '?', the
/// remaining parameters as serialised by the parsed collection, and the original fragment.
/// </returns>
public static string StripQueryStringParams(string url, List<string> paramsToRemove)
{
    if (string.IsNullOrEmpty(url) || paramsToRemove == null || !paramsToRemove.Any()) return url;

    // Even though in most cases # isn't available to context,
    // we may be passing it in explicitly for helper urls.
    // Split it off first so it is restored on every return path.
    var hashIndex = url.IndexOf('#');
    var hashUrlPart = hashIndex < 0 ? string.Empty : url.Substring(hashIndex);
    var withoutHash = hashIndex < 0 ? url : url.Substring(0, hashIndex);

    var queryIndex = withoutHash.IndexOf('?');
    if (queryIndex < 0) return url;

    var urlFirstPart = withoutHash.Substring(0, queryIndex);
    var querystring = withoutHash.Substring(queryIndex + 1);

    var nvc = HttpUtility.ParseQueryString(querystring);
    if (!nvc.HasKeys()) return url;

    // Remove any matches. AllKeys can contain null (a value given without a name), so guard it.
    var removedAny = false;
    foreach (var key in nvc.AllKeys)
    {
        if (key != null && paramsToRemove.Contains(key))
        {
            nvc.Remove(key);
            removedAny = true;
        }
    }

    // Nothing to strip: return the caller's text untouched.
    if (!removedAny) return url;

    if (nvc.Count == 0) return urlFirstPart + hashUrlPart;

    // Let the collection serialise itself instead of concatenating its values by hand.
    return urlFirstPart + "?" + nvc + hashUrlPart;
}
A more modern answer for this old question in case someone else stumbles across it like I did.
This is using the Uri class to parse the URL (can be skipped if your URL is already in a Uri object) and LINQ to filter the query string.
/// <summary>
/// Removes the query-string parameter named <paramref name="key"/> from an absolute URL,
/// using <see cref="Uri"/> to split the URL and LINQ to filter the pairs.
/// </summary>
/// <param name="url">Absolute URL; the <see cref="Uri"/> constructor throws otherwise.</param>
/// <param name="key">Parameter name to remove (ordinal, case-sensitive).</param>
/// <returns>Scheme, authority and path, then the remaining pairs (if any) and the fragment (if any).</returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url, UriKind.Absolute);

    // Compare only the name in front of '='; the full "name=value" text never equals the key.
    var queryParts = uri.Query
        .TrimStart('?')
        .Split(new[] { '&' }, StringSplitOptions.RemoveEmptyEntries)
        .Where(item => !string.Equals(item.Split('=')[0], key, StringComparison.Ordinal));

    var query = string.Join("&", queryParts);

    var result = uri.Scheme + Uri.SchemeDelimiter
        + uri.Authority
        + uri.AbsolutePath;

    // Only emit '?' when something is left to put after it.
    if (query.Length > 0)
    {
        result += "?" + query;
    }
    return result + uri.Fragment;
}
With reusing the signature from the accepted answer, but preserving the fragment and using QueryHelpers from Microsoft.AspNetCore.WebUtilities.
/// <summary>
/// Removes the query-string parameter named <paramref name="key"/> from a URL and keeps
/// the fragment, using <c>QueryHelpers</c> to parse and rebuild the query.
/// </summary>
/// <param name="url">URL passed to the <see cref="Uri"/> constructor.</param>
/// <param name="key">Parameter name to remove.</param>
/// <returns>The rebuilt URL when the key was present; otherwise <paramref name="url"/> unchanged.</returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url);
    var newQueryString = QueryHelpers.ParseQuery(uri.Query);

    // Remove reports whether the key existed; when it did not, return the input as is.
    if (!newQueryString.Remove(key))
    {
        return url;
    }

    // Each remaining entry is flattened to one string via its ToString().
    var remaining = newQueryString.ToDictionary(
        queryParam => queryParam.Key,
        queryParam => queryParam.Value.ToString());

    var urlWithNewQuery = QueryHelpers.AddQueryString(
        uri.GetLeftPart(UriPartial.Path),
        remaining);

    return $"{urlWithNewQuery}{uri.Fragment}";
}
string url = HttpContext.Current.Request.Url.AbsoluteUri;
// Split at the first '?' only, so at most two parts come back.
string[] separateURL = url.Split(new[] { '?' }, 2);
// A URL without a query has a single part; parse an empty query in that case.
NameValueCollection queryString = System.Web.HttpUtility.ParseQueryString(
    separateURL.Length > 1 ? separateURL[1] : string.Empty);
queryString.Remove("param_toremove");
// Only append '?' when parameters remain.
string revisedurl = queryString.Count > 0
    ? separateURL[0] + "?" + queryString.ToString()
    : separateURL[0];