Related
I'm working on creating an open source project for creating .NET UML Sequence Diagrams that leverages a javascript library called js-sequence-diagrams. I am not sure Roslyn is the right tool for the job, but I thought I would give it a shot so I have put together some proof of concept code which attempts to get all methods and their invocations and then outputs these invocations in a form that can be interpreted by js-sequence-diagrams.
The code generates some output, but it does not capture everything. I cannot seem to capture invocations made via extension methods, or invocations of static methods in static classes.
I do see invocations of methods with out parameters, but not in any form that extends the BaseMethodDeclarationSyntax
Here is the code (keep in mind this is proof of concept code and so I did not entirely follow best-practices, but I am not requesting a code review here ... also, I am used to using Tasks so I am messing around with await, but am not entirely sure I am using it properly yet)
https://gist.github.com/SoundLogic/11193841
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection.Emit;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Formatting;
using Microsoft.CodeAnalysis.MSBuild;
using Microsoft.CodeAnalysis.FindSymbols;
using System.Collections.Immutable;
namespace Diagrams
{
/// <summary>
/// Console entry point: opens the hard-coded "Diagrams" solution and asks
/// <see cref="DiagramGenerator"/> to print its call information.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        string solutionName = "Diagrams";
        string solutionExtension = ".sln";
        string solutionFileName = solutionName + solutionExtension;
        // Verbatim (@) literals are required here: in a regular literal the trailing
        // backslash would escape the closing quote and the code would not compile.
        string rootPath = @"C:\Workspace\";
        string solutionPath = rootPath + solutionName + @"\" + solutionFileName;
        MSBuildWorkspace workspace = MSBuildWorkspace.Create();
        DiagramGenerator diagramGenerator = new DiagramGenerator( solutionPath, workspace );
        // ProcessSolution is declared async void, so it cannot be awaited from here;
        // the ReadKey call below is what keeps the process alive while it runs.
        diagramGenerator.ProcessSolution();
        #region reference
        //TODO: would ReferencedSymbol.Locations be a better way of accessing MethodDeclarationSyntaxes?
        //INamedTypeSymbol programClass = compilation.GetTypeByMetadataName("DotNetDiagrams.Program");
        //IMethodSymbol barMethod = programClass.GetMembers("Bar").First(s => s.Kind == SymbolKind.Method) as IMethodSymbol;
        //IMethodSymbol fooMethod = programClass.GetMembers("Foo").First(s => s.Kind == SymbolKind.Method) as IMethodSymbol;
        //ITypeSymbol fooSymbol = fooMethod.ContainingType;
        //ITypeSymbol barSymbol = barMethod.ContainingType;
        //Debug.Assert(barMethod != null);
        //Debug.Assert(fooMethod != null);
        //List<ReferencedSymbol> barReferencedSymbols = SymbolFinder.FindReferencesAsync(barMethod, solution).Result.ToList();
        //List<ReferencedSymbol> fooReferencedSymbols = SymbolFinder.FindReferencesAsync(fooMethod, solution).Result.ToList();
        //Debug.Assert(barReferencedSymbols.First().Locations.Count() == 1);
        //Debug.Assert(fooReferencedSymbols.First().Locations.Count() == 0);
        #endregion
        Console.ReadKey();
    }
}
/// <summary>
/// Loads a solution and, for every method (and delegate) declared inside a class, writes the
/// invocations of it found in the same syntax tree to the console as js-sequence-diagrams
/// lines of the form "Actor->Target: caller=>callee" followed by "Target-->Actor: returns T".
/// </summary>
class DiagramGenerator
{
    private readonly Solution _solution;

    /// <summary>Opens the solution at <paramref name="solutionPath"/> using <paramref name="workspace"/>.</summary>
    public DiagramGenerator( string solutionPath, MSBuildWorkspace workspace )
    {
        // Constructors cannot await, so this blocks until the solution has been opened.
        _solution = workspace.OpenSolutionAsync(solutionPath).Result;
    }

    /// <summary>Compiles each project of the solution in turn and processes its syntax trees.</summary>
    public async void ProcessSolution()
    {
        foreach (Project project in _solution.Projects)
        {
            Compilation compilation = await project.GetCompilationAsync();
            // Awaited so that projects are processed strictly one after another and any
            // failure surfaces through this method instead of a nested async void.
            await ProcessCompilation(compilation);
        }
    }

    /// <summary>Visits every class declaration in every syntax tree of <paramref name="compilation"/>.</summary>
    private async Task ProcessCompilation(Compilation compilation)
    {
        foreach (var tree in compilation.SyntaxTrees)
        {
            var root = await tree.GetRootAsync();
            foreach (var classDeclaration in root.DescendantNodes().OfType<ClassDeclarationSyntax>())
            {
                ProcessClass(classDeclaration, compilation, tree, root);
            }
        }
    }

    /// <summary>
    /// For each method and delegate declared in <paramref name="classDeclaration"/>, finds the
    /// invocations under <paramref name="root"/> that bind to it and prints them.
    /// </summary>
    private void ProcessClass(
        ClassDeclarationSyntax classDeclaration
        , Compilation compilation
        , SyntaxTree tree
        , SyntaxNode root)
    {
        // Both are identical for every member of the class, so compute them once.
        var model = compilation.GetSemanticModel(tree);
        var allInvocations = root.DescendantNodes().OfType<InvocationExpressionSyntax>().ToList();

        foreach (var method in classDeclaration.DescendantNodes().OfType<MethodDeclarationSyntax>())
        {
            // Get MethodSymbol corresponding to method
            var methodSymbol = model.GetDeclaredSymbol(method);
            // Use GetSymbolInfo() to find invocations of target method
            var matchingInvocations = allInvocations.Where(i => IsInvocationOf(model, i, methodSymbol));
            ProcessMethod(matchingInvocations, method, classDeclaration);
        }

        foreach (var delegateDeclaration in classDeclaration.DescendantNodes().OfType<DelegateDeclarationSyntax>())
        {
            // Get the symbol declared by the delegate declaration
            var delegateSymbol = model.GetDeclaredSymbol(delegateDeclaration);
            var matchingInvocations = allInvocations.Where(i => IsInvocationOf(model, i, delegateSymbol));
            ProcessDelegates(matchingInvocations, delegateDeclaration, classDeclaration);
        }
    }

    /// <summary>
    /// Returns true when <paramref name="invocation"/> binds to <paramref name="target"/>.
    /// Invocations the semantic model cannot resolve have a null Symbol and never match,
    /// instead of throwing a NullReferenceException.
    /// </summary>
    private static bool IsInvocationOf(SemanticModel model, InvocationExpressionSyntax invocation, ISymbol target)
    {
        ISymbol invoked = model.GetSymbolInfo(invocation).Symbol;
        return invoked != null && invoked.Equals(target);
    }

    /// <summary>Prints one call/return pair for every invocation that sits inside a method declaration.</summary>
    private void ProcessMethod(
        IEnumerable<InvocationExpressionSyntax> matchingInvocations
        , MethodDeclarationSyntax methodDeclarationSyntax
        , ClassDeclarationSyntax classDeclarationSyntax )
    {
        foreach (var invocation in matchingInvocations)
        {
            MethodDeclarationSyntax actingMethodDeclarationSyntax = null;
            if (SyntaxNodeHelper.TryGetParentSyntax(invocation, out actingMethodDeclarationSyntax))
            {
                var r = methodDeclarationSyntax;
                var m = actingMethodDeclarationSyntax;
                PrintCallerInfo(
                    invocation
                    , classDeclarationSyntax
                    , m.Identifier.ToFullString()
                    , r.ReturnType.ToFullString()
                    , r.Identifier.ToFullString()
                    , r.ParameterList.ToFullString()
                    , r.TypeParameterList != null ? r.TypeParameterList.ToFullString() : String.Empty
                    );
            }
        }
    }

    /// <summary>Prints one call/return pair for every invocation that sits inside a delegate declaration.</summary>
    private void ProcessDelegates(
        IEnumerable<InvocationExpressionSyntax> matchingInvocations
        , DelegateDeclarationSyntax delegateDeclarationSyntax
        , ClassDeclarationSyntax classDeclarationSyntax )
    {
        foreach (var invocation in matchingInvocations)
        {
            DelegateDeclarationSyntax actingMethodDeclarationSyntax = null;
            if (SyntaxNodeHelper.TryGetParentSyntax(invocation, out actingMethodDeclarationSyntax))
            {
                var r = delegateDeclarationSyntax;
                var m = actingMethodDeclarationSyntax;
                PrintCallerInfo(
                    invocation
                    , classDeclarationSyntax
                    , m.Identifier.ToFullString()
                    , r.ReturnType.ToFullString()
                    , r.Identifier.ToFullString()
                    , r.ParameterList.ToFullString()
                    , r.TypeParameterList != null ? r.TypeParameterList.ToFullString() : String.Empty
                    );
            }
        }
    }

    /// <summary>
    /// Writes the diagram lines for one invocation. The actor is the class that contains the
    /// invocation; the target is <paramref name="classBeingCalled"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The invocation has no enclosing class declaration.</exception>
    private void PrintCallerInfo(
        InvocationExpressionSyntax invocation
        , ClassDeclarationSyntax classBeingCalled
        , string callingMethodName
        , string returnType
        , string calledMethodName
        , string calledMethodArguments
        , string calledMethodTypeParameters = null )
    {
        ClassDeclarationSyntax parentClassDeclarationSyntax = null;
        if (!SyntaxNodeHelper.TryGetParentSyntax(invocation, out parentClassDeclarationSyntax))
        {
            throw new InvalidOperationException("Invocation is not located inside a class declaration.");
        }
        calledMethodTypeParameters = calledMethodTypeParameters ?? String.Empty;
        var actedUpon = classBeingCalled.Identifier.ValueText;
        var actor = parentClassDeclarationSyntax.Identifier.ValueText;
        var callInfo = callingMethodName + "=>" + calledMethodName + calledMethodTypeParameters + calledMethodArguments;
        var returnCallInfo = returnType;
        string info = BuildCallInfo(
            actor
            , actedUpon
            , callInfo
            , returnCallInfo);
        Console.Write(info);
    }

    /// <summary>Builds the two-line "call" and "return" text, each collapsed onto a single line.</summary>
    private string BuildCallInfo(string actor, string actedUpon, string callInfo, string returnInfo)
    {
        const string calls = "->";
        const string returns = "-->";
        const string descriptionSeparator = ": ";
        string callingInfo = actor + calls + actedUpon + descriptionSeparator + callInfo;
        string returningInfo = actedUpon + returns + actor + descriptionSeparator + "returns " + returnInfo;
        // Syntax text may span lines; the output format expects one statement per line.
        callingInfo = callingInfo.RemoveNewLines(true);
        returningInfo = returningInfo.RemoveNewLines(true);
        string result = callingInfo + Environment.NewLine;
        result += returningInfo + Environment.NewLine;
        return result;
    }
}
/// <summary>Helpers for walking up a syntax tree.</summary>
static class SyntaxNodeHelper
{
    /// <summary>
    /// Searches the ancestors of <paramref name="syntaxNode"/> (the node itself is not considered)
    /// for the nearest one whose runtime type is exactly <typeparamref name="T"/>.
    /// </summary>
    /// <param name="syntaxNode">Node whose ancestors are searched; may be null.</param>
    /// <param name="result">The matching ancestor, or null when none is found.</param>
    /// <returns>True when a matching ancestor was found.</returns>
    public static bool TryGetParentSyntax<T>(SyntaxNode syntaxNode, out T result)
        where T : SyntaxNode
    {
        // set defaults
        result = null;
        if (syntaxNode == null)
        {
            return false;
        }

        // Plain loop instead of recursion wrapped in a catch-all: nothing here is expected
        // to throw, and swallowing every exception would only hide real failures.
        for (SyntaxNode current = syntaxNode.Parent; current != null; current = current.Parent)
        {
            // Exact type comparison: derived node types do not match.
            if (current.GetType() == typeof (T))
            {
                result = (T)current;
                return true;
            }
        }
        return false;
    }
}
/// <summary>String helpers used when flattening multi-line text.</summary>
public static class StringEx
{
    /// <summary>
    /// Splits the text on the characters of <see cref="Environment.NewLine"/> (and on spaces when
    /// <paramref name="cleanWhitespace"/> is true), drops empty pieces and joins the rest with
    /// single spaces.
    /// </summary>
    /// <returns>The flattened text, or the original string when splitting yields no pieces.</returns>
    public static string RemoveNewLines(this string stringWithNewLines, bool cleanWhitespace = false)
    {
        string separators = Environment.NewLine;
        if (cleanWhitespace)
        {
            separators += " ";
        }

        string[] pieces = stringWithNewLines.Split(separators.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (pieces.Length == 0)
        {
            // Nothing but separators (or empty input): hand back the input untouched.
            return stringWithNewLines;
        }
        return string.Join(" ", pieces);
    }
}
}
Any guidance here would be much appreciated!
Using the methodSymbol in the ProcessClass method I took Andy's suggestion and came up with the below (although I imagine there may be an easier way to go about this):
/// <summary>
/// Collects, for every call-site location reported by SymbolFinder.FindCallersAsync for
/// <paramref name="methodSymbol"/>, the method declarations that enclose that location.
/// </summary>
/// <returns>The enclosing method declarations; empty when there are no caller locations.</returns>
private async Task<List<MethodDeclarationSyntax>> GetMethodSymbolReferences( IMethodSymbol methodSymbol )
{
    var enclosingMethods = new List<MethodDeclarationSyntax>();
    IEnumerable<SymbolCallerInfo> callers = await SymbolFinder.FindCallersAsync(methodSymbol, _solution);

    foreach (Location callSite in callers.SelectMany(caller => caller.Locations))
    {
        SyntaxNode treeRoot = await callSite.SourceTree.GetRootAsync();
        // Start from the token at the beginning of the call site and walk outwards.
        SyntaxToken startToken = treeRoot.FindToken(callSite.SourceSpan.Start);
        enclosingMethods.AddRange(startToken.Parent.AncestorsAndSelf().OfType<MethodDeclarationSyntax>());
    }

    return enclosingMethods;
}
and the resulting image generated by plugging the output text into js-sequence-diagrams (I have updated the GitHub gist with the full source for this should anyone find it useful - I excluded method parameters so the diagram is easy to digest, but these can optionally be turned back on):
Edit:
I've updated the code (see the github gist) so now calls are shown in the order they were made (based on the span start location of a called method from within the calling method via results from FindCallersAsync):
How to remove a query string by Key from a Url?
I have the below method which works fine but just wondering is there any better/shorter way? or a built-in .NET method which can do it more efficiently?
/// <summary>
/// Returns <paramref name="url"/> with every "key=value" pair whose name equals
/// <paramref name="key"/> removed from the part after the first '?'.
/// </summary>
/// <returns>The rebuilt URL; the '?' is omitted when no pairs remain. URLs without '?' are returned as-is.</returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var queryStart = url.IndexOf("?");
    if (queryStart < 0)
    {
        return url;
    }

    var basePart = url.Substring(0, queryStart);
    var pairs = url.Substring(queryStart + 1).Split('&');
    // Keep every pair whose name (the text before the first '=') differs from the key.
    var kept = Array.FindAll(pairs, pair => pair.Split('=')[0] != key);

    return kept.Length == 0
        ? basePart
        : basePart + "?" + string.Join("&", kept);
}
For example I can call it like:
// Prints the URL with the "xpid" parameter removed.
Console.WriteLine(RemoveQueryStringByKey(@"http://www.domain.com/uk_pa/PostDetail.aspx?hello=hi&xpid=4578", "xpid"));
Hope the question is clear.
Thanks,
This works well:
/// <summary>
/// Removes the query-string parameter named <paramref name="key"/> from an absolute URL.
/// </summary>
/// <param name="url">Absolute URL; <c>new Uri(url)</c> throws for anything else.</param>
/// <param name="key">Name of the parameter to remove.</param>
/// <returns>
/// The URL rebuilt from its path, the remaining (re-encoded) query pairs and its fragment.
/// The '?' is omitted when no parameters remain.
/// </returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url);
    // this gets all the query string key value pairs as a collection
    var newQueryString = HttpUtility.ParseQueryString(uri.Query);
    // this removes the key if exists
    newQueryString.Remove(key);
    // this gets the page path from root without QueryString (and without fragment)
    string pagePathWithoutQueryString = uri.GetLeftPart(UriPartial.Path);
    string rebuilt = newQueryString.Count > 0
        ? String.Format("{0}?{1}", pagePathWithoutQueryString, newQueryString)
        : pagePathWithoutQueryString;
    // GetLeftPart(UriPartial.Path) drops the "#fragment"; put it back so it is not lost.
    return rebuilt + uri.Fragment;
}
an example:
RemoveQueryStringByKey("https://www.google.co.uk/search?#hl=en&output=search&sclient=psy-ab&q=cookie", "q");
and returns:
https://www.google.co.uk/search?#hl=en&output=search&sclient=psy-ab
var queryString = "hello=hi&xpid=4578";
var qs = System.Web.HttpUtility.ParseQueryString(queryString);
qs.Remove("xpid");
var newQuerystring = qs.ToString();
This still works in .NET 5.
There's a useful class called UriBuilder in the System namespace. We can use it along with a couple of extension methods to do the following:
Uri u = new Uri("http://example.com?key1=value1&key2=value2");
u = u.DropQueryItem("key1");
Or like this:
Uri u = new Uri("http://example.com?key1=value1&key2=value2");
UriBuilder b = new UriBuilder(u);
b.RemoveQueryItem("key1");
u = b.Uri;
The extension methods:
using System;
using System.Collections.Specialized;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>Query-string helpers for <see cref="Uri"/>.</summary>
public static class UriExtensions
{
    /// <summary>
    /// Returns a URI built from <paramref name="u"/> after removing the query item named
    /// <paramref name="key"/> via <c>UriBuilderExtensions.RemoveQueryItem</c>.
    /// </summary>
    public static Uri DropQueryItem(this Uri u, string key)
    {
        var builder = new UriBuilder(u);
        UriBuilderExtensions.RemoveQueryItem(builder, key);
        return builder.Uri;
    }
}
/// <summary>
/// Extension methods that read and rewrite the query string of a <see cref="UriBuilder"/>
/// without depending on System.Web.
/// </summary>
public static class UriBuilderExtensions
{
    // key: everything up to '&' or '='; value: everything after an optional '=' up to the next '&'.
    private const string ParseQueryPattern = @"(?<key>[^&=]+)={0,1}(?<value>[^&]*)";

    // Created once by the static initializer, which is thread-safe, unlike the previous
    // check-then-assign lazy property.
    private static readonly Regex ParseQueryRegex =
        new Regex(ParseQueryPattern, RegexOptions.Compiled | RegexOptions.Singleline);

    /// <summary>Sets (adding or replacing) the query item <paramref name="key"/> to <paramref name="value"/>.</summary>
    public static void SetQueryItem(this UriBuilder b, string key, string value)
    {
        NameValueCollection parms = ParseQueryString(b.Query);
        parms[key] = value;
        b.Query = RenderQuery(parms);
    }

    /// <summary>Removes the query item <paramref name="key"/> if it is present.</summary>
    public static void RemoveQueryItem(this UriBuilder b, string key)
    {
        NameValueCollection parms = ParseQueryString(b.Query);
        parms.Remove(key);
        b.Query = RenderQuery(parms);
    }

    /// <summary>Renders the collection as "k1=v1&amp;k2=v2" with no leading '?'. Values are written as stored.</summary>
    private static string RenderQuery(NameValueCollection parms)
    {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < parms.Count; i++)
        {
            if (i > 0)
            {
                sb.Append('&');
            }
            string key = parms.Keys[i];
            sb.Append(key).Append('=').Append(parms[key]);
        }
        return sb.ToString();
    }

    /// <summary>
    /// Parses a query string (with or without a leading '?') into a name/value collection.
    /// Values of repeated keys are joined with ','. No URL decoding is performed.
    /// </summary>
    /// <param name="query">The raw query string.</param>
    /// <param name="caseSensitive">When false, keys are compared with OrdinalIgnoreCase.</param>
    public static NameValueCollection ParseQueryString(string query, bool caseSensitive = true)
    {
        NameValueCollection pairs = new NameValueCollection(caseSensitive ? StringComparer.Ordinal : StringComparer.OrdinalIgnoreCase);
        string q = query.Trim().TrimStart(new char[] {'?'});
        foreach (Match m in ParseQueryRegex.Matches(q))
        {
            string key = m.Groups["key"].Value;
            string value = m.Groups["value"].Value;
            if (pairs[key] != null)
            {
                pairs[key] = pairs[key] + "," + value;
            }
            else
            {
                pairs[key] = value;
            }
        }
        return pairs;
    }
}
I know this is a rather old question, but everything I read felt a bit complicated.
/// <summary>
/// Returns a copy of <paramref name="originalUri"/> whose query no longer contains the
/// parameter named <paramref name="paramKey"/>.
/// </summary>
public Uri GetUriWithoutQueryParam( Uri originalUri, string paramKey ) {
    NameValueCollection remaining = HttpUtility.ParseQueryString( originalUri.Query );
    remaining.Remove( paramKey );

    // UriBuilder keeps scheme, host, path and fragment; only the query is replaced.
    var builder = new UriBuilder( originalUri );
    builder.Query = remaining.ToString();
    return builder.Uri;
}
We can also do it using regex
string queryString = "Default.aspx?Agent=10&Language=2"; //Request.QueryString.ToString();
string parameterToRemove = "Language"; // parameter which we want to remove
// Escape the name so characters such as '.' or '[' in it are matched literally.
string escapedParameter = Regex.Escape(parameterToRemove);
// Verbatim strings: \s and \? are regex escapes, not valid C# string escapes.
// The lookbehind (?<=\?) is not available in every regex flavour; see the two-step variant below.
string regex = string.Format(@"(&{0}=[^&\s]+|(?<=\?){0}=[^&\s]+&?)", escapedParameter);
string finalQS = Regex.Replace(queryString, regex, "");
// Lookbehind-free variant (the idea to use where lookbehind is unavailable, e.g. older JavaScript):
// first drop "&name=value", then replace "?name=value&" with "?" so the query marker survives.
string regex1 = string.Format(@"(&{0}=[^&\s]+)", escapedParameter);
string regex2 = string.Format(@"(\?{0}=[^&\s]+&?)", escapedParameter);
string finalQS2 = Regex.Replace(Regex.Replace(queryString, regex1, ""), regex2, "?");
https://regexr.com/3i9vj
How about this:
/// <summary>
/// Returns <paramref name="url"/> without the query parameter whose name is exactly
/// <paramref name="key"/> (ordinal, case-sensitive, compared against the raw text).
/// </summary>
/// <returns>
/// The rewritten URL. The URL is returned unchanged when it has no query string or the key
/// is absent; the '?' is dropped when no parameters remain; a "#fragment" is preserved.
/// </returns>
string RemoveQueryStringByKey(string url, string key)
{
    // Split off the fragment first so a '?' or '&' inside it is never treated as query syntax.
    int fragmentStart = url.IndexOf('#');
    string fragment = fragmentStart == -1 ? string.Empty : url.Substring(fragmentStart);
    string head = fragmentStart == -1 ? url : url.Substring(0, fragmentStart);

    int queryStart = head.IndexOf('?');
    if (queryStart == -1)
    {
        return url;
    }

    string ret = head.Substring(0, queryStart);
    string separator = "?";
    foreach (string pair in head.Substring(queryStart + 1).Split('&'))
    {
        // Compare the whole parameter name, not a substring of the URL, so that
        // removing "id" does not also hit "xpid" or text in the host/path.
        int equalsIndex = pair.IndexOf('=');
        string name = equalsIndex == -1 ? pair : pair.Substring(0, equalsIndex);
        if (name == key)
        {
            continue;
        }
        ret += separator + pair;
        separator = "&";
    }
    return ret + fragment;
}
I found a way without using Regex:
/// <summary>
/// Returns <paramref name="sURL"/> without the query parameter named exactly
/// <paramref name="sKey"/> (ordinal comparison on the raw, undecoded name).
/// </summary>
/// <returns>
/// The rewritten URL; unchanged when there is no '?' or no such parameter. The '?' is dropped
/// when the query becomes empty and any "#fragment" is kept.
/// </returns>
private string RemoveQueryStringByKey(string sURL, string sKey) {
    // Isolate the fragment so it is neither searched nor lost.
    int iFragment = sURL.IndexOf('#');
    string sFragment = iFragment == -1 ? string.Empty : sURL.Substring(iFragment);
    string sHead = iFragment == -1 ? sURL : sURL.Substring(0, iFragment);

    int iQuestion = sHead.IndexOf('?');
    if (iQuestion == -1) {
        return sURL;
    }

    string sOutput = sHead.Substring(0, iQuestion);
    bool bFirst = true;
    foreach (string sPair in sHead.Substring(iQuestion + 1).Split('&')) {
        // The name is the text before the first '='; a pair without '=' is all name.
        int iEquals = sPair.IndexOf('=');
        string sName = iEquals == -1 ? sPair : sPair.Substring(0, iEquals);
        if (sName == sKey) {
            continue;
        }
        sOutput += (bFirst ? "?" : "&") + sPair;
        bFirst = false;
    }
    return sOutput + sFragment;
}
I did try this with adding another field at the end, and it works fine for that too.
I'm thinking the shortest way (that I believe produces a valid URL in all cases, assuming the URL was valid to begin with) would be to use this regex (where getRidOf is the variable name you are trying to remove) and the replacement is a zero-length string ""):
(?<=[?&])getRidOf=[^&]*(&|$)
or maybe even
\bgetRidOf=[^&]*(&|$)
while possibly not the absolute prettiest URLs, I think they are all valid:
INPUT OUTPUT
----------- ------------
blah.com/blah.php?getRidOf=d.co&blah=foo blah.com/blah.php?blah=foo
blah.com/blah.php?f=0&getRidOf=d.co&blah=foo blah.com/blah.php?f=0&blah=foo
blah.com/blah.php?hello=true&getRidOf=d.co blah.com/blah.php?hello=true&
blah.com/blah.php?getRidOf=d.co blah.com/blah.php?
and it's a simple regex replace:
Dim RegexObj as Regex = New Regex("(?<=[?&])getRidOf=[^&]*(&|$)")
RegexObj.Replace("source.url.com/find.htm?replace=true&getRidOf=PLEASE!!!", "")
...should result in the string:
"source.url.com/find.htm?replace=true&"
...which seems to be valid for an ASP.Net application, while replace does equal true (not true& or anything like that)
I'll try to adapt it if you have a case where it won't work :)
/// <summary>
/// Returns <paramref name="sURL"/> without the query parameter named exactly
/// <paramref name="sKey"/> (ordinal comparison on the raw, undecoded name).
/// </summary>
/// <returns>
/// The rewritten URL; unchanged when it has no query string or no such parameter. The '?' is
/// dropped when no parameters remain and any "#fragment" is kept.
/// </returns>
public static string RemoveQueryStringByKey(string sURL, string sKey)
{
    int iFragment = sURL.IndexOf('#');
    string sFragment = iFragment == -1 ? string.Empty : sURL.Substring(iFragment);
    string sBeforeFragment = iFragment == -1 ? sURL : sURL.Substring(0, iFragment);

    int iQuestion = sBeforeFragment.IndexOf('?');
    if (iQuestion == -1)
    {
        return sURL;
    }

    List<string> keptPairs = new List<string>();
    foreach (string OneParamPair in sBeforeFragment.Substring(iQuestion + 1).Split('&'))
    {
        // Match on the full parameter name; a substring search would also remove
        // parameters whose name or value merely contains the key.
        int iEquals = OneParamPair.IndexOf('=');
        string sName = iEquals == -1 ? OneParamPair : OneParamPair.Substring(0, iEquals);
        if (sName != sKey)
        {
            keptPairs.Add(OneParamPair);
        }
    }

    string sOutput = sBeforeFragment.Substring(0, iQuestion);
    if (keptPairs.Count > 0)
    {
        sOutput += "?" + string.Join("&", keptPairs.ToArray());
    }
    return sOutput + sFragment;
}
Run the code below before removing a key from Request.QueryString; the collection is read-only by default, so it first has to be made editable.
PropertyInfo isreadonly =
typeof(System.Collections.Specialized.NameValueCollection).GetProperty(
"IsReadOnly", BindingFlags.Instance | BindingFlags.NonPublic);
// make collection editable
isreadonly.SetValue(this.Request.QueryString, false, null);
// remove
this.Request.QueryString.Remove("yourKey");
Sorry this is a bit dirty but should work in older framework
/// <summary>
/// Removes every "keyName=value" pair (name compared case-insensitively) from the query part
/// of <paramref name="rawUrl"/>. When the input contains no '?', the whole input is treated
/// as a bare query string.
/// </summary>
/// <returns>The input without the matching pairs; empty pairs ("&amp;&amp;") are collapsed.</returns>
public String RemoveQueryString( String rawUrl , String keyName)
{
    int questionIndex = rawUrl.IndexOf('?');
    // Substring(0) when there is no '?': the entire input is the query.
    String query = rawUrl.Substring(questionIndex + 1);
    String prefix = keyName + "=";

    // Only the text after '?' is split, so the first parameter is matched like any other
    // instead of being glued to the path. OrdinalIgnoreCase replaces the ToLower() compare.
    var keptPairs = query.Split('&')
        .Where(o => o.Length > 0 && !o.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        .ToList();
    String newQuery = String.Join("&", keptPairs.ToArray());

    if (questionIndex < 0)
    {
        return newQuery;
    }
    String path = rawUrl.Substring(0, questionIndex);
    return keptPairs.Count == 0 ? path : path + "?" + newQuery;
}
Here is my solution:
I've added some extra input validation.
/// <summary>
/// Tries to remove the query parameter named <paramref name="key"/> (case-insensitive) from
/// <paramref name="url"/>, rewriting <paramref name="url"/> in place on success.
/// </summary>
/// <param name="url">Absolute URL to rewrite; left untouched when the method returns false.</param>
/// <param name="key">Name of the parameter to remove.</param>
/// <returns>
/// True when at least one matching parameter was removed; false for null/empty arguments,
/// URLs that are not well-formed absolute URIs, or when the key is not present.
/// </returns>
public static bool TryRemoveQueryStringByKey(ref string url, string key)
{
    if (string.IsNullOrEmpty(url) ||
        string.IsNullOrEmpty(key) ||
        Uri.IsWellFormedUriString(url, UriKind.RelativeOrAbsolute) == false)
    {
        return false;
    }

    // Only absolute URLs are accepted; TryCreate avoids using an exception for control flow.
    Uri parsed;
    if (!Uri.TryCreate(url, UriKind.Absolute, out parsed))
    {
        return false;
    }

    // Work on the original text so everything except the removed pair stays byte-identical.
    int fragmentStart = url.IndexOf('#');
    string fragment = fragmentStart == -1 ? string.Empty : url.Substring(fragmentStart);
    string head = fragmentStart == -1 ? url : url.Substring(0, fragmentStart);

    int queryStart = head.IndexOf('?');
    if (queryStart == -1)
    {
        return false;
    }

    string rebuilt = head.Substring(0, queryStart);
    string separator = "?";
    bool removed = false;
    foreach (string pair in head.Substring(queryStart + 1).Split('&'))
    {
        int equalsIndex = pair.IndexOf('=');
        string name = equalsIndex == -1 ? pair : pair.Substring(0, equalsIndex);
        if (string.Equals(name, key, StringComparison.OrdinalIgnoreCase))
        {
            removed = true;
            continue;
        }
        // Re-emitting '?' before the first kept pair keeps the URL valid even when the
        // removed parameter was the first one.
        rebuilt += separator + pair;
        separator = "&";
    }

    if (!removed)
    {
        return false;
    }
    url = rebuilt + fragment;
    return true;
}
Some unit testing examples:
string url1 = "http://www.gmail.com?a=1&cookie=cookieValue";
Assert.IsTrue(TryRemoveQueryStringByKey(ref url1,"cookie")); //OUTPUT: "http://www.gmail.com?a=1"
string url2 = "http://www.gmail.com?cookie=cookieValue";
Assert.IsTrue(TryRemoveQueryStringByKey(ref url2,"cookie")); //OUTPUT: "http://www.gmail.com"
string url3 = "http://www.gmail.com?cookie=";
// Exercise url3 here; re-testing url2 would fail because its key was already removed above.
Assert.IsTrue(TryRemoveQueryStringByKey(ref url3,"cookie")); //OUTPUT: "http://www.gmail.com"
Here's a full solution that works with >= 0 params specified, and any form of URL:
/// <summary>
/// Convenience overload: strips the single query string parameter
/// <paramref name="paramToRemove"/> from <paramref name="url"/> by delegating to
/// StripQueryStringParams with a one-element list.
/// </summary>
public static string StripQueryStringParam(string url, string paramToRemove)
{
    var singleParam = new List<string>();
    singleParam.Add(paramToRemove);
    return StripQueryStringParams(url, singleParam);
}
/// <summary>
/// Given a URL in any format, returns the URL with the specified query string params removed
/// if they exist.
/// </summary>
/// <param name="url">Absolute or relative URL, optionally with a "#fragment".</param>
/// <param name="paramsToRemove">Parameter names to remove (compared case-sensitively against the decoded names).</param>
/// <returns>
/// The URL with the remaining parameters re-encoded and the fragment preserved. The input is
/// returned unchanged when there is nothing to remove or no named parameters are present.
/// </returns>
public static string StripQueryStringParams(string url, List<string> paramsToRemove)
{
    if (paramsToRemove == null || !paramsToRemove.Any()) return url;

    // Even though in most cases # isn't available to context,
    // we may be passing it in explicitly for helper urls.
    // Split it off first so a '?' inside the fragment is not mistaken for the query marker.
    var hashIndex = url.IndexOf('#');
    var hashUrlPart = hashIndex < 0 ? string.Empty : url.Substring(hashIndex);
    var urlWithoutHash = hashIndex < 0 ? url : url.Substring(0, hashIndex);

    // Split on the FIRST '?' only; later '?' characters belong to the query itself.
    var queryIndex = urlWithoutHash.IndexOf('?');
    if (queryIndex < 0) return url;
    var urlFirstPart = urlWithoutHash.Substring(0, queryIndex);
    var querystring = urlWithoutHash.Substring(queryIndex + 1);

    var nvc = HttpUtility.ParseQueryString(querystring);
    if (!nvc.HasKeys()) return url;

    // Remove any matches (AllKeys is a snapshot, so removing while iterating is safe)
    foreach (var key in nvc.AllKeys)
    {
        if (paramsToRemove.Contains(key))
        {
            nvc.Remove(key);
        }
    }

    // Keep the fragment even when every parameter was removed.
    if (nvc.Count == 0) return urlFirstPart + hashUrlPart;

    // The collection returned by ParseQueryString renders itself as an encoded query string,
    // which also copes with valueless entries (null key) and repeated keys.
    return urlFirstPart + "?" + nvc + hashUrlPart;
}
A more modern answer for this old question in case someone else stumbles across it like I did.
This is using the Uri class to parse the URL (can be skipped if your URL is already in a Uri object) and LINQ to filter the query string.
/// <summary>
/// Removes the query parameter named <paramref name="key"/> (ordinal comparison on the raw
/// name) from an absolute URL.
/// </summary>
/// <param name="url">Absolute URL; anything else makes the Uri constructor throw.</param>
/// <param name="key">Name of the parameter to remove.</param>
/// <returns>
/// Scheme, authority and path followed by the remaining query pairs and the fragment.
/// The '?' is only emitted when at least one parameter remains.
/// </returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url, UriKind.Absolute);
    var queryParts = uri.Query
        .TrimStart('?')
        .Split('&')
        // Compare only the name (text before '='); comparing the whole "name=value"
        // item against the key would never match a parameter that has a value.
        .Where(item => item.Length > 0
                       && !string.Equals(item.Split('=')[0], key, StringComparison.Ordinal))
        .ToArray();

    var result = uri.Scheme + Uri.SchemeDelimiter
        + uri.Authority
        + uri.AbsolutePath;
    if (queryParts.Length > 0)
    {
        result += "?" + string.Join("&", queryParts);
    }
    return result + uri.Fragment;
}
With reusing the signature from the accepted answer, but preserving the fragment and using QueryHelpers from Microsoft.AspNetCore.WebUtilities.
/// <summary>
/// Removes the query parameter named <paramref name="key"/> from an absolute URL using
/// QueryHelpers, preserving the fragment.
/// </summary>
/// <returns>The rebuilt URL, or <paramref name="url"/> unchanged when the key is not present.</returns>
public static string RemoveQueryStringByKey(string url, string key)
{
    var uri = new Uri(url);
    var newQueryString = QueryHelpers.ParseQuery(uri.Query);
    if (newQueryString.Remove(key))
    {
        // Each parameter's values are flattened to a single string via ToString().
        var urlWithNewQuery = QueryHelpers.AddQueryString(
            uri.GetLeftPart(UriPartial.Path),
            newQueryString.ToDictionary(
                queryParam => queryParam.Key,
                queryParam => queryParam.Value.ToString()));
        // GetLeftPart(UriPartial.Path) excludes the fragment, so append it again.
        return $"{urlWithNewQuery}{uri.Fragment}";
    }
    return url;
}
string url = HttpContext.Current.Request.Url.AbsoluteUri;
string[] separateURL = url.Split('?');
NameValueCollection queryString = System.Web.HttpUtility.ParseQueryString(separateURL[1]);
queryString.Remove("param_toremove");
string revisedurl = separateURL[0] + "?" + queryString.ToString();
How to get the first directory name in a relative path, given that they can be different accepted directory separators?
For example:
foo\bar\abc.txt -> foo
bar/foo/foobar -> bar
Works with both forward and back slash
// Repeatedly strips the last segment with Path.GetDirectoryName until nothing is left
// to strip, and returns the last non-empty result (the first segment of the path).
static string GetRootFolder(string path)
{
    string parent = Path.GetDirectoryName(path);
    while (!String.IsNullOrEmpty(parent))
    {
        path = parent;
        parent = Path.GetDirectoryName(path);
    }
    return path;
}
Seems like you could just use the string.Split() method on the string, then grab the first element.
example (untested):
// Verbatim literal: in a regular literal "\b" and "\a" are escape sequences
// (backspace / alert) and would silently corrupt the path.
string str = @"foo\bar\abc.txt";
string str2 = "bar/foo/foobar";
// Split on either separator; RemoveEmptyEntries skips a leading separator.
char[] separators = new char[] {'/', '\\'};
string[] items = str.Split(separators, StringSplitOptions.RemoveEmptyEntries);
Console.WriteLine(items[0]); // prints "foo"
items = str2.Split(separators, StringSplitOptions.RemoveEmptyEntries);
Console.WriteLine(items[0]); // prints "bar"
The most robust solution would be to use DirectoryInfo and FileInfo. On a Windows NT-based system it should accept either forward or backslashes for separators.
using System;
using System.IO;
/// <summary>
/// Demonstrates finding the top-most folder of a relative path by walking up from the file's
/// directory until the current working directory is reached.
/// </summary>
internal class Program
{
    private static void Main(string[] args)
    {
        Console.WriteLine(GetTopRelativeFolderName(@"foo\bar\abc.txt")); // prints 'foo'
        Console.WriteLine(GetTopRelativeFolderName("bar/foo/foobar")); // prints 'bar'
        Console.WriteLine(GetTopRelativeFolderName("C:/full/rooted/path")); // ** throws
    }

    /// <summary>
    /// Returns the name of the first folder of <paramref name="relativePath"/>, resolved
    /// against the current working directory.
    /// </summary>
    /// <returns>The top folder name, or an empty string when the path has no folder part.</returns>
    /// <exception cref="ArgumentException">
    /// The path is rooted, or it resolves to a location outside the working directory
    /// (for example via "..").
    /// </exception>
    private static string GetTopRelativeFolderName(string relativePath)
    {
        if (Path.IsPathRooted(relativePath))
        {
            throw new ArgumentException("Path is not relative.", "relativePath");
        }

        FileInfo fileInfo = new FileInfo(relativePath);
        DirectoryInfo workingDirectoryInfo = new DirectoryInfo(".");
        string topRelativeFolderName = string.Empty;
        DirectoryInfo current = fileInfo.Directory;

        // Climb until the working directory is reached, remembering the last folder passed.
        while (current != null && current.FullName != workingDirectoryInfo.FullName)
        {
            topRelativeFolderName = current.Name;
            current = current.Parent;
        }

        // Reaching the filesystem root means the path escaped the working directory;
        // report that instead of dereferencing a null parent.
        if (current == null)
        {
            throw new ArgumentException("Path is not located under the current working directory.", "relativePath");
        }
        return topRelativeFolderName;
    }
}
Based on the answer provided by Hasan Khan ...
// Returns the top-level folder of `path`: the path is shortened with Path.GetDirectoryName
// until its parent equals the path root ("" for relative paths).
// When the path is itself a root (or empty) GetDirectoryName yields null; the current
// value is returned in that case instead of looping forever on a null path.
private static string GetRootFolder(string path)
{
    var root = Path.GetPathRoot(path);
    while (true)
    {
        var temp = Path.GetDirectoryName(path);
        if (temp == null || temp.Equals(root))
            break;
        path = temp;
    }
    return path;
}
This will give the top-level folder.
Based on the question you ask, the following should work:
/// <summary>
/// Returns the first segment of the directory part of a relative <paramref name="filePath"/>,
/// accepting both '\' and '/' as separators.
/// </summary>
/// <returns>
/// The first directory name, or an empty string when the path has no directory part.
/// </returns>
public string GetTopLevelDir(string filePath)
{
    string temp = Path.GetDirectoryName(filePath);
    if (string.IsNullOrEmpty(temp))
    {
        return string.Empty;
    }

    // Look for whichever separator comes first. The previous code tested for "//" and then
    // searched for "\\", which produced index -1 and made Substring throw.
    int separatorIndex = temp.IndexOfAny(new[] { '\\', '/' });
    return separatorIndex < 0 ? temp : temp.Substring(0, separatorIndex);
}
When passed foo\bar\abc.txt it will return foo, as wanted; the same goes for the / case.
Here is another example, in case your path is in the following format:
// Verbatim literal: in a regular literal "\f", "\b" and "\a" would be read as escape sequences.
string path = @"c:\foo\bar\abc.txt"; // or c:/foo/bar/abc.txt
string root = Path.GetPathRoot(path); // root == c:\
This should work fine
// Verbatim literal: in a regular literal "\b" and "\a" would be read as escape sequences.
string str = @"foo\bar\abc.txt";
string str2 = "bar/foo/foobar";
// Normalise '/' to '\' and take the first segment.
string first = str.Replace("/", "\\").Split('\\').First(); // foo
string first2 = str2.Replace("/", "\\").Split('\\').First(); // bar
Here is my example, which keeps allocations to a minimum (the only new string created is the resulting substring):
// Index of the first separator of either kind, or -1 when the path has none.
var firstSlashIndex = relativePath.IndexOfAny(new[] { '/', '\\' });
// Without any separator the whole path is its own first segment; Substring(0, -1) would throw.
var rootDirectory = firstSlashIndex < 0 ? relativePath : relativePath.Substring(0, firstSlashIndex);
Path.Combine is handy, but is there a similar function in the .NET framework for URLs?
I'm looking for syntax like this:
Url.Combine("http://MyUrl.com/", "/Images/Image.jpg")
which would return:
"http://MyUrl.com/Images/Image.jpg"
Uri has a constructor that should do this for you: new Uri(Uri baseUri, string relativeUri)
Here's an example:
Uri baseUri = new Uri("http://www.contoso.com");
Uri myUri = new Uri(baseUri, "catalog/shownew.htm");
Note from editor: Beware, this method does not work as expected. It can cut part of baseUri in some cases. See comments and other answers.
This may be a suitably simple solution:
// Joins two URL parts with exactly one '/' between them: trailing slashes of the first
// part and leading slashes of the second part are removed before joining.
public static string Combine(string uri1, string uri2)
{
    string left = uri1.TrimEnd('/');
    string right = uri2.TrimStart('/');
    return left + "/" + right;
}
There's already some great answers here. Based on mdsharpe suggestion, here's an extension method that can easily be used when you want to deal with Uri instances:
using System;
using System.Linq;
/// <summary>Path-appending helpers for <see cref="Uri"/>.</summary>
public static class UriExtensions
{
    /// <summary>
    /// Appends each entry of <paramref name="paths"/> to the absolute form of
    /// <paramref name="uri"/>, placing exactly one '/' at every join.
    /// </summary>
    public static Uri Append(this Uri uri, params string[] paths)
    {
        string combined = uri.AbsoluteUri;
        foreach (string path in paths)
        {
            combined = string.Format("{0}/{1}", combined.TrimEnd('/'), path.TrimStart('/'));
        }
        return new Uri(combined);
    }
}
And usage example:
var url = new Uri("http://example.com/subpath/").Append("/part1/", "part2").AbsoluteUri;
This will produce http://example.com/subpath/part1/part2
If you want to work with strings instead of Uris then the following will also produce the same result, simply adapt it to suit your needs:
// Appends each segment to `uri` with exactly one '/' at every join.
// Returns null for a null/blank uri, and the uri unchanged when there are no segments.
public string JoinUriSegments(string uri, params string[] segments)
{
    if (string.IsNullOrWhiteSpace(uri))
    {
        return null;
    }

    if (segments == null || segments.Length == 0)
    {
        return uri;
    }

    string joined = uri;
    foreach (string segment in segments)
    {
        joined = $"{joined.TrimEnd('/')}/{segment.TrimStart('/')}";
    }
    return joined;
}
var uri = JoinUriSegments("http://example.com/subpath/", "/part1/", "part2");
You use Uri.TryCreate( ... ) :
Uri result = null;
if (Uri.TryCreate(new Uri("http://msdn.microsoft.com/en-us/library/"), "/en-us/library/system.uri.trycreate.aspx", out result))
{
Console.WriteLine(result);
}
Will return:
http://msdn.microsoft.com/en-us/library/system.uri.trycreate.aspx
There is a Todd Menier's comment above that Flurl includes a Url.Combine.
More details:
Url.Combine is basically a Path.Combine for URLs, ensuring one
and only one separator character between parts:
var url = Url.Combine(
    "http://MyUrl.com/",
    "/too/", "/many/", "/slashes/",
    "too", "few?",
    "x=1", "y=2");
// result: "http://MyUrl.com/too/many/slashes/too/few?x=1&y=2"
Get Flurl.Http on NuGet:
PM> Install-Package Flurl.Http
Or get the stand-alone URL builder without the HTTP features:
PM> Install-Package Flurl
Ryan Cook's answer is close to what I'm after and may be more appropriate for other developers. However, it adds http:// to the beginning of the string and in general it does a bit more formatting than I'm after.
Also, for my use cases, resolving relative paths is not important.
mdsharp's answer also contains the seed of a good idea, although that actual implementation needed a few more details to be complete. This is an attempt to flesh it out (and I'm using this in production):
C#
// Joins two URL fragments with a single '/'. An empty argument yields the other argument
// untouched; otherwise trailing '/' and '\' are trimmed from url1 and leading ones from url2.
public string UrlCombine(string url1, string url2)
{
    if (url1.Length == 0) {
        return url2;
    }
    if (url2.Length == 0) {
        return url1;
    }

    char[] slashes = { '/', '\\' };
    return url1.TrimEnd(slashes) + "/" + url2.TrimStart(slashes);
}
VB.NET
' Joins two URL fragments with a single "/". An empty argument yields the other argument
' untouched; otherwise trailing "/" and "\" are trimmed from url1 and leading ones from url2.
Public Function UrlCombine(ByVal url1 As String, ByVal url2 As String) As String
    If url1.Length = 0 Then Return url2
    If url2.Length = 0 Then Return url1

    Dim slashes As Char() = {"/"c, "\"c}
    Return url1.TrimEnd(slashes) & "/" & url2.TrimStart(slashes)
End Function
This code passes the following test, which happens to be in VB:
' Verifies UrlCombine for every combination of leading/trailing slashes and for empty arguments.
<TestMethod()> Public Sub UrlCombineTest()
    Dim helpers As New StringHelpers()
    Assert.AreEqual("test1/test2", helpers.UrlCombine("test1", "test2"))
    Assert.AreEqual("test1/test2", helpers.UrlCombine("test1/", "test2"))
    Assert.AreEqual("test1/test2", helpers.UrlCombine("test1", "/test2"))
    Assert.AreEqual("test1/test2", helpers.UrlCombine("test1/", "/test2"))
    Assert.AreEqual("/test1/test2/", helpers.UrlCombine("/test1/", "/test2/"))
    Assert.AreEqual("/test2/", helpers.UrlCombine("", "/test2/"))
    Assert.AreEqual("/test1/", helpers.UrlCombine("/test1/", ""))
End Sub
Path.Combine does not work for me because there can be characters like "|" in QueryString arguments and therefore the URL, which will result in an ArgumentException.
I first tried the new Uri(Uri baseUri, string relativeUri) approach, which failed for me because of URIs like http://www.mediawiki.org/wiki/Special:SpecialPages:
new Uri(new Uri("http://www.mediawiki.org/wiki/"), "Special:SpecialPages")
will result in Special:SpecialPages, because of the colon after Special that denotes a scheme.
So I finally had to take mdsharpe/Brian MacKays route and developed it a bit further to work with multiple URI parts:
/// <summary>
/// Concatenates any number of URI fragments, separating them with a single '/'.
/// Null fragments are treated as empty strings.
/// </summary>
/// <param name="uriParts">Fragments to join, in order.</param>
/// <returns>The joined URI, or an empty string when no fragments are supplied.</returns>
public static string CombineUri(params string[] uriParts)
{
    if (uriParts == null || uriParts.Length == 0)
    {
        return string.Empty;
    }

    char[] slashes = { '\\', '/' };
    string combined = (uriParts[0] ?? string.Empty).TrimEnd(slashes);

    int index = 1;
    while (index < uriParts.Length)
    {
        string next = (uriParts[index] ?? string.Empty).TrimStart(slashes);
        combined = combined.TrimEnd(slashes) + "/" + next;
        index++;
    }

    return combined;
}
Usage: CombineUri("http://www.mediawiki.org/", "wiki", "Special:SpecialPages")
Based on the sample URL you provided, I'm going to assume you want to combine URLs that are relative to your site.
Based on this assumption I'll propose this solution as the most appropriate response to your question which was: "Path.Combine is handy, is there a similar function in the framework for URLs?"
Since there is a similar function in the framework for URLs, I propose that the correct answer is the "VirtualPathUtility.Combine" method.
Here's the MSDN reference link: VirtualPathUtility.Combine Method
There is one caveat: I believe this only works for URLs relative to your site (that is, you cannot use it to generate links to another web site. For example, var url = VirtualPathUtility.Combine("www.google.com", "accounts/widgets");).
Path.Combine("Http://MyUrl.com/", "/Images/Image.jpg").Replace("\\", "/")
I just put together a small extension method:
/// <summary>
/// Appends <paramref name="append"/> to <paramref name="val"/> with a single '/' between them.
/// When either side is null or empty the other side is returned unchanged.
/// </summary>
public static string UriCombine (this string val, string append)
{
    if (String.IsNullOrEmpty(val))
    {
        return append;
    }

    return String.IsNullOrEmpty(append)
        ? val
        : String.Concat(val.TrimEnd('/'), "/", append.TrimStart('/'));
}
It can be used like this:
"www.example.com/".UriCombine("/images").UriCombine("first.jpeg");
Witty example, Ryan, to end with a link to the function. Well done.
One recommendation Brian: if you wrap this code in a function, you may want to use a UriBuilder to wrap the base URL prior to the TryCreate call.
Otherwise, the base URL MUST include the scheme (where the UriBuilder will assume http://). Just a thought:
/// <summary>
/// Resolves <paramref name="relativeUrl"/> against <paramref name="baseUrl"/>.
/// The base is passed through <see cref="UriBuilder"/> first, so it is turned into a
/// <see cref="Uri"/> before the combination is attempted.
/// </summary>
/// <exception cref="ArgumentException">The two values cannot be combined.</exception>
public string CombineUrl(string baseUrl, string relativeUrl) {
    Uri absoluteBase = new UriBuilder(baseUrl).Uri;
    Uri combined;
    bool succeeded = Uri.TryCreate(absoluteBase, relativeUrl, out combined);
    if (!succeeded) {
        throw new ArgumentException("Unable to combine specified url values");
    }
    return combined.ToString();
}
An easy way to combine them and ensure it's always correct is:
string.Format("{0}/{1}", Url1.Trim('/'), Url2);
I think this should give you more flexibility as you can deal with as many path segments as you want:
/// <summary>
/// Joins a base URL and any number of segments with '/'. Trailing slashes are removed from
/// the base, and leading and trailing slashes are removed from every segment.
/// </summary>
public static string UrlCombine(this string baseUrl, params string[] segments)
{
    var head = baseUrl.TrimEnd('/');
    var tail = segments.Select(segment => segment.Trim('/'));
    return string.Join("/", new[] { head }.Concat(tail));
}
Combining multiple parts of a URL could be a little bit tricky. You can use the two-parameter constructor Uri(baseUri, relativeUri), or you can use the Uri.TryCreate() utility function.
In either case, you might end up returning an incorrect result because these methods keep on truncating the relative parts off of the first parameter baseUri, i.e. from something like http://google.com/some/thing to http://google.com.
To be able to combine multiple parts into a final URL, you can copy the two functions below:
/// <summary>
/// Concatenates the given parts after each has been passed through
/// <c>tryCreateRelativeOrAbsolute</c>, then strips the trailing slash from the result.
/// </summary>
/// <param name="parts">URL parts to combine, in order.</param>
/// <returns>The combined URL, or an empty string when no parts are supplied.</returns>
public static string Combine(params string[] parts)
{
    if (parts == null || parts.Length == 0)
    {
        return string.Empty;
    }

    var joined = new StringBuilder();
    for (int i = 0; i < parts.Length; i++)
    {
        joined.Append(tryCreateRelativeOrAbsolute(parts[i]));
    }

    string withSlash = joined.ToString();
    return VirtualPathUtility.RemoveTrailingSlash(withSlash);
}
/// <summary>
/// Parses <paramref name="s"/> as a relative or absolute URI and returns its text with a
/// trailing slash appended.
/// </summary>
/// <param name="s">URL part to normalize; may be null.</param>
/// <returns>The part followed by a trailing slash as produced by <c>VirtualPathUtility.AppendTrailingSlash</c>.</returns>
private static string tryCreateRelativeOrAbsolute(string s)
{
    System.Uri uri;
    // TryCreate leaves uri null when parsing fails (for example on null input); fall back
    // to the raw text instead of dereferencing null.
    string text = System.Uri.TryCreate(s, UriKind.RelativeOrAbsolute, out uri)
        ? uri.ToString()
        : (s ?? string.Empty);
    return VirtualPathUtility.AppendTrailingSlash(text);
}
Full code with unit tests to demonstrate usage can be found at https://uricombine.codeplex.com/SourceControl/latest#UriCombine/Uri.cs
I have unit tests to cover the three most common cases:
As found in other answers, either new Uri() or TryCreate() can do the trick.
However, the base Uri has to end with / and the relative has to NOT begin with /; otherwise it will remove the trailing part of the base Url
I think this is best done as an extension method, i.e.
/// <summary>
/// Appends a relative path to a URI, ensuring the base ends with '/' and dropping one
/// leading '/' from the relative path.
/// </summary>
public static Uri Append(this Uri uri, string relativePath)
{
    Uri baseUri = uri;
    if (!uri.AbsoluteUri.EndsWith('/'))
    {
        baseUri = new Uri(uri.AbsoluteUri + '/');
    }

    string relative = relativePath;
    if (relativePath.StartsWith('/'))
    {
        relative = relativePath.Substring(1);
    }

    return new Uri(baseUri, relative);
}
and to use it:
var baseUri = new Uri("http://test.com/test/");
var combinedUri = baseUri.Append("/Do/Something");
In terms of performance, this consumes more resources than it needs, because of the Uri class which does a lot of parsing and validation; a very rough profiling (Debug) did a million operations in about 2 seconds.
This will work for most scenarios, however to be more efficient, it's better to manipulate everything as strings, this takes 125 milliseconds for 1 million operations.
I.e.
/// <summary>
/// String-only variant: returns the absolute URI text followed by the relative path, with
/// exactly one '/' inserted at the junction when the base lacks one and one leading '/'
/// removed from the relative path.
/// </summary>
public static string Append(this Uri uri, string relativePath)
{
    // AbsoluteUri is read once and reused.
    string prefix = uri.AbsoluteUri;
    if (!prefix.EndsWith('/'))
    {
        prefix += '/';
    }

    int skip = relativePath.StartsWith('/') ? 1 : 0;
    return prefix + relativePath.Substring(skip);
}
And if you still want to return a URI, it takes around 600 milliseconds for 1 million operations.
/// <summary>
/// Builds the combined address as a string and wraps it in a single new <see cref="Uri"/>.
/// </summary>
public static Uri AppendUri(this Uri uri, string relativePath)
{
    // AbsoluteUri is read once and reused.
    string absolute = uri.AbsoluteUri;
    string separator = absolute.EndsWith('/') ? string.Empty : "/";
    string tail = relativePath.StartsWith('/') ? relativePath.Substring(1) : relativePath;
    return new Uri(string.Concat(absolute, separator, tail));
}
I hope this helps.
I found UriBuilder worked really well for this sort of thing:
UriBuilder urlb = new UriBuilder("http", _serverAddress, _webPort, _filePath);
Uri url = urlb.Uri;
return url.AbsoluteUri;
See UriBuilder Class - MSDN for more constructors and documentation.
If you don't want to have a dependency like Flurl, you can use its source code:
/// <summary>
/// Basically a Path.Combine for URLs. Ensures exactly one '/' separates each segment,
/// and exactly one '&' separates each query parameter.
/// URL-encodes illegal characters but not reserved characters.
/// </summary>
/// <param name="parts">URL parts to combine.</param>
/// <returns>The combined parts after being passed through EncodeIllegalCharacters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="parts"/> is null.</exception>
public static string Combine(params string[] parts) {
if (parts == null)
throw new ArgumentNullException(nameof(parts));
string result = "";
// Track whether a '?' or '#' has already been seen in an earlier part.
bool inQuery = false, inFragment = false;
// Joins a and b with exactly one separator; returns the other side when one is null or empty.
string CombineEnsureSingleSeparator(string a, string b, char separator) {
if (string.IsNullOrEmpty(a)) return b;
if (string.IsNullOrEmpty(b)) return a;
return a.TrimEnd(separator) + separator + b.TrimStart(separator);
}
foreach (var part in parts) {
// Null and empty parts contribute nothing.
if (string.IsNullOrEmpty(part))
continue;
// A '?' or '#' at the junction is collapsed to a single occurrence.
if (result.EndsWith("?") || part.StartsWith("?"))
result = CombineEnsureSingleSeparator(result, part, '?');
else if (result.EndsWith("#") || part.StartsWith("#"))
result = CombineEnsureSingleSeparator(result, part, '#');
// Once inside the fragment, parts are appended verbatim.
else if (inFragment)
result += part;
// Inside the query string, parts are joined with '&'.
else if (inQuery)
result = CombineEnsureSingleSeparator(result, part, '&');
// Otherwise the parts are path segments joined with '/'.
else
result = CombineEnsureSingleSeparator(result, part, '/');
// Update the state for the following parts; a '#' ends query mode.
if (part.Contains("#")) {
inQuery = false;
inFragment = true;
}
else if (!inFragment && part.Contains("?")) {
inQuery = true;
}
}
return EncodeIllegalCharacters(result);
}
/// <summary>
/// URL-encodes characters in a string that are neither reserved nor unreserved. Reserved
/// characters such as '/' and '?' are left alone, and a '%' that starts a %-hex-hex
/// sequence is not encoded again (no double-encoding).
/// </summary>
/// <param name="s">The string to encode.</param>
/// <param name="encodeSpaceAsPlus">If true, spaces become '+' signs; otherwise they become %20.</param>
/// <returns>The encoded URL; null or empty input is returned unchanged.</returns>
public static string EncodeIllegalCharacters(string s, bool encodeSpaceAsPlus = false) {
    if (string.IsNullOrEmpty(s)) {
        return s;
    }

    var text = encodeSpaceAsPlus ? s.Replace(" ", "+") : s;

    if (text.Contains("%")) {
        // Group 1 is text with no %-encoding and gets escaped; group 2 is an existing
        // %-hex-hex sequence (or end of input) and is copied through untouched.
        return Regex.Replace(text, "(.*?)((%[0-9A-Fa-f]{2})|$)",
            match => Uri.EscapeUriString(match.Groups[1].Value) + match.Groups[2].Value);
    }

    // No '%' present, so the regex pass is unnecessary.
    return Uri.EscapeUriString(text);
}
I find the following useful; it has the following features:
Throws on null or white space
Takes multiple params parameter for multiple Url segments
throws on null or empty
Class
/// <summary>
/// Joins URL segments with a single '/', rejecting null, empty or white-space segments.
/// </summary>
public static class UrlPath
{
    /// <summary>
    /// Folds <paramref name="args"/> onto <paramref name="source"/>, left to right.
    /// </summary>
    /// <exception cref="ArgumentException">A segment taking part in a join is null or white space.</exception>
    public static string Combine(string source, params string[] args)
    {
        return args.Aggregate(source, (left, right) => InternalCombine(left, right));
    }

    // Joins two segments: slashes and backslashes are trimmed from the end of the first and
    // the start of the second before a single '/' is placed between them.
    private static string InternalCombine(string source, string dest)
    {
        if (string.IsNullOrWhiteSpace(source))
        {
            throw new ArgumentException("Cannot be null or white space", nameof(source));
        }
        if (string.IsNullOrWhiteSpace(dest))
        {
            throw new ArgumentException("Cannot be null or white space", nameof(dest));
        }

        var head = source.TrimEnd('/', '\\');
        var tail = dest.TrimStart('/', '\\');
        return head + "/" + tail;
    }
}
Tests
UrlPath.Combine("test1", "test2");
UrlPath.Combine("test1//", "test2");
UrlPath.Combine("test1", "/test2");
// Result = test1/test2
UrlPath.Combine(@"test1\/\/\/", @"\/\/\\\\\//test2", @"\/\/\\\\\//test3\") ;
// Result = test1/test2/test3\ (only the junctions are trimmed; the final trailing backslash is kept)
UrlPath.Combine("/test1/", "/test2/", null);
UrlPath.Combine("", "/test2/");
UrlPath.Combine("/test1/", null);
// Throws an ArgumentException
So I have another approach, similar to everyone who used UriBuilder.
I did not want to split my BaseUrl (which can contain a part of the path - e.g. http://mybaseurl.com/dev/) as javajavajavajavajava did.
The following snippet shows the code + Tests.
Beware: This solution lowercases the host and appends a port. If this is not desired, one can write a string representation by e.g. leveraging the Uri Property of UriBuilder.
/// <summary>
/// Demonstrates combining a base URL and a path by routing the path through
/// <see cref="Path.Combine(string, string)"/> and assigning it to <see cref="UriBuilder.Path"/>.
/// </summary>
public class Tests
{
    /// <summary>
    /// Combines <paramref name="path"/> with the path component of <paramref name="baseUrl"/>
    /// and returns the builder's string form.
    /// </summary>
    public static string CombineUrl (string baseUrl, string path)
    {
        var builder = new UriBuilder (baseUrl);
        var combinedPath = Path.Combine (builder.Path, path);
        builder.Path = combinedPath;
        return builder.ToString();
    }

    [TestCase("http://MyUrl.com/", "/Images/Image.jpg", "http://myurl.com:80/Images/Image.jpg")]
    [TestCase("http://MyUrl.com/basePath", "/Images/Image.jpg", "http://myurl.com:80/Images/Image.jpg")]
    [TestCase("http://MyUrl.com/basePath", "Images/Image.jpg", "http://myurl.com:80/basePath/Images/Image.jpg")]
    [TestCase("http://MyUrl.com/basePath/", "Images/Image.jpg", "http://myurl.com:80/basePath/Images/Image.jpg")]
    public void Test1 (string baseUrl, string path, string expected)
    {
        Assert.That (CombineUrl (baseUrl, path), Is.EqualTo (expected));
    }
}
Tested with .NET Core 2.1 on Windows 10.
Why does this work?
Even though Path.Combine will return backslashes (on Windows at least), the UriBuilder handles this case in the setter of Path.
Taken from https://github.com/dotnet/corefx/blob/master/src/System.Private.Uri/src/System/UriBuilder.cs (mind the call to string.Replace)
/// <summary>
/// Gets or sets the path. A null or empty value is stored as "/", and every backslash in
/// the assigned value is replaced with a forward slash before escaping.
/// </summary>
[AllowNull]
public string Path
{
    get { return _path; }
    set
    {
        string candidate = string.IsNullOrEmpty(value) ? "/" : value;
        _path = Uri.InternalEscapeString(candidate.Replace('\\', '/'));
        _changed = true;
    }
}
Is this the best approach?
Certainly this solution is pretty self describing (at least in my opinion). But you are relying on undocumented (at least I found nothing with a quick google search) "feature" from the .NET API. This may change with a future release so please cover the Method with Tests.
There are tests in https://github.com/dotnet/corefx/blob/master/src/System.Private.Uri/tests/FunctionalTests/UriBuilderTests.cs (Path_Get_Set) which check, if the \ is correctly transformed.
Side Note: One could also work with the UriBuilder.Uri property directly, if the uri will be used for a System.Uri ctor.
For anyone who is looking for a one-liner and simply wants to join parts of a path without creating a new method or referencing a new library or construct a URI value and convert that to a string, then...
string urlToImage = String.Join("/", "websiteUrl", "folder1", "folder2", "folder3", "item");
It's pretty basic, but I don't see what more you need. If you're afraid of doubled '/' then you can simply do a .Replace("//", "/") afterward. If you're afraid of replacing the doubled '//' in 'https://', then instead do one join, replace the doubled '/', then join the website url (however I'm pretty sure most browsers will automatically convert anything with 'https:' in the front of it to read in the correct format). This would look like:
string urlToImage = String.Join("/","websiteUrl", String.Join("/", "folder1", "folder2", "folder3", "item").Replace("//","/"));
There are plenty of answers here that will handle all the above, but in my case, I only needed it once in one location and won't need to heavily rely on it. Also, it's really easy to see what is going on here.
See: https://learn.microsoft.com/en-us/dotnet/api/system.string.join?view=netframework-4.8
My generic solution:
/// <summary>
/// Joins URI fragments with a single '/', treating null fragments as empty strings.
/// </summary>
/// <param name="uriParts">Fragments to join, in order.</param>
/// <returns>The joined URI, or an empty string when there are no fragments.</returns>
public static string Combine(params string[] uriParts)
{
    if (uriParts == null || !uriParts.Any())
    {
        return string.Empty;
    }

    var separators = new[] { '\\', '/' };
    var result = (uriParts[0] ?? string.Empty).TrimEnd(separators);
    for (var position = 1; position < uriParts.Length; position++)
    {
        var segment = uriParts[position] ?? string.Empty;
        result = $"{result.TrimEnd(separators)}/{segment.TrimStart(separators)}";
    }

    return result;
}
Here's Microsoft's (OfficeDev PnP) method UrlUtility.Combine:
const char PATH_DELIMITER = '/';
/// <summary>
/// Combines a path and a relative path with a single '/' between them.
/// </summary>
/// <param name="path">Leading part; null is treated as empty.</param>
/// <param name="relative">Trailing part; null is treated as empty.</param>
/// <returns>
/// The other part unchanged when one part is empty; otherwise both parts with backslashes
/// converted to '/' and joined by exactly one '/'.
/// </returns>
public static string Combine(string path, string relative)
{
    var left = path ?? String.Empty;
    var right = relative ?? String.Empty;

    // When the relative part is empty the path is the whole answer (empty if both are).
    if (right.Length == 0)
    {
        return left;
    }
    if (left.Length == 0)
    {
        return right;
    }

    var normalizedLeft = left.Replace('\\', PATH_DELIMITER).TrimEnd(PATH_DELIMITER);
    var normalizedRight = right.Replace('\\', PATH_DELIMITER).TrimStart(PATH_DELIMITER);
    return normalizedLeft + PATH_DELIMITER + normalizedRight;
}
Source: GitHub
// Read all above samples and as result created my self:
/// <summary>
/// Joins the non-blank items with '/', after trimming slashes and backslashes from both
/// ends of each item.
/// </summary>
/// <returns>The joined URL, or an empty string when no items are supplied.</returns>
static string UrlCombine(params string[] items)
{
    if (items == null || !items.Any())
    {
        return string.Empty;
    }

    var segments = from item in items
                   where !string.IsNullOrWhiteSpace(item)
                   select item.Trim('/', '\\');
    return string.Join("/", segments);
}
// usage
UrlCombine("https://microsoft.com","en-us")
I have an allocation-free string creation version that I've been using with great success.
NOTE:
For the first string: it trims the separator using TrimEnd(separator) - so only from the end of the string.
For the remainders: it trims the separator using Trim(separator) - so both start and end of paths
It does not append a trailing slash/separator. Though a simple modification can be done to add this ability.
Hope you find this useful!
/// <summary>
/// Builds a '/'-separated path with a single <c>string.Create</c> call, so no intermediate
/// strings are produced (the list of trimmed segments is still allocated).
/// Trailing separators are trimmed from <paramref name="str"/>; leading and trailing
/// separators are trimmed from every entry of <paramref name="paths"/>. No trailing
/// separator is appended.
/// </summary>
/// <param name="str">The first segment.</param>
/// <param name="paths">Segments to append; a null array is treated as empty.</param>
/// <returns>The combined path.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public static string ConcatPath(this string str, params string[] paths)
{
    const char separator = '/';
    if (str == null) throw new ArgumentNullException(nameof(str));
    // Passing an explicit null for the params array used to fail in the foreach below.
    if (paths == null) paths = Array.Empty<string>();

    var list = new List<ReadOnlyMemory<char>>(paths.Length + 1);
    var first = str.AsMemory().TrimEnd(separator);
    // running length of the final string, starting with the trimmed first segment
    var length = first.Length;
    list.Add(first);
    foreach (var path in paths)
    {
        var newPath = path.AsMemory().Trim(separator);
        // +1 accounts for the separator written in front of this segment
        length += newPath.Length + 1;
        list.Add(newPath);
    }

    var newString = string.Create(length, list, (chars, state) =>
    {
        // The delegate reads 'state' rather than 'list' so that it captures nothing.
        var position = 0;
        state[0].Span.CopyTo(chars);
        position += state[0].Span.Length;
        // remaining segments are each preceded by one separator
        for (var i = 1; i < state.Count; i++)
        {
            chars[position++] = separator;
            state[i].Span.CopyTo(chars.Slice(position));
            position += state[i].Length;
        }
    });
    return newString;
}
with Benchmark DotNet output:
| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Gen 0 | Allocated |
|---------------------- |---------:|---------:|---------:|---------:|------:|--------:|-------:|----------:|
| ConcatPathWithBuilder | 404.1 ns | 27.35 ns | 78.48 ns | 380.3 ns | 1.00 | 0.00 | 0.3347 | 1,400 B |
| ConcatPath | 187.2 ns | 5.93 ns | 16.44 ns | 183.2 ns | 0.48 | 0.10 | 0.0956 | 400 B |
A simple one liner:
/// <summary>
/// Joins two URI fragments with one '/': trailing slashes are trimmed from the first and
/// leading slashes from the second.
/// </summary>
public static string Combine(this string uri1, string uri2)
{
    string left = uri1.TrimEnd('/');
    string right = uri2.TrimStart('/');
    return left + "/" + right;
}
Inspired by @Matt Sharpe's answer.
Here is my approach and I will use it for myself too:
/// <summary>
/// Joins two URL parts. A '/' is inserted only when both parts are non-empty; trailing
/// slashes of the first part and leading slashes of the second part are removed.
/// </summary>
public static string UrlCombine(string part1, string part2)
{
    bool hasFirst = !string.IsNullOrEmpty(part1);
    bool hasSecond = !string.IsNullOrEmpty(part2);

    string left = hasFirst ? part1.TrimEnd('/') : string.Empty;
    string right = hasSecond ? part2.TrimStart('/') : string.Empty;
    // No separator is needed unless there is something on both sides of it.
    string glue = hasFirst && hasSecond ? "/" : string.Empty;

    return left + glue + right;
}
I created this function that will make your life easier:
/// <summary>
/// Combines any number of path or URL fragments into one string, using '/' as the
/// separator for URLs and '\' otherwise.
/// </summary>
/// <param name="IsURL">
/// true - the parts are treated as URLs and joined with '/'; false - the parts are treated as local paths and joined with '\'. Slashes of the other kind inside the parts are converted to the chosen separator.
/// </param>
/// <param name="IsRelative">When true, a single separator is prepended to the final result.</param>
/// <param name="IsFixInternal">When true, runs of duplicate separators inside each part are collapsed; a "scheme://" prefix found in the left-hand part of a URL is kept in front of the cleaned remainder.</param>
/// <param name="parts">The paths to combine</param>
/// <returns>The combined path, or an empty string when <paramref name="parts"/> is null or empty.</returns>
public static string PathCombine(bool IsURL , bool IsRelative , bool IsFixInternal , params string[] parts)
{
if (parts == null || parts.Length == 0) return string.Empty;
char separator = IsURL ? '/' : '\\';
// Single part with fixing requested: normalize slash direction and trim separators from both ends.
if (parts.Length == 1 && IsFixInternal)
{
string validsingle;
if (IsURL)
{
validsingle = parts[0].Replace('\\' , '/');
}
else
{
validsingle = parts[0].Replace('/' , '\\');
}
validsingle = validsingle.Trim(separator);
return (IsRelative ? separator.ToString() : string.Empty) + validsingle;
}
// Fold the parts pairwise, left to right; 'first' is the result accumulated so far.
string final = parts
.Aggregate
(
(string first , string second) =>
{
string validfirst;
string validsecond;
// Slash direction is normalized regardless of IsFixInternal.
if (IsURL)
{
validfirst = first.Replace('\\' , '/');
validsecond = second.Replace('\\' , '/');
}
else
{
validfirst = first.Replace('/' , '\\');
validsecond = second.Replace('/' , '\\');
}
var prefix = string.Empty;
if (IsFixInternal)
{
if (IsURL)
{
if (validfirst.Contains("://"))
{
// Split off everything up to and including "://" so the double slash is not collapsed.
var tofix = validfirst.Substring(validfirst.IndexOf("://") + 3);
prefix = validfirst.Replace(tofix , string.Empty).TrimStart(separator);
// Dropping empty entries collapses runs of separators in the remainder.
var tofixlist = tofix.Split(new[] { separator } , StringSplitOptions.RemoveEmptyEntries);
validfirst = separator + string.Join(separator.ToString() , tofixlist);
}
else
{
var firstlist = validfirst.Split(new[] { separator } , StringSplitOptions.RemoveEmptyEntries);
validfirst = string.Join(separator.ToString() , firstlist);
}
var secondlist = validsecond.Split(new[] { separator } , StringSplitOptions.RemoveEmptyEntries);
validsecond = string.Join(separator.ToString() , secondlist);
}
else
{
var firstlist = validfirst.Split(new[] { separator } , StringSplitOptions.RemoveEmptyEntries);
var secondlist = validsecond.Split(new[] { separator } , StringSplitOptions.RemoveEmptyEntries);
validfirst = string.Join(separator.ToString() , firstlist);
validsecond = string.Join(separator.ToString() , secondlist);
}
}
// Both sides are trimmed at the junction and joined by exactly one separator.
return prefix + validfirst.Trim(separator) + separator + validsecond.Trim(separator);
}
);
return (IsRelative ? separator.ToString() : string.Empty) + final;
}
It works for URLs as well as normal paths.
Usage:
// Fixes internal paths
Console.WriteLine(PathCombine(true , true , true , @"\/\/folder 1\/\/\/\\/\folder2\///folder3\\/" , @"/\somefile.ext\/\//\"));
// Result: /folder 1/folder2/folder3/somefile.ext
// Doesn't fix internal paths
Console.WriteLine(PathCombine(true , true , false , @"\/\/folder 1\/\/\/\\/\folder2\///folder3\\/" , @"/\somefile.ext\/\//\"));
//result : /folder 1//////////folder2////folder3/somefile.ext
// Don't worry about URL prefixes when fixing internal paths
Console.WriteLine(PathCombine(true , false , true , @"/\/\/https:/\/\/\lul.com\/\/\/\\/\folder2\///folder3\\/" , @"/\somefile.ext\/\//\"));
// Result: https://lul.com/folder2/folder3/somefile.ext
Console.WriteLine(PathCombine(false , true , true , @"../../../\\..\...\./../somepath" , @"anotherpath"));
// Result: \..\..\..\..\...\.\..\somepath\anotherpath
I found that the Uri constructor flips '\' into '/'. So you can also use Path.Combine, with the Uri constructor.
Uri baseUri = new Uri("http://MyUrl.com");
string path = Path.Combine("Images", "Image.jpg");
Uri myUri = new Uri(baseUri, path);
Why not just use the following.
System.IO.Path.Combine(rootUrl, subPath).Replace(@"\", "/")
For what it's worth, here a couple of extension methods. The first one will combine paths and the second one adds parameters to the URL.
/// <summary>
/// Resolves <paramref name="path"/> and then each entry of <paramref name="paths"/>
/// against <paramref name="root"/>, one <see cref="Uri"/> resolution at a time.
/// </summary>
/// <returns>
/// <paramref name="root"/> unchanged when <paramref name="path"/> is null or white space;
/// otherwise the absolute URI of the final resolution.
/// </returns>
public static string CombineUrl(this string root, string path, params string[] paths)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return root;
    }

    Uri current = new Uri(new Uri(root), path);
    for (int i = 0; i < paths.Length; i++)
    {
        current = new Uri(current, paths[i]);
    }

    return current.AbsoluteUri;
}
/// <summary>
/// Appends URL-encoded query parameters to <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL to extend; null is treated as empty when parameters are present.</param>
/// <param name="parameters">Name/value pairs to append; both are encoded with <see cref="WebUtility.UrlEncode(string)"/>.</param>
/// <returns>
/// <paramref name="url"/> unchanged when there are no parameters; otherwise the URL followed
/// by the parameters, introduced by '?' or, when the URL already carries a query string, by '&amp;'.
/// </returns>
public static string AddUrlParams(this string url, Dictionary<string, string> parameters)
{
    if (parameters == null || parameters.Count == 0)
    {
        return url;
    }

    string baseUrl = url ?? string.Empty;

    // Pick the joiner: '?' starts a new query string, '&' extends an existing one, and
    // nothing is needed when the URL already ends in one of the two.
    string joiner;
    if (baseUrl.IndexOf('?') < 0)
    {
        joiner = "?";
    }
    else
    {
        char last = baseUrl[baseUrl.Length - 1];
        joiner = (last == '?' || last == '&') ? string.Empty : "&";
    }

    var tempUrl = new StringBuilder(baseUrl).Append(joiner);
    bool first = true;
    foreach (KeyValuePair<string, string> parameter in parameters)
    {
        if (!first)
        {
            tempUrl.Append('&');
        }
        tempUrl.Append(WebUtility.UrlEncode(parameter.Key))
               .Append('=')
               .Append(WebUtility.UrlEncode(parameter.Value));
        first = false;
    }
    return tempUrl.ToString();
}
I recently have been moving a bunch of MP3s from various locations into a repository. I had been constructing the new file names using the ID3 tags (thanks, TagLib-Sharp!), and I noticed that I was getting a System.NotSupportedException:
"The given path's format is not supported."
This was generated by either File.Copy() or Directory.CreateDirectory().
It didn't take long to realize that my file names needed to be sanitized. So I did the obvious thing:
/// <summary>
/// Replaces invalid path characters in the directory portion and invalid file-name
/// characters in the file-name portion of <paramref name="path"/>.
/// </summary>
/// <param name="path">The path to sanitize.</param>
/// <param name="replaceChar">Character substituted for each invalid character.</param>
/// <returns>The sanitized directory and file name, joined with the directory separator.</returns>
public static string SanitizePath_(string path, char replaceChar)
{
    if (string.IsNullOrEmpty(path))
    {
        return path;
    }

    // GetDirectoryName returns null for a root directory; keep the root in that case.
    string dir = Path.GetDirectoryName(path) ?? Path.GetPathRoot(path) ?? string.Empty;
    foreach (char c in Path.GetInvalidPathChars())
    {
        dir = dir.Replace(c, replaceChar);
    }

    string name = Path.GetFileName(path) ?? string.Empty;
    foreach (char c in Path.GetInvalidFileNameChars())
    {
        name = name.Replace(c, replaceChar);
    }

    // GetDirectoryName strips the trailing separator, so plain concatenation would glue
    // the directory and file name together.
    return Path.Combine(dir, name);
}
To my surprise, I continued to get exceptions. It turned out that ':' is not in the set of Path.GetInvalidPathChars(), because it is valid in a path root. I suppose that makes sense - but this has to be a pretty common problem. Does anyone have some short code that sanitizes a path? The most thorough I've come up with is this, but it feels like it is probably overkill.
// replaces invalid characters with replaceChar
/// <summary>
/// Replaces invalid characters in every segment after the path root with
/// <paramref name="replaceChar"/>. The root itself is left untouched.
/// </summary>
public static string SanitizePath(string path, char replaceChar)
{
    // Build the combined list of bad characters on first use; ':' is not part of
    // the invalid path characters, so the file-name list is needed as well.
    if (_BadChars == null)
    {
        _BadChars = new List<char>(Path.GetInvalidFileNameChars());
        _BadChars.AddRange(Path.GetInvalidPathChars());
        _BadChars = Utility.GetUnique<char>(_BadChars);
    }

    // Set the root aside and work on the remainder only.
    string root = Path.GetPathRoot(path);
    string remainder = path.Remove(0, root.Length);

    // The separator is itself invalid in a file name, so split on it first.
    string[] segments = remainder.Split(new char[]{Path.DirectorySeparatorChar});
    var cleaned = new List<string>(segments.Length);
    foreach (string segment in segments)
    {
        string safe = segment;
        foreach (char bad in _BadChars)
        {
            safe = safe.Replace(bad, replaceChar);
        }
        cleaned.Add(safe);
    }

    return root + Utility.Join(cleaned, Path.DirectorySeparatorChar.ToString());
}
Any improvements to make this function faster and less baroque would be much appreciated.
To clean up a file name you could do this
/// <summary>
/// Replaces each run of invalid file-name characters, and any trailing run of dots, with a
/// single underscore.
/// </summary>
private static string MakeValidFileName( string name )
{
    string invalidChars = System.Text.RegularExpressions.Regex.Escape( new string( System.IO.Path.GetInvalidFileNameChars() ) );
    // Verbatim literal, so the backslash in \. reaches the regex engine unchanged.
    string invalidRegStr = string.Format( @"([{0}]*\.+$)|([{0}]+)", invalidChars );
    return System.Text.RegularExpressions.Regex.Replace( name, invalidRegStr, "_" );
}
A shorter solution:
var invalids = System.IO.Path.GetInvalidFileNameChars();
var newName = String.Join("_", origFileName.Split(invalids, StringSplitOptions.RemoveEmptyEntries) ).TrimEnd('.');
Based on Andre's excellent answer but taking into account Spud's comment on reserved words, I made this version:
/// <summary>
/// Strip illegal chars and reserved words from a candidate filename (should not include the directory path)
/// </summary>
/// <remarks>
/// http://stackoverflow.com/questions/309485/c-sharp-sanitize-file-name
/// </remarks>
/// <param name="filename">Candidate file name without any directory part.</param>
/// <returns>
/// The name with each run of invalid characters replaced by "_" and a leading reserved
/// device name (followed by a dot) replaced by "_reservedWord_".
/// </returns>
public static string CoerceValidFileName(string filename)
{
    var invalidChars = Regex.Escape(new string(Path.GetInvalidFileNameChars()));
    var invalidReStr = string.Format(@"[{0}]+", invalidChars);
    var reservedWords = new []
    {
        "CON", "PRN", "AUX", "CLOCK$", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4",
        "COM5", "COM6", "COM7", "COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4",
        "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"
    };
    var sanitisedNamePart = Regex.Replace(filename, invalidReStr, "_");
    foreach (var reservedWord in reservedWords)
    {
        // Escape the word so the '$' in "CLOCK$" is matched literally rather than as an
        // end-of-input anchor (unescaped, that pattern could never match).
        var reservedWordPattern = string.Format("^{0}\\.", Regex.Escape(reservedWord));
        sanitisedNamePart = Regex.Replace(sanitisedNamePart, reservedWordPattern, "_reservedWord_.", RegexOptions.IgnoreCase);
    }
    return sanitisedNamePart;
}
And these are my unit tests
// A name that is already valid must come back unchanged.
[Test]
public void CoerceValidFileName_SimpleValid()
{
    var filename = @"thisIsValid.txt";
    var result = PathHelper.CoerceValidFileName(filename);
    Assert.AreEqual(filename, result);
}
// Each run of backslashes in the name is expected to collapse to a single underscore.
[Test]
public void CoerceValidFileName_SimpleInvalid()
{
    var filename = @"thisIsNotValid\3\\_3.txt";
    var result = PathHelper.CoerceValidFileName(filename);
    Assert.AreEqual("thisIsNotValid_3__3.txt", result);
}
// Invalid characters inside the extension are replaced as well.
[Test]
public void CoerceValidFileName_InvalidExtension()
{
    var filename = @"thisIsNotValid.t\xt";
    var result = PathHelper.CoerceValidFileName(filename);
    Assert.AreEqual("thisIsNotValid.t_xt", result);
}
// A reserved device name is replaced regardless of letter case.
[Test]
public void CoerceValidFileName_KeywordInvalid()
{
    const string reservedName = "aUx.txt";
    Assert.AreEqual("_reservedWord_.txt", PathHelper.CoerceValidFileName(reservedName));
}
// A name that merely starts with a reserved word must be left alone.
[Test]
public void CoerceValidFileName_KeywordValid()
{
    const string harmlessName = "auxillary.txt";
    Assert.AreEqual("auxillary.txt", PathHelper.CoerceValidFileName(harmlessName));
}
string clean = String.Concat(dirty.Split(Path.GetInvalidFileNameChars()));
there are a lot of working solutions here. just for the sake of completeness, here's an approach that doesn't use regex, but uses LINQ:
var invalids = Path.GetInvalidFileNameChars();
filename = invalids.Aggregate(filename, (current, c) => current.Replace(c, '_'));
Also, it's a very short solution ;)
I'm using the System.IO.Path.GetInvalidFileNameChars() method to check invalid characters and I've got no problems.
I'm using the following code:
foreach( char invalidchar in System.IO.Path.GetInvalidFileNameChars())
{
filename = filename.Replace(invalidchar, '_');
}
I wanted to retain the characters in some way, not just simply replace the character with an underscore.
One way I thought was to replace the characters with similar looking characters which are (in my situation), unlikely to be used as regular characters. So I took the list of invalid characters and found look-a-likes.
The following are functions to encode and decode with the look-a-likes.
This code does not include a complete listing for all System.IO.Path.GetInvalidFileNameChars() characters. So it is up to you to extend or utilize the underscore replacement for any remaining characters.
/// <summary>
/// Builds the table mapping each character that is invalid in Windows file and folder
/// names ( \ / : * ? " &lt; &gt; | ) to a look-alike replacement character.
/// </summary>
private static Dictionary<string, string> EncodeMapping()
{
    return new Dictionary<string, string>
    {
        { @"\", "Ì" },   // U+00CC
        { "/", "Í" },    // U+00CD
        { ":", "¦" },    // U+00A6
        { "*", "¤" },    // U+00A4
        { "?", "¿" },    // U+00BF
        { @"""", "ˮ" },  // U+02EE
        { "<", "«" },    // U+00AB
        { ">", "»" },    // U+00BB
        { "|", "│" },    // U+2502
    };
}
/// <summary>
/// Replaces every key of the encode mapping found in <paramref name="name"/> with its
/// mapped value, then swaps a trailing "." for "°".
/// </summary>
public static string Escape(string name)
{
    string escaped = name;
    foreach (var pair in EncodeMapping())
    {
        escaped = escaped.Replace(pair.Key, pair.Value);
    }

    //-- handle dot at the end
    if (escaped.EndsWith("."))
    {
        escaped = escaped.CropRight(1) + "°";
    }
    return escaped;
}
/// <summary>
/// Replaces every value of the encode mapping found in <paramref name="name"/> with its
/// key, then swaps a trailing "°" for ".".
/// </summary>
public static string UnEscape(string name)
{
    string restored = name;
    foreach (var pair in EncodeMapping())
    {
        restored = restored.Replace(pair.Value, pair.Key);
    }

    //-- handle dot at the end
    if (restored.EndsWith("°"))
    {
        restored = restored.CropRight(1) + ".";
    }
    return restored;
}
You can select your own look-a-likes. I used the Character Map app in windows to select mine %windir%\system32\charmap.exe
As I make adjustments through discovery, I will update this code.
I think the problem is that you first call Path.GetDirectoryName on the bad string. If this has non-filename characters in it, .Net can't tell which parts of the string are directories and throws. You have to do string comparisons.
Assuming it's only the filename that is bad, not the entire path, try this:
/// <summary>
/// Replaces invalid file-name characters in the part of <paramref name="path"/> that
/// follows the last directory separator; everything up to and including that separator is
/// copied unchanged.
/// </summary>
/// <param name="path">Path whose file-name portion should be sanitized.</param>
/// <param name="replaceChar">Character substituted for each invalid character.</param>
public static string SanitizePath(string path, char replaceChar)
{
    int filenamePos = path.LastIndexOf(Path.DirectorySeparatorChar) + 1;
    // Fetch the invalid set once; GetInvalidFileNameChars hands back a new array per call.
    char[] invalidChars = Path.GetInvalidFileNameChars();
    var sb = new System.Text.StringBuilder(path.Length);
    sb.Append(path, 0, filenamePos);
    for (int i = filenamePos; i < path.Length; i++)
    {
        char current = path[i];
        sb.Append(System.Array.IndexOf(invalidChars, current) >= 0 ? replaceChar : current);
    }
    return sb.ToString();
}
I have had success with this in the past.
Nice, short and static :-)
/// <summary>
/// Removes every invalid file-name character, and then every invalid path character,
/// from <paramref name="s"/>.
/// </summary>
public static string returnSafeString(string s)
{
    char[][] blacklists = { Path.GetInvalidFileNameChars(), Path.GetInvalidPathChars() };
    string cleaned = s;
    foreach (char[] blacklist in blacklists)
    {
        foreach (char unwanted in blacklist)
        {
            cleaned = cleaned.Replace(unwanted.ToString(), string.Empty);
        }
    }
    return cleaned;
}
Here's an efficient lazy loading extension method based on Andre's code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LT
{
    /// <summary>String helpers for producing valid file names.</summary>
    public static class Utility
    {
        // Regex pattern built on first use and reused by later calls.
        static string invalidRegStr;

        /// <summary>
        /// Replaces each run of invalid file-name characters, and any trailing run of
        /// dots, with a single underscore.
        /// </summary>
        public static string MakeValidFileName(this string name)
        {
            if (invalidRegStr == null)
            {
                var invalidChars = System.Text.RegularExpressions.Regex.Escape(new string(System.IO.Path.GetInvalidFileNameChars()));
                // Verbatim literal, so the backslash in \. reaches the regex engine unchanged.
                invalidRegStr = string.Format(@"([{0}]*\.+$)|([{0}]+)", invalidChars);
            }
            return System.Text.RegularExpressions.Regex.Replace(name, invalidRegStr, "_");
        }
    }
}
Your code would be cleaner if you appended the directory and filename together and sanitized that rather than sanitizing them independently. As for sanitizing away the :, just take the 2nd character in the string. If it is equal to "replacechar", replace it with a colon. Since this app is for your own use, such a solution should be perfectly sufficient.
using System;
using System.IO;
using System.Linq;
using System.Text;
/// <summary>
/// Console demo: prints a string full of characters that are problematic in file names,
/// then the same string sanitized with a replacement character and with plain removal.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the demo and writes three lines to the console; any exception is printed
    /// instead of propagating.
    /// </summary>
    public static void Main()
    {
        try
        {
            var badString = "ABC\\DEF/GHI<JKL>MNO:PQR\"STU\tVWX|YZA*BCD?EFG";
            Console.WriteLine(badString);
            Console.WriteLine(SanitizeFileName(badString, '.'));
            Console.WriteLine(SanitizeFileName(badString));
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
        }
    }

    /// <summary>
    /// Removes every invalid file-name character from <paramref name="fileName"/>, or
    /// substitutes <paramref name="replacement"/> for each one when it is supplied.
    /// </summary>
    /// <param name="fileName">The candidate file name; null and empty are returned as-is.</param>
    /// <param name="replacement">Optional substitute; when null, invalid characters are dropped.</param>
    /// <returns>The sanitized file name.</returns>
    private static string SanitizeFileName(string fileName, char? replacement = null)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return fileName;
        }

        // Search the array directly; no need to copy it into a list first.
        char[] badChars = Path.GetInvalidFileNameChars();
        var sb = new StringBuilder(fileName.Length);

        foreach (char ch in fileName)
        {
            if (Array.IndexOf(badChars, ch) < 0)
            {
                sb.Append(ch);
            }
            else if (replacement.HasValue)
            {
                sb.Append(replacement.Value);
            }
        }

        return sb.ToString();
    }
}
Based on @fiat's and @Andre's approaches, I'd like to share my solution too.
Main differences:
it's an extension method
the regex is compiled on first use, which saves time over many executions
reserved words are preserved
/// <summary>
/// Extension methods for turning arbitrary text into a usable file name.
/// </summary>
public static class StringPathExtensions
{
    // Compiled once by the static constructor and shared by every call.
    private static readonly Regex _invalidPathPartsRegex;

    static StringPathExtensions()
    {
        var invalidReg = Regex.Escape(new string(Path.GetInvalidFileNameChars()));

        // "reserved": a device name that makes up the whole base name, i.e. it is followed
        // by the end of the input or by an extension dot. Without that lookahead, ordinary
        // names such as "CONFIG.json" would be treated as the reserved name "CON".
        // "invalid": a run of invalid characters (plus ':'), or a single trailing dot.
        _invalidPathPartsRegex = new Regex(
            "(?<reserved>^(CON|PRN|AUX|CLOCK\\$|NUL|COM[0-9]|LPT[0-9])(?=\\.|$))" +
            $"|(?<invalid>[{invalidReg}:]+|\\.$)",
            // Device names are matched regardless of letter case.
            RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }

    /// <summary>
    /// Replaces each run of invalid characters and a trailing dot with "_", and prefixes a
    /// reserved device name with "_" so that the name itself is kept.
    /// </summary>
    /// <param name="path">The candidate file name.</param>
    /// <returns>The sanitized file name.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="path"/> is null.</exception>
    public static string SanitizeFileName(this string path)
    {
        return _invalidPathPartsRegex.Replace(path, m =>
        {
            Group reserved = m.Groups["reserved"];
            return reserved.Success ? string.Concat("_", reserved.Value) : "_";
        });
    }
}