I am trying to make file names that look like:
MAX_1.01.01.03.pdf look like Max_1010103.pdf.
Currently I have this code:
// Renames each file in `paths` to a sanitized name and records the original
// directory, original name and new name as a row in dataGridView1.
//
// paths: full paths of the files to process.
// Any exception raised while processing is rethrown unchanged.
public void Sanitizer(List<string> paths)
{
    // One or more of these characters in a row is replaced by a single space.
    string regPattern = (@"[~#&!%+{}]+");
    string replacement = " ";
    Regex regExPattern = new Regex(regPattern);
    // Runs of two or more whitespace characters (collapsed to one space).
    Regex regExPattern2 = new Regex(@"\s{2,}");
    // A period that is followed, somewhere later in the input, by another period.
    Regex regExPattern3 = new Regex(@"\.(?=.*\.)");
    string replace = "";
    // Number of collisions seen so far per sanitized target path.
    var filesCount = new Dictionary<string, int>();
    dataGridView1.Rows.Clear();
    try
    {
        foreach (string files2 in paths)
        {
            string filenameOnly = System.IO.Path.GetFileName(files2);
            string pathOnly = System.IO.Path.GetDirectoryName(files2);
            string sanitizedFileName = regExPattern.Replace(filenameOnly, replacement);
            sanitizedFileName = regExPattern2.Replace(sanitizedFileName, replacement);
            string sanitized = System.IO.Path.Combine(pathOnly, sanitizedFileName);
            if (!System.IO.File.Exists(sanitized))
            {
                // Target name is free: log the rename and move the file.
                DataGridViewRow clean = new DataGridViewRow();
                clean.CreateCells(dataGridView1);
                clean.Cells[0].Value = pathOnly;
                clean.Cells[1].Value = filenameOnly;
                clean.Cells[2].Value = sanitizedFileName;
                dataGridView1.Rows.Add(clean);
                System.IO.File.Move(files2, sanitized);
            }
            else
            {
                if (filesCount.ContainsKey(sanitized))
                {
                    // Target seen before: only the counter is incremented on this path.
                    filesCount[sanitized]++;
                }
                else
                {
                    // First collision for this target: append the counter to the name.
                    filesCount.Add(sanitized, 1);
                    string newFileName = String.Format("{0}{1}{2}",
                        System.IO.Path.GetFileNameWithoutExtension(sanitized),
                        filesCount[sanitized].ToString(),
                        System.IO.Path.GetExtension(sanitized));
                    string newFilePath = System.IO.Path.Combine(
                        System.IO.Path.GetDirectoryName(sanitized), newFileName);
                    newFileName = regExPattern2.Replace(newFileName, replacement);
                    System.IO.File.Move(files2, newFilePath);
                    sanitized = newFileName;
                    DataGridViewRow clean = new DataGridViewRow();
                    clean.CreateCells(dataGridView1);
                    clean.Cells[0].Value = pathOnly;
                    clean.Cells[1].Value = filenameOnly;
                    clean.Cells[2].Value = newFileName;
                    dataGridView1.Rows.Add(clean);
                }
                //HERE IS WHERE I AM TRYING TO GET RID OF DOUBLE PERIODS//
                // NOTE(review): the original listing was one closing brace short; this
                // check is assumed to sit inside the "target already exists" branch - confirm.
                // The pattern is tested against the full path (files2), and this block
                // only builds newPath and a grid row; it does not call File.Move.
                if (regExPattern3.IsMatch(files2))
                {
                    string filewithDoublePName = System.IO.Path.GetFileName(files2);
                    string doublepPath = System.IO.Path.GetDirectoryName(files2);
                    string name = System.IO.Path.GetFileNameWithoutExtension(files2);
                    string newName = name.Replace(".", "");
                    string filesDir = System.IO.Path.GetDirectoryName(files2);
                    string fileExt = System.IO.Path.GetExtension(files2);
                    string newPath = System.IO.Path.Combine(filesDir, newName+fileExt);
                    DataGridViewRow clean = new DataGridViewRow();
                    clean.CreateCells(dataGridView1);
                    clean.Cells[0].Value =doublepPath;
                    clean.Cells[1].Value = filewithDoublePName;
                    clean.Cells[2].Value = newName;
                    dataGridView1.Rows.Add(clean);
                }
            }
        }
    }
    catch (Exception e)
    {
        throw;
        //errors.Write(e);
    }
}
I ran this and instead of getting rid of ALL period (minus the period before a file extension), I get results like: MAX_1.0103.pdf
If there are multiple periods like: Test....1.txt I get these results: Test...1.txt
It seems to only get rid of ONE period. I am pretty new to Regular Expressions and it is a REQUIREMENT for this project. Can anybody help me figure out what I'm doing wrong here?
Thanks!
EDITED to show changes made in code
Why not use the Path class:
string name = Path.GetFileNameWithoutExtension(yourPath);
string newName = name.Replace(".", "");
string newPath = Path.Combine(Path.GetDirectoryName(yourPath),
newName + Path.GetExtension(yourPath));
Each step separated for clarity.
So for the input
"C:\Users\Fred\MAX_1.01.01.03.pdf"
I get the output
"C:\Users\Fred\MAX_1010103.pdf"
which is what I'd expect.
If I supply:
"C:\Users\Fred.Flintstone\MAX_1.01.01.03.pdf"
I get:
"C:\Users\Fred.Flintstone\MAX_1010103.pdf"
again what I expect as I'm not processing the "DirectoryName" part of the path.
NOTE I missed the bit about RegEx being a REQUIREMENT. Still sticking by this answer though.
Say, didn't you already ask this question?
Anyway, I stick by my original answer:
// Strips every period from the file-name stem of fullPath; the directory
// part and the extension (with its leading period) are left untouched.
string RemovePeriodsFromFilename(string fullPath)
{
    string folder = Path.GetDirectoryName(fullPath);
    string stem = Path.GetFileNameWithoutExtension(fullPath).Replace(".", string.Empty);
    string renamed = stem + Path.GetExtension(fullPath);
    return Path.Combine(folder, renamed);
}
Now, since you specified that you must use RegEx, I suppose you could always force it in there:
// Regex flavour of the same helper: removes every period from the file-name
// stem of fullPath and returns the path rebuilt from the original directory,
// the cleaned stem and the original extension.
string RemovePeriodsFromFilename(string fullPath)
{
    string dir = Path.GetDirectoryName(fullPath);
    string filename = Path.GetFileNameWithoutExtension(fullPath);
    // Look! Now the solution uses RegEx!
    // Verbatim string (@"...") so the backslash reaches the regex engine as-is.
    string sanitized = Regex.Replace(filename, @"\.", string.Empty);
    string ext = Path.GetExtension(fullPath);
    return Path.Combine(dir, sanitized + ext);
}
Note: This is basically the exact same approach that ChrisF suggested.
Whoever is requiring that you use RegEx, I suggest you request an explanation why.
I'd forgo regexes altogether and do it like this:
1. Replace all periods with empty strings.
2. Replace the last 3 characters with ("." + the last 3 characters).
(This assumes a three-character extension.)
This regex will remove all periods except for the period before the 3 or 4 letter extension.
// Remove every period except one that is directly followed by 3-4 word
// characters at the very end of the name (i.e. the extension separator).
string filename = "test.test......t.test.pdf";
string newFilename = new Regex(@"\.(?!(\w{3,4}$))").Replace(filename, "");
If you want it to work with 2 letter extensions just change the {3,4} to {2,4}
Good luck!
Something like this, maybe:
string fileName = "MAX_1.01.01.03.pdf";
// First character upper-case, the rest lower-case ("MAX_..." -> "Max_...").
fileName = fileName.Substring(0, 1).ToUpper() + fileName.Substring(1).ToLower();
// Strip periods from the stem only; a blanket Replace(".", "") would also
// remove the separator in front of the extension ("Max_1010103pdf").
fileName = System.IO.Path.GetFileNameWithoutExtension(fileName).Replace(".", "")
    + System.IO.Path.GetExtension(fileName);
Related
I have a text file and I want to replace all matches in each line. I defined a pattern, loop through the text file, and then write the result to another file. Unfortunately, my pattern only replaces the first occurrence of the word. What am I doing wrong?
Content of text file:
"testebook kok o testebook\ntestbbb1232 joj ds testbbb1232"
using System.Text.RegularExpressions;
// Reads test.txt line by line, removes pattern matches from each line and
// writes the accumulated text to test1.txt.
string filePath = "test.txt";
string fileNewPath = "test1.txt";
// '^' anchors the pattern to the start of the input string.
string ma = @"^test[0-9a-zA-Z]+";
string newString = string.Empty;
using(StreamReader sr = new(filePath)){
    string line = sr.ReadLine();
    while (line != null){
        // Keep replacing until the pattern no longer matches the line.
        while(Regex.IsMatch(line, ma) != false){
            line = Regex.Replace(line, ma, "");
        }
        newString += line + "\n";
        line = sr.ReadLine();
    }
}
using(StreamWriter sw = new(fileNewPath)){
    sw.WriteLine(newString);
}
Your code is correct but your regex pattern is not correct.
you should write this:
// No '^' anchor, so every "test..." token in a line is matched, not only one at the start.
string ma = @"test[0-9a-zA-Z]+";
The "^" anchor has been removed from the pattern.
So I modified my pattern by removing the start-of-line anchor, and everything now works as desired:
using System.Text.RegularExpressions;
string filePath = "test.txt";
string fileNewPath = "test1.txt";
MatchesFinder test = new(filePath, fileNewPath);
test.RunTheProcess();
// Reads a text file line by line, removes every "test..." token from each
// line, trims leading whitespace and writes the result to a second file.
class MatchesFinder{
    private string filePath;
    private string fileNewPath;
    // "test" followed by one or more ASCII letters or digits, anywhere in the line.
    private string ma = @"test[a-zA-Z0-9]+";

    // filePath: file to read; fileNewPath: file to write.
    public MatchesFinder(string filePath,string fileNewPath){
        this.filePath = filePath;
        this.fileNewPath = fileNewPath;
    }

    // Processes the input file and writes the cleaned text, followed by a
    // final line terminator from WriteLine, to the output file.
    public void RunTheProcess(){
        string newString = string.Empty;
        using(StreamReader sr = new(filePath)){
            string line = sr.ReadLine();
            while (line != null){
                while(Regex.IsMatch(line, ma) != false){
                    line = Regex.Replace(line, ma, string.Empty);
                }
                newString += line.TrimStart() + "\n";
                line = sr.ReadLine();
            }
        }
        using(StreamWriter sw = new(fileNewPath)){
            sw.WriteLine(newString);
        }
    }
}
I think you don't need to check IsMatch separately; just calling Regex.Replace should yield the same result.
Also, newString += line.TrimStart() + "\n"; means you're copying all the lines you've already checked every time you append a new line. I'd either write directly to the output stream or at least use a StringBuilder if you really want to have the full file in memory for some reason.
Something like this:
using var sw = new StreamWriter(fileNewPath);
using var sr = new StreamReader(filePath);
var line = sr.ReadLine();
while (line != null){
line = Regex.Replace(line, ma, string.Empty);
sw.WriteLine(line.TrimStart());
line = sr.ReadLine();
}
I have a string (from a filename) like this: Mytext_edit1345.jpg
I just want to cut the "_edit1345" so I can get Mytext.jpg as a result.
Is Regex.Replace the best way to go for me?
string result = Regex.Replace(Bildname, pattern, "");
What pattern do i need?
You can use the Path class and string methods like String.Remove
string fileNameWOE = Path.GetFileNameWithoutExtension(fileName);
int indexOfUnderscore = fileNameWOE.IndexOf('_');
if(indexOfUnderscore >= 0)
fileNameWOE = fileNameWOE.Remove(indexOfUnderscore);
fileName = fileNameWOE + Path.GetExtension(fileName);
Use String.Substring with Path.GetExtension like:
string fileName = "Mytext_edit1345.jpg";
string newFileName = fileName;
if (fileName.Contains('_'))
{
newFileName = fileName.Substring(0, fileName.IndexOf('_')) +
Path.GetExtension(fileName);
}
Use the pattern below to match the substring you want to cut (from the "_" up to the next "."):
_[^.]*
DEMO
Your code would be,
string str = "Mytext_edit1345.jpg";
// Match the underscore and everything after it up to (not including) the next period.
string result = Regex.Replace(str, @"_[^.]*", "");
Console.WriteLine(result);
Console.ReadLine();
IDEONE
I am getting the error "The given path's format is not supported." at this line:
System.IO.Directory.CreateDirectory(visit_Path);
Where is the mistake in the code below?
// Creates the patient folder and the per-visit sub-folder, if they do not
// already exist, under the directory of the executing assembly.
void Create_VisitDateFolder()
{
    this.pid = Convert.ToInt32(db.GetPatientID(cmbPatientName.SelectedItem.ToString()));
    String strpath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
    // <assembly dir>\Patients\Patient_<pid>\
    String path = strpath + "\\Patients\\Patient_" + pid + "\\";
    string visitdate = db.GetPatient_visitDate(pid);
    this.visitNo = db.GetPatientID_visitNo(pid);
    // The visit date string is embedded verbatim in the folder name.
    string visit_Path = path +"visit_" + visitNo + "_" + visitdate+"\\";
    // IsVisitExist refers to the patient folder, IsVisitPath to the visit folder.
    bool IsVisitExist = System.IO.Directory.Exists(path);
    bool IsVisitPath=System.IO.Directory.Exists(visit_Path);
    if (!IsVisitExist)
    {
        System.IO.Directory.CreateDirectory(path);
    }
    if (!IsVisitPath)
    {
        System.IO.Directory.CreateDirectory(visit_Path); // error here
    }
}
getting this value for visit_Path
C:\Users\Monika\Documents\Visual Studio 2010\Projects\SonoRepo\SonoRepo\bin\Debug\Patients\Patient_16\visit_4_16-10-2013 00:00:00\
You cannot have ":" in a directory name. I suggest using a format string like this to get the date for the directory name:
DateTime.Now.ToString("yyyy-MM-dd hh_mm_ss");
it will create timestamp like:
2013-10-17 05_41_05
additional note:
use Path.Combine to make full path, like:
var path = Path.Combine(strpath , "Patients", "Patient_" + pid);
and last
string suffix = "visit_"+visitNo+"_" + visitdate;
var visit_Path = Path.Combine(path, suffix);
In general always use Path.Combine to create paths:
String strPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
String path = Path.Combine(strPath,"Patients","Patient_" + pid);
string visitdate = db.GetPatient_visitDate(pid);
this.visitNo = db.GetPatientID_visitNo(pid);
string fileName = string.Format("visit_{0}_{1}", visitNo, visitdate);
string visit_Path = Path.Combine(path, fileName);
bool IsVisitExist = System.IO.Directory.Exists(path);
bool IsVisitPath=System.IO.Directory.Exists(visit_Path);
To replace invalid characters from a filename you could use this loop:
// Sanitize only the folder name and then rebuild the path. Running the full
// path through this loop would also strip the directory separators and the
// drive colon, because those are invalid *file name* characters too.
string invalidChars = new string(Path.GetInvalidFileNameChars());
foreach (char c in invalidChars)
{
    fileName = fileName.Replace(c.ToString(), ""); // or with "."
}
visit_Path = Path.Combine(path, fileName);
You can't have colons : in file paths
You can't use colons (:) in a path. You can for example Replace() them with dots (.).
Just wanted to add my two cents.
I assigned the path from a text box to string and also adding additional strings, but I forgot to add the .Text to the text box variable.
So instead of
strFinalPath = TextBox1.Text + strIntermediatePath + strFilename
I wrote
strFinalPath = TextBox1 + strIntermediatePath + strFilename
So the path became invalid because it contained invalid characters.
I was surprised that c# instead of rejecting the assignment because of type mismatch, assigned invalid value to the final string.
So look at the path assignment string closely.
I need to use a string for path for a file but sometimes there are forbidden characters in this string and I must replace them. For example, my string _title is rumbaton jonathan \"racko\" contreras.
Well I should replace the chars \ and ".
I tried this but it doesn't work:
// Attempt to strip characters that are not allowed in file names from _title.
_title.Replace(@"/", "");
_title.Replace(@"\", "");
_title.Replace(@"*", "");
_title.Replace(@"?", "");
_title.Replace(@"<", "");
_title.Replace(@">", "");
_title.Replace(@"|", "");
Since strings are immutable, the Replace method returns a new string, it doesn't modify the instance you are calling it on. So try this:
// Replace returns a new string, so chain the calls and assign the final
// result back to _title. In a verbatim string, "" denotes one double quote.
_title = _title
    .Replace(@"/", "")
    .Replace(@"""", "")
    .Replace(@"*", "")
    .Replace(@"?", "")
    .Replace(@"<", "")
    .Replace(@">", "")
    .Replace(@"|", "");
Also if you want to replace " make sure you have properly escaped it.
Try regex
string illegal = "\"M\"\\a/ry/ h**ad:>> a\\/:*?\"| li*tt|le|| la\"mb.?";
string regexSearch = new string(Path.GetInvalidFileNameChars()) + new string(Path.GetInvalidPathChars());
Regex r = new Regex(string.Format("[{0}]", Regex.Escape(regexSearch)));
illegal = r.Replace(illegal, "");
Before: "M"\a/ry/ h**ad:>> a\/:*?"| li*tt|le|| la"mb.?
After: Mary had a little lamb.
Also another answer from same post is much cleaner
// Returns fileName with every character reported by
// Path.GetInvalidFileNameChars() removed.
private static string CleanFileName(string fileName)
{
    string cleaned = fileName;
    foreach (char bad in Path.GetInvalidFileNameChars())
    {
        cleaned = cleaned.Replace(bad.ToString(), string.Empty);
    }
    return cleaned;
}
from How to remove illegal characters from path and filenames?
Or you could try this (probably terribly inefficient) method:
// Verbatim string: backslashes are literal and "" denotes one double quote.
string inputString = @"File ~!##$%^&*()_+|`1234567890-=\[];',./{}:""<>? name";
// Remove every character the current platform reports as invalid in a file name.
var badchars = Path.GetInvalidFileNameChars();
foreach (var c in badchars)
    inputString = inputString.Replace(c.ToString(), "");
The result will be:
File ~!##$%^&()_+`1234567890-=[];',.{} name
But feel free to add more chars to the badchars before running the foreach loop on them.
See http://msdn.microsoft.com/cs-cz/library/fk49wtc1.aspx:
Returns a string that is equivalent to the current string except that all instances of oldValue are replaced with newValue.
I have written a method to do the exact operation that you want and with much cleaner code.
The method
// Returns target with every character that occurs in samples removed.
// target:  the string to clean; returned unchanged when null or empty.
// samples: the set of characters to delete; when null or empty, target is
//          returned unchanged.
public static string Delete(this string target, string samples) {
    if (string.IsNullOrEmpty(target) || string.IsNullOrEmpty(samples))
        return target;
    // Compact the kept characters into a buffer. Marking deletions with a
    // sentinel character would also delete genuine occurrences of that
    // character in the input.
    var kept = new char[target.Length];
    var count = 0;
    foreach (var ch in target) {
        if (samples.IndexOf(ch) < 0)
            kept[count++] = ch;
    }
    return new string(kept, 0, count);
}
Sample
var input = "rumbaton jonathan \"racko\" contreras";
var cleaned = input.Delete("\"\\/*?><|");
Will result in:
rumbaton jonathan racko contreras
Ok ! I've solved my issue thanks to all your indications. This is my correction :
string newFileName = _artist + " - " + _title;
char[] invalidFileChars = Path.GetInvalidFileNameChars();
char[] invalidPathChars = Path.GetInvalidPathChars();
foreach (char invalidChar in invalidFileChars)
{
newFileName = newFileName.Replace(invalidChar.ToString(), string.Empty);
}
foreach (char invalidChar in invalidPathChars)
{
newFilePath = newFilePath.Replace(invalidChar.ToString(), string.Empty);
}
Thank you so musch everybody :)
I'm having a bit of trouble passing this parameter to a class i have. Does anybody have any ideas?
Class 1's code:
// Enumerates every file under retPath (recursively) and, for each file whose
// full path matches the invalid-character pattern, adds a row (directory,
// file name) to dataGridView1 and remembers the path in a local list.
// An exception raised inside the loop is written to "<retPath>ErrorLog.txt".
public void DriveRecursion(string retPath)
{
    //recurse through files.  Let user press 'ok' to move onto next step
    // string[] files = Directory.GetFiles(retPath, "*.*", SearchOption.AllDirectories);
    string pattern = " *[\\~#%&*{}/<>?|\"-]+ *";
    //string replacement = "";
    Regex regEx = new Regex(pattern);
    string[] fileDrive = Directory.GetFiles(retPath, "*.*", SearchOption.AllDirectories);
    List<string> filePath = new List<string>();
    dataGridView1.Rows.Clear();
    try
    {
        foreach (string fileNames in fileDrive)
        {
            if (regEx.IsMatch(fileNames))
            {
                string fileNameOnly = Path.GetFileName(fileNames);
                string pathOnly = Path.GetDirectoryName(fileNames);
                DataGridViewRow dgr = new DataGridViewRow();
                filePath.Add(fileNames);
                dgr.CreateCells(dataGridView1);
                dgr.Cells[0].Value = pathOnly;
                dgr.Cells[1].Value = fileNameOnly;
                dataGridView1.Rows.Add(dgr);
                //I want to pass fileNames to my FileCleanup Method
                //I tried this:
                //SanitizeFileNames sf = new SanitizeFileNames();
                //sf.Add(fileNames); <-- this always gets an error..plus it is not an action i could find in intellisense
            }
            else
            {
                continue;
            }
        }
    }
    catch (Exception e)
    {
        // 'using' flushes and closes the writer; without it the buffered
        // text may never reach the file.
        using (StreamWriter sw = new StreamWriter(retPath + "ErrorLog.txt"))
        {
            sw.Write(e);
        }
    }
}
Class 2's code:
public class SanitizeFileNames
{
public void FileCleanup(string fileNames)
{
string regPattern = " *[\\~#%&*{}/<>?|\"-]+ *";
string replacement = "";
Regex regExPattern = new Regex(regPattern);
}
What i want to do in SanitizeFileNames is do a foreach through the FileNames & FilePath and replace invalid chars (as defined in my Regex pattern). So, something along the lines of this:
// Renames every file in filePath to its sanitized name, writing each new
// full path to S:\File_Renames.txt. A failure on one file is written to
// S:\Error_Log.txt and the loop continues with the next file.
using (StreamWriter sw = new StreamWriter(@"S:\File_Renames.txt"))
{
    //Sanitize and remove invalid chars
    foreach (string Files2 in filePath)
    {
        try
        {
            string filenameOnly = Path.GetFileName(Files2);
            string pathOnly = Path.GetDirectoryName(Files2);
            string sanitizedFilename = regEx.Replace(filenameOnly, replacement);
            string sanitized = Path.Combine(pathOnly, sanitizedFilename);
            sw.Write(sanitized + "\r\n");
            System.IO.File.Move(Files2, sanitized);
        }
        //error logging
        catch(Exception ex)
        {
            // 'using' flushes and closes the log even if a write throws.
            using (StreamWriter sw2 = new StreamWriter(@"S:\Error_Log.txt"))
            {
                sw2.Write("ERROR LOG");
                sw2.WriteLine(DateTime.Now.ToString() + ex + "\r\n");
            }
        }
    }
}
However, I'm having trouble passing the fileNames into my SanitizeFileNames class. Can anybody help me?
dataGridView1.Rows.Clear();
try
{
foreach (string fileNames in fileDrive)
{
if (regEx.IsMatch(fileNames))
{
string fileNameOnly = Path.GetFileName(fileNames);
string pathOnly = Path.GetDirectoryName(fileNames);
DataGridViewRow dgr = new DataGridViewRow();
filePath.Add(fileNames);
dgr.CreateCells(dataGridView1);
dgr.Cells[0].Value = pathOnly;
dgr.Cells[1].Value = fileNameOnly;
dataGridView1.Rows.Add(dgr);
new SanitizeFileNames().FileCleanup(fileNames);
}
else
{
continue;
}
}
}
I suppose you want to pass a dirty name to the FileCleanup function and get a clean out. Here is how you can do that :
public String FileCleanup(string fileNames)
{
string regPattern = " *[\\~#%&*{}/<>?|\"-]+ *";
string replacement = "";
Regex regExPattern = new Regex(regPattern);
...
return cleanName;
}
and use it in your code like this:
String cleanName = new SanitizeFileNames().FileCleanup(fileNames);
where you put the comment.
You can create a third, static class and add a static variable to it, for example "public static List<string> Files = new List<string>();".
When you create the files, add the same files to the static variable.
When you clean the files, loop through the static variable, and clear it at the end.
The parameter type should be an enumerable collection of some sort: a list or an array would do. Also, strings are immutable so you could return a list of cleaned up filenames:
// Cleans a batch of file names by removing characters that are not allowed
// in a file name on the current platform.
public class SanitizeFilenames
{
    // Returns a new list with one entry per input name, in input order.
    // Names containing any character from Path.GetInvalidFileNameChars()
    // have those characters removed; all other names are copied unchanged.
    // filenames: the file names (not full paths) to clean.
    public List<string> FileCleanUp(IEnumerable<string> filenames)
    {
        var cleanedFileNames = new List<string>();
        var invalidChars = Path.GetInvalidFileNameChars();
        foreach(string file in filenames)
        {
            if(file.IndexOfAny(invalidChars) != -1)
            {
                // clean the file name and add it to the cleanedFileNames list:
                // splitting on the invalid characters and concatenating the
                // pieces drops every one of them.
                cleanedFileNames.Add(string.Concat(file.Split(invalidChars)));
            }
            else
            {
                // nothing to clean here
                cleanedFileNames.Add(file);
            }
        }
        return cleanedFileNames;
    }
}