Is this code even complex enough to deserve a higher level of abstraction?
/// <summary>
/// Parses JSON text into a <see cref="JsonStructure"/> by running a small state machine
/// over the tokens produced by <c>Lexer.Tokenize</c>.
/// </summary>
/// <param name="jsonText">The JSON text to parse.</param>
/// <returns>
/// The root <see cref="JsonObject"/> or <see cref="JsonArray"/>; the default value
/// (<c>null</c>) when the lexer yields no tokens.
/// </returns>
/// <exception cref="JsonException">
/// A token is not valid in the current state, a closing token does not match the
/// structure it closes, or a closing/separator token appears after the root was closed.
/// </exception>
/// <remarks>
/// Values are attached lazily: a primitive is added to its container when the NEXT token
/// (separator or closer) arrives, and a finished nested structure is added to its parent
/// when the token after its closer arrives. Two limitations are kept as they were:
/// an empty object or array (a closer directly after the opener) is rejected, and input
/// that simply stops early is not detected because the loop just runs out of tokens.
/// </remarks>
public static JsonStructure Parse(string jsonText)
{
    var result = default(JsonStructure);
    var structureStack = new Stack<JsonStructure>();
    var keyStack = new Stack<string>();
    var current = default(JsonStructure);
    var currentState = ParserState.Begin;
    var key = default(string);
    var value = default(object);
    foreach (var token in Lexer.Tokenize(jsonText))
    {
        switch (currentState)
        {
            case ParserState.Begin:
                switch (token.Type)
                {
                    case TokenType.BeginObject:
                        currentState = ParserState.Name;
                        current = result = new JsonObject();
                        break;
                    case TokenType.BeginArray:
                        currentState = ParserState.Value;
                        current = result = new JsonArray();
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            case ParserState.Name:
                switch (token.Type)
                {
                    case TokenType.String:
                        currentState = ParserState.NameSeparator;
                        key = (string)token.Value;
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            case ParserState.NameSeparator:
                switch (token.Type)
                {
                    case TokenType.NameSeparator:
                        currentState = ParserState.Value;
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            case ParserState.Value:
                switch (token.Type)
                {
                    case TokenType.Number:
                    case TokenType.String:
                    case TokenType.True:
                    case TokenType.False:
                    case TokenType.Null:
                        currentState = ParserState.ValueSeparator;
                        value = token.Value;
                        break;
                    case TokenType.BeginObject:
                        structureStack.Push(current);
                        keyStack.Push(key);
                        currentState = ParserState.Name;
                        current = new JsonObject();
                        break;
                    case TokenType.BeginArray:
                        structureStack.Push(current);
                        // Push the key here as well so both stacks always have the same depth.
                        // Without it, an array nested in an object popped an empty (or
                        // misaligned) key stack when it was attached to its parent.
                        keyStack.Push(key);
                        currentState = ParserState.Value;
                        current = new JsonArray();
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            case ParserState.ValueSeparator:
                // The pending primitive is attached now that the following token has arrived.
                var jsonObject = current as JsonObject;
                var jsonArray = current as JsonArray;
                if (jsonObject != null)
                {
                    jsonObject.Add(key, value);
                }
                if (jsonArray != null)
                {
                    jsonArray.Add(value);
                }
                switch (token.Type)
                {
                    case TokenType.EndObject:
                        if (jsonObject == null)
                        {
                            // "}" cannot close an array.
                            throw new JsonException(token, currentState);
                        }
                        currentState = ParserState.End;
                        break;
                    case TokenType.EndArray:
                        if (jsonArray == null)
                        {
                            // "]" cannot close an object.
                            throw new JsonException(token, currentState);
                        }
                        currentState = ParserState.End;
                        break;
                    case TokenType.ValueSeparator:
                        // After a comma an object expects the next name, an array the next value.
                        currentState = jsonObject != null ? ParserState.Name : ParserState.Value;
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            case ParserState.End:
                switch (token.Type)
                {
                    case TokenType.EndObject:
                    case TokenType.EndArray:
                    case TokenType.ValueSeparator:
                        if (structureStack.Count == 0)
                        {
                            // The root is already closed; report malformed input instead of
                            // letting Stack.Pop throw InvalidOperationException.
                            throw new JsonException(token, currentState);
                        }
                        var previous = structureStack.Pop();
                        var previousKey = keyStack.Pop();
                        var previousJsonObject = previous as JsonObject;
                        var previousJsonArray = previous as JsonArray;
                        if ((token.Type == TokenType.EndObject && previousJsonObject == null) ||
                            (token.Type == TokenType.EndArray && previousJsonArray == null))
                        {
                            // The closer does not match the parent structure it would close.
                            throw new JsonException(token, currentState);
                        }
                        if (previousJsonObject != null)
                        {
                            previousJsonObject.Add(previousKey, current);
                        }
                        if (previousJsonArray != null)
                        {
                            previousJsonArray.Add(current);
                        }
                        if (token.Type == TokenType.ValueSeparator)
                        {
                            currentState = previousJsonObject != null ? ParserState.Name : ParserState.Value;
                        }
                        else
                        {
                            // The parent itself was just closed; stay in End for its own parent.
                            currentState = ParserState.End;
                        }
                        current = previous;
                        break;
                    default:
                        throw new JsonException(token, currentState);
                }
                break;
            default:
                break;
        }
    }
    return result;
}
Without looking at it in detail, as you are parsing based on state, could you use the state pattern to break it up and parse each bit in a separate class based on the state?
something like this might be a start, although this is just pseudo code...
/// <summary>
/// A single state of the token-driven parser.
/// </summary>
public interface IParserState
{
    /// <summary>
    /// Handles one token.
    /// </summary>
    /// <param name="token">The token to handle.</param>
    /// <returns>An <see cref="IParserState"/>; presumably the state that handles the next token.</returns>
    IParserState ParseToken(IToken token);
}
/// <summary>
/// Initial parser state: decides from the first token whether the document is an
/// object or an array and hands over to the matching state.
/// </summary>
public class BeginState : IParserState
{
    private readonly Stack<JsonStructure> m_structureStack;
    private readonly Stack<String> m_keyStack;

    /// <summary>Stores the two stacks that are handed on to the follow-up states.</summary>
    public BeginState(Stack<JsonStructure> structureStack, Stack<String> keyStack)
    {
        m_structureStack = structureStack;
        m_keyStack = keyStack;
    }

    /// <summary>
    /// Returns the object-key state for an opening brace and the array-value state for an
    /// opening bracket; any other token raises a <see cref="JsonException"/>.
    /// </summary>
    public IParserState ParseToken(IToken token)
    {
        var tokenType = token.Type;

        if (tokenType == TokenType.OpenBrace)
        {
            return new ObjectKeyParserState(m_structureStack, m_keyStack);
        }

        if (tokenType == TokenType.OpenBracket)
        {
            return new ArrayValueParserState(m_structureStack, m_keyStack);
        }

        throw new JsonException(token);
    }
}
/// <summary>
/// Parser state that expects the name (key) of an object member.
/// </summary>
public class ObjectKeyParserState : IParserState
{
    private readonly Stack<JsonStructure> m_structureStack;
    private readonly Stack<String> m_keyStack;
    private readonly JsonObject m_current;

    /// <summary>
    /// Creates the state together with the new <see cref="JsonObject"/> that is being filled.
    /// </summary>
    public ObjectKeyParserState(Stack<JsonStructure> structureStack, Stack<String> keyStack)
    {
        // The stacks must be stored: they are passed on to the next state in ParseToken.
        m_structureStack = structureStack;
        m_keyStack = keyStack;
        m_current = new JsonObject();
    }

    /// <summary>
    /// Accepts a string literal as the member name and hands over to the colon state;
    /// any other token raises a <see cref="JsonException"/>.
    /// </summary>
    public IParserState ParseToken(IToken token)
    {
        switch (token.Type)
        {
            case TokenType.StringLiteral:
                var key = (string)token.Value;
                return new ColonSeperatorParserState(m_structureStack, m_keyStack, m_current, key);
            default:
                throw new JsonException(token);
        }
    }
}
The 'conceptual design' in this case is production rules. If you were to design json yourself, would you think in terms of "A pair is a key followed by a colon followed by a value" or would you think in terms like "Colons will do 'a' in this case 'A' and do 'b' in case 'B' and do 'c' in case 'C'"? Look at http://www.json.org/. You'll see the 'conceptual design' stated in terms of production rules.
Since the 'structural design' of your code does not have the form of the 'conceptual design', no amount of refactoring will help. Changing the 'conceptual design' by even a small amount would lead to a code change that is hard to write and hard to test. You need to rewrite the code in terms of the 'conceptual design'.
// object
//     "{" "}"
//     "{" members "}"
//
// Consumes one object from the token stream. The look-ahead decides between the two
// alternatives of the rule, so an empty object ("{" directly followed by "}") is
// accepted and gets an empty member list instead of failing inside ProduceJsonMembers.
private static JsonObject ProduceJsonObject(Tokens tokens)
{
    var result = new JsonObject();
    tokens.Accept(TokenType.OpenBrace);
    if (tokens.LookAhead == TokenType.CloseBrace)
    {
        // First alternative: no members at all.
        result.members = new JsonMembers();
    }
    else
    {
        // Second alternative: at least one pair.
        result.members = ProduceJsonMembers(tokens);
    }
    tokens.Accept(TokenType.CloseBrace);
    return result;
}
// members
//     pair
//     pair { "," pair }
//
// Consumes one pair and then, for as long as the look-ahead is a comma, the comma and
// another pair. The look-ahead is compared against TokenType, the same enum that is
// passed to Accept, rather than the otherwise unused TokenTag.
private static JsonMembers ProduceJsonMembers(Tokens tokens)
{
    var result = new JsonMembers();
    result.Add(ProduceJsonPair(tokens));
    while (tokens.LookAhead == TokenType.Comma)
    {
        tokens.Accept(TokenType.Comma);
        result.Add(ProduceJsonPair(tokens));
    }
    return result;
}
// pair
//     string ":" value
//
// Consumes the three parts of a pair in order: the name token, the colon, and the
// value (delegated to ProduceJsonValue).
private static JsonPair ProduceJsonPair(Tokens tokens)
{
    var pair = new JsonPair();

    pair.String = tokens.Accept(TokenType.ID);
    tokens.Accept(TokenType.Colon);
    pair.Value = ProduceJsonValue(tokens);

    return pair;
}
// and so forth
Related
General task/idea
I'm currently trying to create a couple of lexers to extract functions/method names as well as class names and namespaces from different languages.
The goal is to be able to extract all defined classes, functions/methods, and namespaces to easily generate an overview for documentation and statistics.
As a starting point I chose PowerShell.
I created a basic lexer to tokenize my input.
I'm unsure where to go from here and if i'm doing it correctly.
Since I don't want to actually parse the language and generate code / interpret it, I won't really need to implement a full syntax analyzer and AST in my understanding.
My idea is to only check for certain pre-defined keywords like function, class, namespace, etc...
Question
Please help me stay in the correct direction as this is the first time I'm doing something like that and I'm really unsure how to proceed from now on.
Is my tokenizer correct, or did I mess up something?
Code
/// <summary>
/// Minimal hand-written lexer that turns a script text into a flat sequence of tokens.
/// </summary>
/// <remarks>
/// The read position lives on the instance and is only initialised in the constructor,
/// so one instance can be enumerated to completion once.
/// </remarks>
internal class TestLexer
{
    private int _pos;
    private readonly string _input;

    /// <summary>A token: its kind plus the text it was produced from.</summary>
    public class Token
    {
        public TokenType TokenType { get; }
        public string Value { get; }

        public Token(TokenType tokenType, string value)
        {
            TokenType = tokenType;
            Value = value;
        }
    }

    /// <summary>Kinds of tokens. Not every member is produced by <see cref="Tokenize"/> yet.</summary>
    public enum TokenType
    {
        Variable,
        OpenBrace,
        CloseBrace,
        Assignment,
        Identifier,
        Comment,
        Equals,
        NotEquals,
        MultiLineCommentStart,
        MultiLineCommentEnd,
        Type,
        Number
    }

    /// <summary>Creates a lexer positioned at the start of <paramref name="input"/>.</summary>
    public TestLexer(string input)
    {
        _input = input;
        _pos = 0;
    }

    /// <summary>
    /// Lazily yields the tokens of the input. Whitespace is skipped, and characters that
    /// start no known token (including a '&lt;' that is not followed by '#') are dropped
    /// without emitting anything.
    /// </summary>
    public IEnumerable<Token> Tokenize()
    {
        while (_pos < _input.Length)
        {
            var currentChar = _input[_pos];
            if (char.IsWhiteSpace(currentChar))
            {
                _pos++;
                continue;
            }

            switch (currentChar)
            {
                case '$':
                    _pos++;
                    yield return new Token(TokenType.Variable, "$");
                    break;
                case '=':
                    _pos++;
                    yield return new Token(TokenType.Assignment, "=");
                    break;
                case '#':
                    // "#>" closes a multi-line comment; a lone '#' starts a line comment.
                    if (NextCharIs('>'))
                    {
                        _pos += 2;
                        yield return new Token(TokenType.MultiLineCommentEnd, "#>");
                    }
                    else
                    {
                        _pos++;
                        yield return new Token(TokenType.Comment, "#");
                    }
                    break;
                case '{':
                    _pos++;
                    yield return new Token(TokenType.OpenBrace, "{");
                    break;
                case '}':
                    _pos++;
                    yield return new Token(TokenType.CloseBrace, "}");
                    break;
                case '[':
                    _pos++;
                    yield return new Token(TokenType.OpenBrace, "[");
                    break;
                case ']':
                    // Closing bracket: previously reported as OpenBrace.
                    _pos++;
                    yield return new Token(TokenType.CloseBrace, "]");
                    break;
                case '(':
                    _pos++;
                    yield return new Token(TokenType.OpenBrace, "(");
                    break;
                case ')':
                    // Closing parenthesis: previously reported as OpenBrace.
                    _pos++;
                    yield return new Token(TokenType.CloseBrace, ")");
                    break;
                case '<':
                    if (NextCharIs('#'))
                    {
                        _pos += 2;
                        yield return new Token(TokenType.MultiLineCommentStart, "<#");
                    }
                    else
                    {
                        _pos++;
                    }
                    break;
                default:
                    if (char.IsLetter(currentChar))
                    {
                        yield return new Token(TokenType.Identifier, ReadWhile(char.IsLetter));
                    }
                    else if (char.IsDigit(currentChar))
                    {
                        // A run of digits is always a number; no parse step is needed, and
                        // very long runs are no longer dropped silently.
                        yield return new Token(TokenType.Number, ReadWhile(char.IsDigit));
                    }
                    else
                    {
                        // Only an unrecognised character is skipped here. ReadWhile already
                        // leaves _pos on the first character after the run, so advancing
                        // again would swallow that character.
                        _pos++;
                    }
                    break;
            }
        }
    }

    // True when the character after the current one exists and equals 'expected'.
    private bool NextCharIs(char expected)
    {
        return _pos + 1 < _input.Length && _input[_pos + 1] == expected;
    }

    // Consumes characters while 'predicate' holds and returns the consumed text.
    private string ReadWhile(System.Func<char, bool> predicate)
    {
        var start = _pos;
        while (_pos < _input.Length && predicate(_input[_pos]))
        {
            _pos++;
        }
        return _input.Substring(start, _pos - start);
    }
}
User input must be equal to one of 5 different strings, if not the user must input again, until the input is equal to one of the 5 strings.
I wrote some code, it works the way it should if the first entered input is equal to one of the 5 strings, if it is not, the program is stuck in an endless loop.
// Reads a value from the console and keeps asking until it is one of the accepted
// spellings. Every input, including re-entered ones, is normalised to
// "first letter upper-case, rest lower-case" before it is compared.
novaDrzava.PrevladujocaVera = Console.ReadLine();
var vera = novaDrzava.PrevladujocaVera;
var prvacrkaVera = string.Empty;
var ostaloVera = string.Empty;
while (true)
{
    if (vera == null)
    {
        // ReadLine returns null when no more input is available; asking again would loop forever.
        throw new InvalidOperationException("Input ended before a valid value was entered.");
    }
    if (vera.Length > 0)
    {
        // Normalise inside the loop. Previously the re-read value was overwritten with the
        // parts computed from the FIRST input, so an invalid first answer looped endlessly.
        prvacrkaVera = vera.Substring(0, 1).ToUpper();
        ostaloVera = vera.Substring(1).ToLower();
        vera = prvacrkaVera + ostaloVera;
    }
    if (vera == "Krščanstvo" ||
        vera == "Krscanstvo" ||
        vera == "Hinduizem" ||
        vera == "Islam" ||
        vera == "Budizem")
    {
        break;
    }
    Console.WriteLine("Vnesite ustrezno vero");
    vera = Console.ReadLine();
}
I can't fully read your code since the identifiers aren't in English. But, given the rest of your question, I think this might be what you want:
// Accepted answers; the comparison below ignores case (invariant culture).
var words = new List<string>
{
    "Krščanstvo",
    "Krscanstvo",
    "Hinduizem",
    "Islam",
    "Budizem"
};

// Keep reading lines until one of them is an accepted answer.
for (;;)
{
    var input = Console.ReadLine();
    var isKnown = words.Contains(input, StringComparer.InvariantCultureIgnoreCase);
    if (isKnown)
    {
        break;
    }

    Console.WriteLine("Invalid selection. Please try again");
}
You forgot to re-assign the values of prvacrkaVera and ostaloVera in the loop
// Loops until vera holds one of the accepted spellings. After each rejected value a new
// line is read and normalised (first character upper-case, remainder lower-case) using
// the variables declared before the loop.
while (true)
{
    var accepted = vera == "Krščanstvo"
        || vera == "Krscanstvo"
        || vera == "Hinduizem"
        || vera == "Islam"
        || vera == "Budizem";
    if (accepted)
    {
        break;
    }

    Console.WriteLine("Vnesite ustrezno vero");
    vera = Console.ReadLine();

    // Recompute both parts from the NEW input before joining them again.
    prvacrkaVera = vera.Substring(0, 1).ToUpper();
    ostaloVera = vera.Substring(1).ToLower();
    vera = prvacrkaVera + ostaloVera;
}
Use a switch statement so you can easily detect when other values are entered
// Loops until vera is one of the accepted spellings. A 'break' inside a switch only
// leaves the switch, never the surrounding loop, so the loop is driven by a flag:
// the accepted cases set it, the default case asks for a new value.
var isValid = false;
while (!isValid)
{
    switch (vera)
    {
        case "Krscanstvo":
        case "Krščanstvo":
        case "Hinduizem":
        case "Islam":
        case "Budizem":
            isValid = true;
            break;
        default:
            Console.WriteLine("Vnesite ustrezno vero");
            vera = Console.ReadLine();
            if (vera == null)
            {
                // No more input is available; asking again would loop forever.
                throw new InvalidOperationException("Input ended before a valid value was entered.");
            }
            if (vera.Length > 0)
            {
                // Same normalisation as for the first input: "Xxxxx".
                vera = vera.Substring(0, 1).ToUpper() + vera.Substring(1).ToLower();
            }
            break;
    }
}
I have a requirement to match strings, including a pattern, in a C# switch statement.
// Maps the response to a result string.
string result = getResponse();
switch (result)
{
    case "S0":
        ret = "true";
        break;

    case "S1":
        ret = "fail";
        break;

    // Intended: "M:" followed by any number, e.g. M:0, M:100, M:1000.
    // As written, this label only matches the literal text "M:?".
    case "M:?":
        ret = result;
        break;

    default:
        ret = "wrong";
        break;
}
how to make the switch statement in "M:?" case?
If you are using C# 7, you can use switch pattern matching.
// Maps the response to a result string; "M:" followed only by digits is passed through.
string ret = string.Empty;
string result = getResponse();
switch (result)
{
    case "S0":
        ret = "true";
        break;
    case "S1":
        ret = "fail";
        break;
    // Verbatim string (@"...") so that \d reaches the regex engine unchanged. The pattern is
    // anchored at both ends so "M:12abc" is rejected, and the null check keeps a null
    // response on the default branch instead of throwing inside IsMatch.
    case var val when val != null && Regex.IsMatch(val, @"^M:\d+$"):
        ret = val;
        break;
    default:
        ret = "wrong";
        break;
}
As @KingKing commented on the question, many developers do not have the luxury of using C# 7; in that case you can use the following code:
// Pre-C# 7 variant: a response of the form "M:<digits>" is replaced by the marker
// "M:XXX" before switching, so it can be matched by an ordinary constant case label.
string result = getResponse();
// Verbatim string (@"...") keeps \d intact; the pattern is anchored at both ends, and a
// null response is switched on directly (default branch) instead of throwing in IsMatch.
switch (result != null && Regex.IsMatch(result, @"^M:\d+$") ? "M:XXX" : result)
{
    case "S0":
        ret = "true";
        break;
    case "S1":
        ret = "fail";
        break;
    case "M:XXX": // marker chosen so it cannot collide with a real response; keep it in sync with the ternary above
        ret = result;
        break;
    default:
        ret = "wrong";
        break;
}
Another option, instead of using switch pattern matching, is to handle the more complex case in the default block with if statements, like so:
// Constant responses are handled by case labels; the pattern-based one is handled in
// the default branch.
string ret = string.Empty;
string result = getResponse();
switch (result)
{
    case "S0":
        ret = "true";
        break;
    case "S1":
        ret = "fail";
        break;
    default:
        // Verbatim string (@"...") keeps \d intact; anchored at both ends so only
        // "M:" followed by digits matches. A null response counts as "wrong".
        if (result != null && Regex.IsMatch(result, @"^M:\d+$"))
        {
            ret = result;
        }
        else
        {
            ret = "wrong";
        }
        break;
}
try with pattern matching :-
/// <summary>
/// Maps a response code to its result text.
/// </summary>
/// <param name="result">The response code.</param>
/// <returns>
/// "true" for "S0", "fail" for "S1", the input itself when it is "M:" followed only by
/// digits, and "wrong" for anything else (including <c>null</c>).
/// </returns>
public string Convert(string result)
{
    switch (result)
    {
        case "S0": return "true";
        case "S1": return "fail";
        // Verbatim string (@"...") so \d is passed to the regex engine unchanged.
        case var m when m != null && Regex.IsMatch(m, @"^M:\d+$"): return m;
        default: return "wrong";
    }
}
if you are using an older C# without pattern matching, move your clause to before the switch
/// <summary>
/// Maps a response code to its result text without C# 7 pattern matching: the
/// pattern-based case is tested before the switch.
/// </summary>
/// <param name="result">The response code.</param>
/// <returns>
/// The input itself when it is "M:" followed only by digits, "true" for "S0", "fail" for
/// "S1", and "wrong" for anything else (including <c>null</c>).
/// </returns>
public string Convert(string result)
{
    // Verbatim string (@"...") so \d is passed to the regex engine unchanged.
    if (result != null && Regex.IsMatch(result, @"^M:\d+$"))
    {
        return result;
    }

    switch (result)
    {
        case "S0": return "true";
        case "S1": return "fail";
        default: return "wrong";
    }
}
How about using LINQ:
// Each entry either produces a result for the input or returns null to pass it on to
// the next entry; the last entry is the catch-all.
var switches = new Func<string, string>[]
{
    x => x == "S0" ? "true" : null,
    x => x == "S1" ? "fail" : null,
    x =>
    {
        // Verbatim string (@"...") keeps \d intact; anchored at both ends so only
        // "M:" followed by digits matches. The result is the captured digits.
        var match = Regex.Match(x, @"^M:(\d+)$");
        if (match.Success)
            return match.Groups[1].Value;
        return null;
    },
    x => "wrong",
};
string result = "M:47";
// Entries are evaluated lazily and in order, stopping at the first non-null result.
string ret = switches.Select(s => s(result)).First(r => r != null);
That gives me "47" for ret.
All of these if conditions in the below method have similar pattern,
Any ideas to come up with a common method to reduce the duplication in this method?
/// <summary>
/// Handler that copies every "VersionN" entry present in the JSON object into the
/// matching property of <c>_radio</c>.
/// </summary>
/// <returns>
/// The result of <c>GenerateSuccess()</c> when at least one entry was present, otherwise
/// the result of <c>GenerateUnsuccessful(...)</c>.
/// </returns>
public override Func<JObject, dynamic, string> version => (jobject, parameters) =>
{
    bool hasValidObject = false;
    if (jobject["Version1"] != null)
    {
        _radio.Version1 = new VersionInfo(jobject["Version1"].Value<string>());
        hasValidObject = true;
    }
    if (jobject["Version2"] != null)
    {
        _radio.Version2 = new VersionInfo(jobject["Version2"].Value<string>());
        hasValidObject = true;
    }
    if (jobject["Version3"] != null)
    {
        _radio.Version3 = new VersionInfo(jobject["Version3"].Value<string>());
        hasValidObject = true;
    }
    if (jobject["Version4"] != null)
    {
        _radio.Version4 = new VersionInfo(jobject["Version4"].Value<string>());
        hasValidObject = true;
    }
    if (jobject["Version6"] != null)
    {
        _radio.Version6 = new VersionInfo(jobject["Version6"].Value<string>());
        hasValidObject = true;
    }
    if (hasValidObject)
    {
        return GenerateSuccess();
    }
    // The opening quote of this literal was missing, which made the statement unparsable.
    return GenerateUnsuccessful(" try again.");
};
One way is to use reflection and a loop
/// <summary>
/// Handler that tries "Version1" .. "Version6" in turn and copies every entry that is
/// present into the property of the same name on <c>_radio</c>, using reflection.
/// </summary>
/// <returns>
/// The result of <c>GenerateSuccess()</c> when at least one property was set, otherwise
/// the result of <c>GenerateUnsuccessful(...)</c>.
/// </returns>
public override Func<JObject, dynamic, string> version => (jobject, parameters) =>
{
    bool hasValidObject = false;
    for (int i = 1; i < 7; i++)
    {
        // The call comes first so it is never skipped by short-circuit evaluation.
        hasValidObject = this.SetVersionInfo(jobject, i) || hasValidObject;
    }
    if (hasValidObject)
    {
        return GenerateSuccess();
    }
    return GenerateUnsuccessful("try again.");
};

/// <summary>
/// Sets <c>_radio.Version{i}</c> from the JSON entry of the same name.
/// </summary>
/// <param name="jobject">The JSON object to read from (it is a lambda parameter of the caller, so it must be passed in).</param>
/// <param name="i">The version number that forms the entry and property name.</param>
/// <returns>
/// <c>true</c> when the entry exists and the property was set; <c>false</c> when the entry
/// is missing or <c>_radio</c> has no property with that name.
/// </returns>
private bool SetVersionInfo(JObject jobject, int i)
{
    var propName = "Version" + i;
    var token = jobject[propName];
    if (token == null)
    {
        return false;
    }

    var property = _radio.GetType().GetProperty(propName);
    if (property == null)
    {
        // GetProperty returns null for unknown names; the loop also visits names that may
        // not exist as properties (the hand-written variant has no Version5).
        return false;
    }

    property.SetValue(_radio, new VersionInfo(token.Value<string>()));
    return true;
}
Another way would be to create a Dictionary<int,VersionInfo> Versions in your _radio class; then you don't need the reflection:
/// <summary>
/// Stores the "Version{i}" entry of the JSON object in <c>_radio.Versions[i]</c>.
/// </summary>
/// <param name="i">The version number; used for the entry name and as the dictionary key.</param>
/// <returns><c>false</c> when the entry is missing, otherwise <c>true</c>.</returns>
// NOTE(review): jobject is not a parameter here, so it has to be reachable from this
// method's scope (e.g. as a field) — confirm against the caller.
private bool SetVersionInfo(int i)
{
    var entryName = "Version" + i;
    var present = jobject[entryName] != null;
    if (present)
    {
        _radio.Versions[i] = new VersionInfo(jobject[entryName].Value<string>());
    }
    return present;
}
Another alternative can be with a switch statement:
/// <summary>
/// Handler that walks the version digits 1, 2, 3, 4 and 6 and copies every "VersionN"
/// entry that is present into the matching property of <c>_radio</c>.
/// </summary>
/// <returns>
/// The result of <c>GenerateSuccess()</c> when at least one entry was present, otherwise
/// the result of <c>GenerateUnsuccessful(...)</c>.
/// </returns>
public override Func<JObject, dynamic, string> version => (jobject, parameters) =>
{
    var anyAssigned = false;

    foreach (var digit in "12346")
    {
        var entry = jobject["Version" + digit];
        if (entry == null)
        {
            continue;
        }

        var info = new VersionInfo(entry.Value<string>());
        if (digit == '1')
        {
            _radio.Version1 = info;
        }
        else if (digit == '2')
        {
            _radio.Version2 = info;
        }
        else if (digit == '3')
        {
            _radio.Version3 = info;
        }
        else if (digit == '4')
        {
            _radio.Version4 = info;
        }
        else if (digit == '6')
        {
            _radio.Version6 = info;
        }

        anyAssigned = true;
    }

    if (anyAssigned)
    {
        return GenerateSuccess();
    }

    return GenerateUnsuccessful(" try again.");
};
or a more advanced version with array of delegates .. because I like vertical space :] (also not tested):
/// <summary>
/// Handler that uses a table of setter delegates, indexed by version number, to copy
/// every "VersionN" entry that is present into the matching property of <c>_radio</c>.
/// </summary>
/// <returns>
/// The result of <c>GenerateSuccess()</c> when at least one entry was present, otherwise
/// the result of <c>GenerateUnsuccessful(...)</c>.
/// </returns>
public override Func<JObject, dynamic, string> version => (jobject, parameters) =>
{
    // Index = version number; slots 0 and 5 are unused. The first setter was missing its
    // "v =>" and therefore referred to an undefined v.
    Func<VersionInfo, VersionInfo>[] a = { null, v => _radio.Version1 = v,
        v => _radio.Version2 = v, v => _radio.Version3 = v,
        v => _radio.Version4 = v, null, v => _radio.Version6 = v };
    var q = from n in new[] { 1, 2, 3, 4, 6 }
            let j = jobject["Version" + n] where j != null
            select a[n](new VersionInfo(j.Value<string>()));
    // Count() is deliberate: it enumerates the whole query and so runs every setter.
    // Any() would stop after the first match and leave the remaining properties unset.
    return q.Count() > 0 ? GenerateSuccess() : GenerateUnsuccessful(" try again.");
};
if jobject is of Dictionary type, you can run on it using
// Visits every key of the dictionary. The property is spelled Keys; identifiers are
// case-sensitive in C#, so "keys" does not resolve on a Dictionary.
foreach (string version in jobject.Keys)
{
    if (jobject[version] != null)
    {
        //do something
    }
}
The remaining problem is the radio object: if it is your own class, you can store the versions in an array inside the class instead of in so many separate properties, and access them by index using the loop counter.
I am currently trying to write some code which turns C# Expressions into text.
To do this, I need to not only walk through the Expression tree, but also evaluate just a little part of it - in order to get the current value of a local variable.
I am finding very hard to put into words, so here is the pseudo-code instead. The missing part is in the first method:
// Sample program for the question: builds an expression tree whose member initialiser
// calls Invoke on a captured Evaluator instance and passes the tree to DumpExpression.
public class Program
{
// Placeholder for the code being asked about: it should turn the expression into text.
// The body is intentionally empty; the comments inside state the desired output.
private static void DumpExpression(Expression expression)
{
// how do I dump out here some text like:
// set T2 = Perform "ExternalCalc" on input.T1
// I can easily get to:
// set T2 = Perform "Invoke" on input.T1
// but how can I substitute Invoke with the runtime value "ExternalCalc"?
}
// Entry point: creates the evaluator, wraps the call in an expression tree and dumps it.
static void Main(string[] args)
{
// The instance whose Name ("ExternalCalc") should appear in the dumped text.
var myEvaluator = new Evaluator() {Name = "ExternalCalc"};
// Because the lambda is assigned to Expression<...>, it is kept as an expression tree
// and Invoke is never actually called here.
Expression<Func<Input, Output>> myExpression = (input) => new Output() {T2 = myEvaluator.Invoke(input.T1)};
DumpExpression(myExpression);
}
}
/// <summary>
/// Stand-in for an external calculation: it only carries a name, and its
/// <see cref="Invoke"/> method exists solely so it can be referenced in expressions.
/// </summary>
class Evaluator
{
    /// <summary>The name of the calculation.</summary>
    public string Name { get; set; }

    /// <summary>Always throws <see cref="NotImplementedException"/>.</summary>
    public string Invoke(string input) =>
        throw new NotImplementedException("Never intended to be implemented");
}
/// <summary>Input side of the sample expression.</summary>
class Input
{
    /// <summary>The value that is passed to the evaluator in the sample.</summary>
    public string T1 { get; set; }
}
/// <summary>Output side of the sample expression.</summary>
class Output
{
    /// <summary>The value that the sample expression assigns.</summary>
    public string T2 { get; set; }
}
I have started investigating this using code like:
// Prints three facts about every member assignment of the initialiser and, when the
// assigned expression is a method call, the method name and the expression's type name.
foreach (MemberAssignment assignment in body.Bindings)
{
    Console.WriteLine("assign to {0}", assignment.Member);
    Console.WriteLine("assign to {0}", assignment.BindingType);
    Console.WriteLine("assign to {0}", assignment.Expression);

    var expression = assignment.Expression;
    var methodCall = expression as MethodCallExpression;
    if (methodCall == null)
    {
        continue;
    }

    Console.WriteLine("METHOD CALL: " + methodCall.Method.Name);
    Console.WriteLine("METHOD CALL: " + expression.Type.Name);

    // The expression for the object the method is called on; the open question is how to
    // get from here to the actual instance.
    var target = methodCall.Object;
    // ?
}
but once I get to that MethodCallExpression level then I am feeling a bit lost about how to parse it and to then get the actual instance.
Any pointers/suggestions on how to do this very much appreciated.
Parsing expression trees is... complex and time-consuming. Here's a very incomplete version that just-about handles your example. In particular, note that we need to:
hard-code to Evaluator, since "ExternalCalc" is not part of the expression
manually evaluate some of the tree
Output:
a new Output set T2 to: call ExternalCalc on get myEvaluator from capture-context with input = get T1 from #input
Code:
/// <summary>
/// Renders <paramref name="expression"/> as text by walking it with <c>Walk</c>.
/// The text is only held in a local variable; nothing is printed or returned.
/// </summary>
private static void DumpExpression(Expression expression)
{
    var buffer = new StringBuilder();

    Walk(expression, buffer);

    // Kept in a local so the rendered text can be inspected.
    string rendered = buffer.ToString();
}
/// <summary>
/// Computes the runtime value of a small subset of expressions: constants and chains of
/// field/property accesses that end in a constant or in a static member.
/// </summary>
/// <param name="expr">The expression to evaluate.</param>
/// <returns>The value the expression denotes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="expr"/> is <c>null</c>.</exception>
/// <exception cref="NotSupportedException">
/// The node type, or the kind of member being accessed, is not handled.
/// </exception>
static object Evaluate(Expression expr)
{
    if (expr == null)
    {
        throw new ArgumentNullException(nameof(expr));
    }

    switch (expr.NodeType)
    {
        case ExpressionType.Constant:
            return ((ConstantExpression)expr).Value;
        case ExpressionType.MemberAccess:
            var me = (MemberExpression)expr;
            // A static field or property has no instance expression; reflection expects
            // a null target in that case, so do not recurse into null.
            object target = me.Expression == null ? null : Evaluate(me.Expression);
            switch (me.Member.MemberType)
            {
                case System.Reflection.MemberTypes.Field:
                    return ((FieldInfo)me.Member).GetValue(target);
                case System.Reflection.MemberTypes.Property:
                    return ((PropertyInfo)me.Member).GetValue(target, null);
                default:
                    throw new NotSupportedException(me.Member.MemberType.ToString());
            }
        default:
            throw new NotSupportedException(expr.NodeType.ToString());
    }
}
/// <summary>
/// Appends a textual description of <paramref name="expr"/> to <paramref name="output"/>,
/// recursing into sub-expressions. Every fragment starts with a space, so fragments can be
/// concatenated directly.
/// </summary>
/// <param name="expr">The expression to describe.</param>
/// <param name="output">The builder that receives the text.</param>
/// <exception cref="NotSupportedException">
/// The node type or member-binding type is not one of the handled kinds.
/// </exception>
static void Walk(Expression expr, StringBuilder output)
{
    switch (expr.NodeType)
    {
        case ExpressionType.New:
            var ne = (NewExpression)expr;
            // Use the node's type instead of ne.Constructor.DeclaringType so a 'new' without
            // a constructor (Constructor == null) does not dereference null.
            output.Append(" a new ").Append(ne.Type.Name);
            WalkArguments(ne.Arguments, ne.Constructor, output);
            break;
        case ExpressionType.Lambda:
            Walk(((LambdaExpression)expr).Body, output);
            break;
        case ExpressionType.Call:
            var mce = (MethodCallExpression)expr;
            if (mce.Method.DeclaringType == typeof(Evaluator))
            {
                // For Evaluator methods the instance is evaluated so that its runtime
                // Name is printed instead of the method name.
                object target = Evaluate(mce.Object);
                output.Append(" call ").Append(((Evaluator)target).Name);
            }
            else
            {
                output.Append(" call ").Append(mce.Method.Name);
            }
            if (mce.Object != null)
            {
                output.Append(" on");
                Walk(mce.Object, output);
            }
            WalkArguments(mce.Arguments, mce.Method, output);
            break;
        case ExpressionType.MemberInit:
            var mei = (MemberInitExpression)expr;
            Walk(mei.NewExpression, output);
            foreach (var member in mei.Bindings)
            {
                switch (member.BindingType)
                {
                    case MemberBindingType.Assignment:
                        output.Append(" set ").Append(member.Member.Name)
                            .Append(" to:");
                        Walk(((MemberAssignment)member).Expression, output);
                        break;
                    default:
                        throw new NotSupportedException(member.BindingType.ToString());
                }
            }
            break;
        case ExpressionType.Constant:
            var ce = (ConstantExpression)expr;
            if (Attribute.IsDefined(ce.Type, typeof(CompilerGeneratedAttribute)))
            {
                // A constant of a compiler-generated type is reported as the capture context.
                output.Append(" capture-context");
            }
            else
            {
                output.Append(" ").Append(ce.Value);
            }
            break;
        case ExpressionType.MemberAccess:
            var me = (MemberExpression)expr;
            output.Append(" get ").Append(me.Member.Name).Append(" from");
            if (me.Expression == null)
            { // static
                // Leading space added: without it the type name ran into "from".
                output.Append(" ").Append(me.Member.DeclaringType.Name);
            }
            else
            {
                Walk(me.Expression, output);
            }
            break;
        case ExpressionType.Parameter:
            var pe = (ParameterExpression)expr;
            output.Append(" #").Append(pe.Name);
            break;
        default:
            throw new NotSupportedException(expr.NodeType.ToString());
    }
}

// Appends " with name =<arg>, name =<arg>..." for the arguments of a constructor or
// method call; appends nothing when there are no arguments.
static void WalkArguments(System.Collections.Generic.IList<Expression> arguments, MethodBase method, StringBuilder output)
{
    if (arguments == null || arguments.Count == 0)
    {
        return;
    }

    var parameters = method.GetParameters();
    for (int i = 0; i < arguments.Count; i++)
    {
        output.Append(i == 0 ? " with " : ", ")
            .Append(parameters[i].Name).Append(" =");
        Walk(arguments[i], output);
    }
}
If I understand correctly, you're wondering how to get properties from the instance of the object the method in your example was being called on. As Marc mentions in his answer, expression trees are complex and time consuming to work with, this addresses specifically your example (and likely nothing else).
/// <summary>
/// Prints the <c>Name</c> of the <c>Evaluator</c> instance behind every method call found in
/// the member initialiser of <paramref name="expression"/>.
/// </summary>
/// <param name="expression">
/// A lambda, a member-init expression or a method call; any other expression is ignored.
/// </param>
private static void DumpExpression(Expression expression)
{
    var lambda = expression as LambdaExpression;
    if (lambda != null)
    {
        DumpExpression(lambda.Body);
        return;
    }

    var init = expression as MemberInitExpression;
    if (init != null)
    {
        foreach (var binding in init.Bindings)
        {
            var assignment = (MemberAssignment) binding;
            DumpExpression(assignment.Expression);
        }
        // Return after the loop: returning inside it visited only the first binding.
        return;
    }

    var methodCallExpression = expression as MethodCallExpression;
    if (methodCallExpression != null)
    {
        var instance = methodCallExpression.Object;
        // Static calls have no instance, and only an Evaluator can be compiled into a
        // Func<Evaluator>; anything else is skipped instead of throwing.
        if (instance == null || !typeof(Evaluator).IsAssignableFrom(instance.Type))
        {
            return;
        }

        //Create a func that retrieves the real value of the object the method call
        // is being evaluated on and get the Name property from it
        var objectGetExpression = Expression.Lambda<Func<Evaluator>>(instance);
        var objectGetFunc = objectGetExpression.Compile();
        Console.WriteLine(objectGetFunc().Name);
        return;
    }
}