/// Contains a pull parser for command line arguments.
module jaster.cli.parser;

private
{
    import std.typecons : Flag;
}

/// What type of data an `ArgToken` stores.
enum ArgTokenType
{
    /// None. If this ever gets returned by the `ArgPullParser`, it's an error.
    None,

    /// Plain text. Note that these values usually do have some kind of meaning (e.g. the value of a named argument) but it's
    /// too inaccurate for the parser to determine their meanings. So it's up to whatever is using the parser.
    Text,

    /// The name of a short hand argument ('-h', '-c', etc.) $(B without) the leading '-'.
    ShortHandArgument,

    /// The name of a long hand argument ('--help', '--config', etc.) $(B without) the leading '--'.
    LongHandArgument,

    /// End of file/input.
    EOF
}

/// Contains information about a token.
struct ArgToken
{
    /// The value making up the token.
    string value;

    /// The type of data this token represents.
    ArgTokenType type;
}

/++
 + A pull parser for command line arguments.
 +
 + Notes:
 +  The input is given as a `string[]`. This mostly only matters for `ArgTokenType.Text` values.
 +  This is because the parser does not split up plain text by spaces like a shell would.
 +
 +  e.g. There will be different results between `ArgPullParser(["env set OAUTH_SECRET 29ef"])` and
 +  `ArgPullParser(["env", "set", "OAUTH_SECRET", "29ef"])`
 +
 +  The former is given back as a single token containing the entire string. The latter will return 4 tokens, containing the individual strings.
 +
 +  This behaviour is used because this parser is designed to take its input directly from the main function's args, which have already been
 +  processed by a shell.
 +
 + Argument Formats:
 +  The following named argument formats are supported.
 +
 +  '-n'         - Shorthand with no argument. (returns `ArgTokenType.ShortHandArgument`)
 +  '-n ARG'     - Shorthand with argument. (`ArgTokenType.ShortHandArgument` and `ArgTokenType.Text`)
 +  '-n=ARG'     - Shorthand with argument with an equals sign. The equals sign is removed from the token output. (`ArgTokenType.ShortHandArgument` and `ArgTokenType.Text`)
 +  '-nARG'      - Shorthand with argument with no space between them. (`ArgTokenType.ShortHandArgument` and `ArgTokenType.Text`)
 +
 +  '--name'     - Longhand with no argument.
 +  '--name ARG' - Longhand with argument.
 +  '--name=ARG' - Longhand with argument with an equals sign. The equals sign is removed from the token output.
 +
 +  A lone '-' or '--' (either as its own arg, or directly followed by whitespace) produces a named-argument token
 +  with an empty name.
 + ++/
@safe
struct ArgPullParser
{
    /// Variables ///
    private
    {
        alias OrEqualSign = Flag!"equalSign";
        alias OrSpace = Flag!"space";

        string[] _args;
        size_t   _currentArgIndex;  // Current index into _args.
        size_t   _currentCharIndex; // Current index into the current arg.
        ArgToken _currentToken = ArgToken(null, ArgTokenType.EOF); // EOF by default, so that `ArgPullParser.init.empty` is true.
    }

    /++
     + Constructs the parser and immediately parses the first token, so `front` is usable straight away.
     +
     + Params:
     +  args = The arguments to parse. Please see the 'notes' section for `ArgPullParser`.
     + ++/
    this(string[] args)
    {
        this._args = args;
        this.popFront();
    }

    /// Range interface ///
    public
    {
        /// Parses the next token.
        void popFront()
        {
            this.nextToken();
        }

        /// Returns: the last parsed token.
        ArgToken front()
        {
            return this._currentToken;
        }

        /// Returns: Whether the last parsed token is `ArgTokenType.EOF`, i.e. there is nothing left to parse.
        bool empty()
        {
            return this._currentToken.type == ArgTokenType.EOF;
        }

        /// Returns: A copy of the pull parser in its current state.
        ArgPullParser save()
        {
            // Built field-by-field rather than via the constructor, since the constructor would parse a token.
            ArgPullParser parser;
            parser._args             = this._args;
            parser._currentArgIndex  = this._currentArgIndex;
            parser._currentCharIndex = this._currentCharIndex;
            parser._currentToken     = this._currentToken;

            return parser;
        }

        /++
         + Returns: The args that have yet to be parsed, or `null` if there are none.
         +
         + Notes:
         +  This works on whole args: everything $(B after) the arg that the current token came from is returned,
         +  even if that arg still contains unparsed text (e.g. the value in `--name=value`).
         + ++/
        @property
        string[] unparsedArgs()
        {
            return (this._currentArgIndex + 1 < this._args.length)
                   ? this._args[this._currentArgIndex + 1..$]
                   : null;
        }
    }

    /// Parsing ///
    private
    {
        /// Returns: The arg currently being parsed. Must only be called while `_currentArgIndex` is in bounds.
        @property
        string currentArg()
        {
            return this._args[this._currentArgIndex];
        }

        /// Returns: The part of the current arg that has not been consumed yet.
        @property
        string currentArgSlice()
        {
            return this.currentArg[this._currentCharIndex..$];
        }

        /++
         + Advances the parser to the next non-whitespace character, moving on to following args as needed.
         +
         + After this call, either `_currentArgIndex` is out of bounds (no input left), or the parser
         + points at a non-whitespace character.
         + ++/
        void skipWhitespace()
        {
            import std.ascii : isWhite;

            if(this._currentArgIndex >= this._args.length)
                return;

            // Current arg could be empty, so get next arg.
            // *Next* arg could also be empty, so repeat until we either run out of args, or we find a non-empty one.
            while(this.currentArgSlice.length == 0)
            {
                this.nextArg();

                if(this._currentArgIndex >= this._args.length)
                    return;
            }

            auto arg = this.currentArg;
            while(arg[this._currentCharIndex].isWhite)
            {
                this._currentCharIndex++;
                if(this._currentCharIndex >= arg.length)
                {
                    // Next arg might start with whitespace, so we have to keep going.
                    // We recursively call this function so we don't have to copy the empty-check logic at the start of this function.
                    this.nextArg();
                    return this.skipWhitespace();
                }
            }
        }

        /++
         + Reads text starting from the next non-whitespace character.
         +
         + Params:
         +  orSpace     = If `yes`, stop reading at the first whitespace character.
         +  orEqualSign = If `yes`, stop reading at the first '=' character.
         +
         + Returns:
         +  The text that was read (without the character that stopped the read, which is skipped over),
         +  or `null` if there is no input left.
         + ++/
        string readToEnd(OrSpace orSpace = OrSpace.no, OrEqualSign orEqualSign = OrEqualSign.no)
        {
            import std.ascii : isWhite;

            this.skipWhitespace();
            if(this._currentArgIndex >= this._args.length)
                return null;

            // Small optimisation: If we're at the very start, and we only need to read until the end, then just
            // return the entire arg.
            if(this._currentCharIndex == 0 && !orSpace && !orEqualSign)
            {
                auto arg = this.currentArg;

                // Force skipWhitespace to call nextArg on its next call.
                // We can't call nextArg here, as it breaks assumptions that unparsedArgs relies on.
                this._currentCharIndex = this.currentArg.length;
                return arg;
            }

            auto slice = this.currentArgSlice;
            size_t end = 0;
            while(end < slice.length)
            {
                if((slice[end].isWhite && orSpace)
                || (slice[end] == '=' && orEqualSign)
                )
                {
                    break;
                }

                end++;
                this._currentCharIndex++;
            }

            // Skip over whatever char we ended up on.
            // This is mostly to skip over the '=' sign if we're using that, but also saves 'skipWhitespace' a bit of hassle.
            if(end < slice.length)
                this._currentCharIndex++;

            return slice[0..end];
        }

        /// Moves to the start of the next arg.
        void nextArg()
        {
            this._currentArgIndex++;
            this._currentCharIndex = 0;
        }

        /// Parses the next token from the input and stores it in `_currentToken`.
        void nextToken()
        {
            import std.ascii : isWhite;

            this.skipWhitespace();
            if(this._currentArgIndex >= this._args.length)
            {
                this._currentToken = ArgToken("", ArgTokenType.EOF);
                return;
            }

            auto slice = this.currentArgSlice;
            if(slice.length >= 2 && slice[0..2] == "--")
            {
                this._currentCharIndex += 2;

                // Edge case: Since readToEnd can advance the "currentArgSlice", we get into this common situation
                // of ["--", "b"] where this should be an unnamed long hand arg followed by the text "b", but
                // instead it gets treated as "--b", which we don't want. So we're just checking for this here.
                //
                // Any whitespace counts here (not just ' '), to stay consistent with skipWhitespace and readToEnd.
                // Otherwise readToEnd would skip e.g. a tab and hand back the following text as the name.
                if(this._currentCharIndex >= this.currentArg.length || this.currentArg[this._currentCharIndex].isWhite)
                    this._currentToken = ArgToken("", ArgTokenType.LongHandArgument);
                else
                    this._currentToken = ArgToken(this.readToEnd(OrSpace.yes, OrEqualSign.yes), ArgTokenType.LongHandArgument);
                return;
            }
            else if(slice.length >= 1 && slice[0] == '-')
            {
                // A lone "-" has no name, so there's only the dash to skip.
                // Note that the name is always a single `char` (code unit) taken directly after the dash.
                this._currentCharIndex += (slice.length == 1) ? 1 : 2; // += 2 so we skip over the arg name.
                this._currentToken = ArgToken((slice.length == 1) ? "" : slice[1..2], ArgTokenType.ShortHandArgument);

                // Skip over the equals sign if there is one.
                if(this._currentCharIndex < this.currentArg.length
                && this.currentArg[this._currentCharIndex] == '=')
                    this._currentCharIndex++;

                // If it's unnamed, then sometimes the "name" can be a whitespace character, so we'll just handle that here.
                if(this._currentToken.value.length == 1 && this._currentToken.value[0].isWhite)
                    this._currentToken.value = null;

                return;
            }
            else if(slice.length != 0)
            {
                this._currentToken = ArgToken(this.readToEnd(), ArgTokenType.Text);
                return;
            }

            assert(false, "EOF should've been returned. SkipWhitespace might not be working.");
        }
    }
}
///
@safe
unittest
{
    import std.array : array;

    auto args =
    [
        // Some plain text.
        "env", "set",

        // Long hand named arguments.
        "--config=MyConfig.json", "--config MyConfig.json",

        // Short hand named arguments.
        "-cMyConfig.json", "-c=MyConfig.json", "-c MyConfig.json",

        // Simple example to prove that you don't need the arg name and value in the same string.
        "-c", "MyConfig.json",

        // Plain text.
        "Some Positional Argument",

        // Raw Nameless named args
        "- a", "-", "a",
        "-- a", "--", "a"
    ];
    auto tokens = ArgPullParser(args).array;

    // import std.stdio;
    // writeln(tokens);

    // Plain text.
    assert(tokens[0]  == ArgToken("env",                         ArgTokenType.Text));
    assert(tokens[1]  == ArgToken("set",                         ArgTokenType.Text));

    // Long hand named arguments.
    assert(tokens[2]  == ArgToken("config",                      ArgTokenType.LongHandArgument));
    assert(tokens[3]  == ArgToken("MyConfig.json",               ArgTokenType.Text));
    assert(tokens[4]  == ArgToken("config",                      ArgTokenType.LongHandArgument));
    assert(tokens[5]  == ArgToken("MyConfig.json",               ArgTokenType.Text));

    // Short hand named arguments.
    assert(tokens[6]  == ArgToken("c",                           ArgTokenType.ShortHandArgument));
    assert(tokens[7]  == ArgToken("MyConfig.json",               ArgTokenType.Text));
    assert(tokens[8]  == ArgToken("c",                           ArgTokenType.ShortHandArgument));
    assert(tokens[9]  == ArgToken("MyConfig.json",               ArgTokenType.Text));
    assert(tokens[10] == ArgToken("c",                           ArgTokenType.ShortHandArgument));
    assert(tokens[11] == ArgToken("MyConfig.json",               ArgTokenType.Text));
    assert(tokens[12] == ArgToken("c",                           ArgTokenType.ShortHandArgument));
    assert(tokens[13] == ArgToken("MyConfig.json",               ArgTokenType.Text));

    // Plain text.
    assert(tokens[14] == ArgToken("Some Positional Argument",    ArgTokenType.Text));

    // Raw Nameless named args.
    assert(tokens[15] == ArgToken("",                            ArgTokenType.ShortHandArgument));
    assert(tokens[16] == ArgToken("a",                           ArgTokenType.Text));
    assert(tokens[17] == ArgToken("",                            ArgTokenType.ShortHandArgument));
    assert(tokens[18] == ArgToken("a",                           ArgTokenType.Text));
    assert(tokens[19] == ArgToken("",                            ArgTokenType.LongHandArgument));
    assert(tokens[20] == ArgToken("a",                           ArgTokenType.Text));
    assert(tokens[21] == ArgToken("",                            ArgTokenType.LongHandArgument));
    assert(tokens[22] == ArgToken("a",                           ArgTokenType.Text));
}

@("Issue: .init.empty must be true")
@safe
unittest
{
    assert(ArgPullParser.init.empty);
}

@("Issue: any whitespace after a nameless '-' or '--' must act like a space")
@safe
unittest
{
    import std.array : array;

    auto tokens = ArgPullParser(["--\ta", "-\ta"]).array;

    assert(tokens.length == 4);
    assert(tokens[0] == ArgToken("",  ArgTokenType.LongHandArgument));
    assert(tokens[1] == ArgToken("a", ArgTokenType.Text));
    assert(tokens[2] == ArgToken("",  ArgTokenType.ShortHandArgument));
    assert(tokens[3] == ArgToken("a", ArgTokenType.Text));
}

@("Test unparsedArgs")
@safe
unittest
{
    auto args =
    [
        "one", "-t", "--three", "--unfortunate=edgeCase" // Despite this containing two tokens, they currently both get skipped over, even though only one was parsed so far ;/
    ];
    auto parser = ArgPullParser(args);

    assert(parser.unparsedArgs == args[1..$]);
    foreach(i; 0..3)
    {
        parser.popFront();
        assert(parser.unparsedArgs == args[2 + i..$]);
    }

    assert(parser.unparsedArgs is null);
}