// Copyright Ferdinand Majerech 2011-2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
module dyaml.scanner;


import core.stdc.string;

import std.algorithm;
import std.array;
import std.container;
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;

import dyaml.fastcharsearch;
import dyaml.escapes;
import dyaml.exception;
import dyaml.nogcutil;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;

package:
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)


/// Marked exception thrown at scanner errors.
///
/// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
    mixin MarkedExceptionCtors;
}

/// Generates tokens from data provided by a Reader.
final class Scanner
{
private:
/// A simple key is a key that is not denoted by the '?' indicator.
/// For example:
///   ---
///   block simple key: value
///   ? not a simple key:
///   : { flow simple key: value }
/// We emit the KEY token before all keys, so when we find a potential simple
/// key, we try to locate the corresponding ':' indicator. Simple keys should be
/// limited to a single line and 1024 characters.
///
/// 16 bytes on 64-bit.
static struct SimpleKey
{
    /// Character index in reader where the key starts.
    uint charIndex = uint.max;
    /// Index of the key token from start (first token scanned being 0).
    uint tokenIndex;
    /// Line the key starts at.
    uint line;
    /// Column the key starts at.
    ushort column;
    /// Is this required to be a simple key?
    bool required;
    /// Is this struct "null" (invalid)?.
    bool isNull;
}

/// Block chomping types.
enum Chomping
{
    /// Strip all trailing line breaks. '-' indicator.
    Strip,
    /// Line break of the last line is preserved, others discarded. Default.
    Clip,
    /// All trailing line breaks are preserved. '+' indicator.
    Keep
}

/// Reader used to read from a file/stream.
Reader reader_;
/// Are we done scanning?
bool done_;

/// Level of nesting in flow context. If 0, we're in block context.
uint flowLevel_;
/// Current indentation level.
int indent_ = -1;
/// Past indentation levels. Used as a stack.
Array!int indents_;

/// Processed tokens not yet emitted. Used as a queue.
Queue!Token tokens_;

/// Number of tokens emitted through the getToken method.
uint tokensTaken_;

/// Can a simple key start at the current position? A simple key may start:
/// - at the beginning of the line, not counting indentation spaces
///   (in block context),
/// - after '{', '[', ',' (in the flow context),
/// - after '?', ':', '-' (in the block context).
/// In the block context, this flag also signifies if a block collection
/// may start at the current position.
bool allowSimpleKey_ = true;

/// Possible simple keys indexed by flow levels.
SimpleKey[] possibleSimpleKeys_;


/// Set on error by nothrow/@nogc inner functions along with errorData_.
///
/// Non-nothrow/GC-using caller functions can then throw an exception using
/// data stored in errorData_.
bool error_;

/// Data for the exception to throw if error_ is true.
MarkedYAMLExceptionData errorData_;

/// Error messages can be built in this buffer without using the GC.
///
/// ScannerException (MarkedYAMLException) copies string data passed to its
/// constructor so it's safe to use slices of this buffer as parameters for
/// exceptions that may outlive the Scanner. The GC allocation when creating the
/// error message is removed, but the allocation when creating an exception is
/// not.
char[256] msgBuffer_;

/// Used to detect if a character is any whitespace plus '\0'
mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
/// Used to detect if a character is any line break plus '\0'
mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;

/// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
alias canFind = std.algorithm.canFind;

public:
/// Construct a Scanner using specified Reader.
///
/// Params: reader = Reader the scanner will take its input from.
this(Reader reader) @safe nothrow
{
    // Remember the input source, then queue STREAM-START as the very first token.
    reader_ = reader;
    fetchStreamStart();
}

/// Destroy the scanner, releasing the token queue, the indentation stack and
/// the simple key table, and dropping the reference to the Reader.
@trusted ~this()
{
    tokens_.destroy();
    indents_.destroy();
    possibleSimpleKeys_.destroy();
    possibleSimpleKeys_ = null;
    reader_ = null;
}

/// Check if the next token is one of specified types.
///
/// If no types are specified, checks if any tokens are left.
///
/// Params: ids = Token IDs to check for.
///
/// Returns: true if the next token is one of specified types, or if there are
///          any tokens left if no types specified, false otherwise.
bool checkToken(const TokenID[] ids ...) @safe
{
    // Make sure the head of the queue is final before looking at it.
    while(needMoreTokens()) { fetchToken(); }

    // Nothing left at all.
    if(tokens_.empty) { return false; }
    // No filter given: any remaining token counts.
    if(ids.length == 0) { return true; }

    const headId = tokens_.peek().id;
    foreach(wanted; ids)
    {
        if(headId == wanted) { return true; }
    }
    return false;
}

/// Return the next token, but keep it in the queue.
///
/// Must not be called if there are no tokens left.
ref const(Token) peekToken() @safe
{
    while(needMoreTokens) { fetchToken(); }
    if(tokens_.empty)
    {
        assert(false, "No token left to peek");
    }
    return tokens_.peek();
}

/// Return the next token, removing it from the queue.
///
/// Must not be called if there are no tokens left.
Token getToken() @safe
{
    while(needMoreTokens) { fetchToken(); }
    if(tokens_.empty)
    {
        assert(false, "No token left to get");
    }
    // Keep the count of handed-out tokens in sync; simple key bookkeeping
    // compares token indices against it.
    ++tokensTaken_;
    return tokens_.pop();
}

private:
/// Build an error message in msgBuffer_ and return it as a string.
///
/// The result is a slice of msgBuffer_, so it is only valid until the next
/// message is built.
string buildMsg(S ...)(S args) @trusted pure nothrow @nogc
{
    auto built = msgBuffer_.printNoGC(args);
    return cast(string)built;
}

/// Most scanning error messages have the same format; so build them with this
/// function.
///
/// Params: expected = Description of what the scanner expected.
///         found    = What was actually found.
string expected(T)(string expected, T found) @safe pure nothrow @nogc
{
    return buildMsg("expected ", expected, ", but found ", found);
}

/// If error_ is true, throws a ScannerException constructed from errorData_ and
/// sets error_ to false.
void throwIfError() @safe pure
{
    if(error_)
    {
        // Clear the flag first so the scanner does not stay in the error state.
        error_ = false;
        throw new ScannerException(errorData_);
    }
}

/// Called by internal nothrow/@nogc methods to set an error to be thrown by
/// their callers.
///
/// See_Also: dyaml.exception.MarkedYAMLException
void error(string context, const Mark contextMark, string problem,
           const Mark problemMark) @safe pure nothrow @nogc
{
    // Only one pending error may exist at a time; the caller must throw it
    // (throwIfError) before another one is recorded.
    assert(!error_,
           "Setting an error when there already is a not yet thrown error");
    error_     = true;
    errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
}

/// Determine whether or not we need to fetch more tokens before peeking/getting a token.
///
/// Returns: false once scanning is done; true if the queue is empty or if the
///          token at the head of the queue may still turn out to be a simple key.
bool needMoreTokens() @safe pure
{
    if(done_)         { return false; }
    if(tokens_.empty) { return true; }

    // The current token may be a potential simple key, so we need to look further.
    stalePossibleSimpleKeys();
    const nearestKey = nextPossibleSimpleKey();
    return nearestKey == tokensTaken_;
}

/// Fetch a token, adding it to tokens_.
///
/// Throws: ScannerException if the next character cannot start any token.
void fetchToken() @safe
{
    // Eat whitespaces and comments until we reach the next token.
    scanToNextToken();

    // Remove obsolete possible simple keys.
    stalePossibleSimpleKeys();

    // Compare current indentation and column. It may add some tokens
    // and decrease the current indentation level.
    unwindIndent(reader_.column);

    // Get the next character.
    const dchar c = reader_.peekByte();

    // End of input, then the indicators that are only valid at column 0.
    if(c == '\0')            { return fetchStreamEnd();     }
    if(checkDirective())     { return fetchDirective();     }
    if(checkDocumentStart()) { return fetchDocumentStart(); }
    if(checkDocumentEnd())   { return fetchDocumentEnd();   }

    // Order of the following checks is NOT significant.
    switch(c)
    {
        case '[':  return fetchFlowSequenceStart();
        case '{':  return fetchFlowMappingStart();
        case ']':  return fetchFlowSequenceEnd();
        case '}':  return fetchFlowMappingEnd();
        case ',':  return fetchFlowEntry();
        case '!':  return fetchTag();
        case '\'': return fetchSingle();
        case '\"': return fetchDouble();
        case '*':  return fetchAlias();
        case '&':  return fetchAnchor();
        // These indicators may also begin a plain scalar, hence `goto default`.
        case '?':  if(checkKey())        { return fetchKey();        } goto default;
        case ':':  if(checkValue())      { return fetchValue();      } goto default;
        case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
        // Block scalars exist only in block context; in flow context these are errors.
        case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
        case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
        default:   if(checkPlain())      { return fetchPlain();      }
    }

    throw new ScannerException("While scanning for the next token, found character " ~
                               "\'%s\', index %s that cannot start any token"
                               .format(c, to!int(c)), reader_.mark);
}


/// Return the token number of the nearest possible simple key.
///
/// Returns: Lowest tokenIndex among non-null possible simple keys, or uint.max
///          if there are none.
uint nextPossibleSimpleKey() @safe pure nothrow @nogc
{
    uint nearest = uint.max;
    foreach(ref candidate; possibleSimpleKeys_)
    {
        if(!candidate.isNull)
        {
            nearest = min(nearest, candidate.tokenIndex);
        }
    }
    return nearest;
}

/// Remove entries that are no longer possible simple keys.
///
/// According to the YAML specification, simple keys
/// - should be limited to a single line,
/// - should be no longer than 1024 characters.
/// Disabling this will allow simple keys of any length and
/// height (may cause problems if indentation is broken though).
void stalePossibleSimpleKeys() @safe pure
{
    foreach(ref candidate; possibleSimpleKeys_)
    {
        if(candidate.isNull) { continue; }

        // A candidate stays valid only on its own line and within 1024 characters.
        const onSameLine  = candidate.line == reader_.line;
        const withinLimit = reader_.charIndex - candidate.charIndex <= 1024;
        if(onSameLine && withinLimit) { continue; }

        // A required key that went stale means its ':' is missing.
        if(candidate.required)
        {
            throw new ScannerException("While scanning a simple key",
                                       Mark(candidate.line, candidate.column),
                                       "could not find expected ':'", reader_.mark);
        }
        candidate.isNull = true;
    }
}

/// Check if the next token starts a possible simple key and if so, save its position.
///
/// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
///
/// Throws: ScannerException (through removePossibleSimpleKey) if a required
///         simple key saved earlier at this flow level has to be discarded.
void savePossibleSimpleKey() @safe pure
{
    // A simple key is mandatory in block context when the token sits exactly
    // at the current indentation column.
    const required = (flowLevel_ == 0 && indent_ == reader_.column);
    assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~
           "the first token in the current line. Therefore it is always allowed.");

    if(!allowSimpleKey_) { return; }

    // The next token might be a simple key, so save its number and position.
    removePossibleSimpleKey();
    const nextTokenIndex = tokensTaken_ + cast(uint)tokens_.length;

    const keyLine   = reader_.line;
    const keyColumn = reader_.column;
    const candidate = SimpleKey(cast(uint)reader_.charIndex, nextTokenIndex, keyLine,
                                cast(ushort)min(keyColumn, ushort.max), required);

    // Grow the per-flow-level table on demand.
    if(possibleSimpleKeys_.length <= flowLevel_)
    {
        const previousLength = possibleSimpleKeys_.length;
        possibleSimpleKeys_.length = flowLevel_ + 1;
        // The last element is assigned just below, so only the gap is initialized.
        possibleSimpleKeys_[previousLength .. flowLevel_] = SimpleKey.init;
    }
    possibleSimpleKeys_[flowLevel_] = candidate;
}

/// Remove the saved possible key position at the current flow level.
void removePossibleSimpleKey() @safe pure
{
    // No entry has ever been stored for this flow level.
    if(flowLevel_ >= possibleSimpleKeys_.length) { return; }

    const saved = possibleSimpleKeys_[flowLevel_];
    if(saved.isNull) { return; }

    // Discarding a required key means its ':' was never found.
    if(saved.required)
    {
        throw new ScannerException("While scanning a simple key",
                                   Mark(saved.line, saved.column),
                                   "could not find expected ':'", reader_.mark);
    }
    possibleSimpleKeys_[flowLevel_].isNull = true;
}

/// Decrease indentation, removing entries in indents_.
///
/// Emits one BLOCK-END token for every indentation level that is closed.
///
/// Params: column = Current column in the file/stream.
void unwindIndent(const int column) @trusted
{
    // In flow context, tokens should respect indentation.
    // The condition should be `indent >= column` according to the spec.
    // But this condition will prohibit intuitively correct
    // constructions such as
    // key : {
    // }

    // In the flow context, indentation is ignored. We make the scanner less
    // restrictive than what the specification requires.
    // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
    // {
    //     throw new ScannerException("Invalid intendation or unclosed '[' or '{'",
    //                                reader_.mark)
    // }
    if(flowLevel_ > 0) { return; }

    // In block context, we may need to issue the BLOCK-END tokens.
    while(column < indent_)
    {
        // Pop the enclosing indentation level off the stack.
        indent_ = indents_.back;
        indents_.length = indents_.length - 1;
        tokens_.push(blockEndToken(reader_.mark, reader_.mark));
    }
}

/// Increase indentation if needed.
///
/// Params: column = Current column in the file/stream.
///
/// Returns: true if the indentation was increased, false otherwise.
bool addIndent(int column) @trusted
{
    const deeper = column > indent_;
    if(deeper)
    {
        // Remember the level we are leaving so unwindIndent can restore it.
        indents_ ~= indent_;
        indent_ = column;
    }
    return deeper;
}


/// Add STREAM-START token.
void fetchStreamStart() @safe nothrow
{
    // Zero-width token at the current reader position; it also carries the
    // encoding reported by the reader.
    tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
}

/// Add STREAM-END token and mark scanning as done.
void fetchStreamEnd() @safe
{
    // Set indentation to -1, closing any block collections still open
    // (unwindIndent does nothing in flow context).
    unwindIndent(-1);
    // Reset simple keys; no further keys can start.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;
    possibleSimpleKeys_.destroy;

    tokens_.push(streamEndToken(reader_.mark, reader_.mark));
    // From now on needMoreTokens() returns false.
    done_ = true;
}

/// Add DIRECTIVE token.
///
/// Throws: ScannerException if scanDirective() recorded an error.
void fetchDirective() @safe
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    // Scan first, then raise any error recorded by the nothrow scanning code,
    // and only then queue the token.
    auto directive = scanDirective();
    throwIfError();
    tokens_.push(directive);
}

/// Add DOCUMENT-START or DOCUMENT-END token.
///
/// Params: id = Token type to add; restricted to DocumentStart or DocumentEnd
///              by the template constraint.
void fetchDocumentIndicator(TokenID id)() @safe
    if(id == TokenID.DocumentStart || id == TokenID.DocumentEnd)
{
    // Set indentation to -1.
    unwindIndent(-1);
    // Reset simple keys. Note that there can't be a block collection after '---'.
    removePossibleSimpleKey();
    allowSimpleKey_ = false;

    Mark startMark = reader_.mark;
    // Skip the three-character indicator ('---' or '...').
    reader_.forward(3);
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add DOCUMENT-START or DOCUMENT-END token.
alias fetchDocumentIndicator!(TokenID.DocumentStart) fetchDocumentStart;
alias fetchDocumentIndicator!(TokenID.DocumentEnd) fetchDocumentEnd;

/// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
///
/// Params: id = Token type to add.
void fetchFlowCollectionStart(TokenID id)() @trusted
{
    // '[' and '{' may start a simple key.
    savePossibleSimpleKey();
    // Simple keys are allowed after '[' and '{'.
    allowSimpleKey_ = true;
    // Enter one more level of flow nesting. This must happen after the
    // possible simple key was saved at the enclosing level.
    ++flowLevel_;

    Mark startMark = reader_.mark;
    // Skip the single-character indicator.
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
alias fetchFlowCollectionStart!(TokenID.FlowSequenceStart) fetchFlowSequenceStart;
alias fetchFlowCollectionStart!(TokenID.FlowMappingStart) fetchFlowMappingStart;

/// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
///
/// Params: id = Token type to add.
///
/// Throws: ScannerException (through removePossibleSimpleKey) if a required
///         simple key at the current level has to be discarded.
void fetchFlowCollectionEnd(TokenID id)() @safe
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // No simple keys after ']' and '}'.
    allowSimpleKey_ = false;
    // flowLevel_ is unsigned: a stray ']' or '}' in block context must not wrap
    // it around to uint.max, which would make the scanner treat the rest of the
    // input as deeply nested flow context and index possibleSimpleKeys_ out of
    // range. The unmatched token is still emitted below, so the error can be
    // detected when the token is consumed.
    if(flowLevel_ > 0) { --flowLevel_; }

    Mark startMark = reader_.mark;
    // Skip the single-character indicator.
    reader_.forward();
    tokens_.push(simpleToken!id(startMark, reader_.mark));
}

/// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
alias fetchFlowCollectionEnd!(TokenID.FlowSequenceEnd) fetchFlowSequenceEnd;
alias fetchFlowCollectionEnd!(TokenID.FlowMappingEnd) fetchFlowMappingEnd;

/// Add FLOW-ENTRY token.
///
/// Throws: ScannerException (through removePossibleSimpleKey) if a required
///         simple key at the current level has to be discarded.
void fetchFlowEntry() @safe
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after ','.
    allowSimpleKey_ = true;

    Mark startMark = reader_.mark;
    // Skip the ','.
    reader_.forward();
    tokens_.push(flowEntryToken(startMark, reader_.mark));
}

/// Additional checks used in block context in fetchBlockEntry and fetchKey.
///
/// Params: type = String representing the token type we might need to add.
///         id   = Token type we might need to add.
///
/// Throws: ScannerException if a key/entry is not allowed at the current position.
void blockChecks(string type, TokenID id)() @safe
{
    enum context = type ~ " keys are not allowed here";
    // Are we allowed to start a key (not necessarily a simple one)?
    enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

    // A deeper column opens a new block collection; emit its start token.
    if(addIndent(reader_.column))
    {
        tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
    }
}

/// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
void fetchBlockEntry() @safe
{
    if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.BlockSequenceStart)(); }

    // It's an error for the block entry to occur in the flow context,
    // but we let the parser detect this.

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '-'.
    allowSimpleKey_ = true;

    Mark startMark = reader_.mark;
    // Skip the '-'.
    reader_.forward();
    tokens_.push(blockEntryToken(startMark, reader_.mark));
}

/// Add KEY token. Might add BLOCK-MAPPING-START in the process.
void fetchKey() @safe
{
    if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.BlockMappingStart)(); }

    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // Simple keys are allowed after '?' in the block context.
    allowSimpleKey_ = (flowLevel_ == 0);

    Mark startMark = reader_.mark;
    // Skip the '?'.
    reader_.forward();
    tokens_.push(keyToken(startMark, reader_.mark));
}

/// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
///
/// Throws: ScannerException if a mapping value is not allowed at the current
///         position.
void fetchValue() @safe
{
    // Do we determine a simple key?
    if(possibleSimpleKeys_.length > flowLevel_ &&
       !possibleSimpleKeys_[flowLevel_].isNull)
    {
        const key = possibleSimpleKeys_[flowLevel_];
        possibleSimpleKeys_[flowLevel_].isNull = true;
        Mark keyMark = Mark(key.line, key.column);

        // Both operands are unsigned, so the check has to happen before the
        // subtraction; testing the difference for `>= 0` could never fail.
        assert(key.tokenIndex >= tokensTaken_,
               "Simple key token was already taken from the queue");
        // Position of the key's first token within the queue.
        const idx = key.tokenIndex - tokensTaken_;

        // Add KEY.
        // Manually inserting since tokens are immutable (need linked list).
        tokens_.insert(keyToken(keyMark, keyMark), idx);

        // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
        // It is inserted at the same index, i.e. in front of the KEY token.
        if(flowLevel_ == 0 && addIndent(key.column))
        {
            tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
        }

        // There cannot be two simple keys in a row.
        allowSimpleKey_ = false;
    }
    // Part of a complex key
    else
    {
        // We can start a complex value if and only if we can start a simple key.
        enforce(flowLevel_ > 0 || allowSimpleKey_,
                new ScannerException("Mapping values are not allowed here", reader_.mark));

        // If this value starts a new block mapping, we need to add
        // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
        if(flowLevel_ == 0 && addIndent(reader_.column))
        {
            tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
        }

        // Reset possible simple key on the current level.
        removePossibleSimpleKey();
        // Simple keys are allowed after ':' in the block context.
        allowSimpleKey_ = (flowLevel_ == 0);
    }

    // Add VALUE.
    Mark startMark = reader_.mark;
    reader_.forward();
    tokens_.push(valueToken(startMark, reader_.mark));
}

/// Add ALIAS or ANCHOR token.
///
/// Params: id = Token type to add; restricted to Alias or Anchor by the
///              template constraint.
///
/// Throws: ScannerException if scanAnchor() recorded an error.
void fetchAnchor_(TokenID id)() @trusted
    if(id == TokenID.Alias || id == TokenID.Anchor)
{
    // ALIAS/ANCHOR could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after ALIAS/ANCHOR.
    allowSimpleKey_ = false;

    auto anchor = scanAnchor(id);
    throwIfError();
    tokens_.push(anchor);
}

/// Aliases to add ALIAS or ANCHOR token.
alias fetchAnchor_!(TokenID.Alias) fetchAlias;
alias fetchAnchor_!(TokenID.Anchor) fetchAnchor;

/// Add TAG token.
///
/// Throws: ScannerException if scanTag() recorded an error.
void fetchTag() @trusted
{
    // TAG could start a simple key.
    savePossibleSimpleKey();
    // No simple keys after TAG.
    allowSimpleKey_ = false;

    // Check for a scanning error before queueing, like the other fetchers do,
    // so that no invalid token is pushed when scanning failed.
    auto tag = scanTag();
    throwIfError();
    tokens_.push(tag);
}

/// Add block SCALAR token.
///
/// Params: style = Scalar style; restricted to Literal or Folded by the
///                 template constraint.
///
/// Throws: ScannerException if scanBlockScalar() recorded an error.
void fetchBlockScalar(ScalarStyle style)() @trusted
    if(style == ScalarStyle.Literal || style == ScalarStyle.Folded)
{
    // Reset possible simple key on the current level.
    removePossibleSimpleKey();
    // A simple key may follow a block scalar.
    allowSimpleKey_ = true;

    auto blockScalar = scanBlockScalar(style);
    throwIfError();
    tokens_.push(blockScalar);
}

/// Aliases to add literal or folded block scalar.
alias fetchBlockScalar!(ScalarStyle.Literal) fetchLiteral;
alias fetchBlockScalar!(ScalarStyle.Folded) fetchFolded;

/// Add quoted flow SCALAR token.
///
/// Params: quotes = Quoting style of the scalar.
///
/// Throws: ScannerException if scanFlowScalar() recorded an error.
void fetchFlowScalar(ScalarStyle quotes)() @safe
{
    // A flow scalar could be a simple key.
    savePossibleSimpleKey();
    // No simple keys after flow scalars.
    allowSimpleKey_ = false;

    // Scan and add SCALAR.
    auto scalar = scanFlowScalar(quotes);
    throwIfError();
    tokens_.push(scalar);
}

/// Aliases to add single or double quoted flow scalar.
alias fetchFlowScalar!(ScalarStyle.SingleQuoted) fetchSingle;
alias fetchFlowScalar!(ScalarStyle.DoubleQuoted) fetchDouble;

/// Add plain SCALAR token.
///
/// Throws: ScannerException if scanPlain() recorded an error.
void fetchPlain() @safe
{
    // A plain scalar could be a simple key
    savePossibleSimpleKey();
    // No simple keys after plain scalars. But note that scanPlain() will
    // change this flag if the scan is finished at the beginning of the line.
    allowSimpleKey_ = false;

    // Scan and add SCALAR. May change allowSimpleKey_
    auto plain = scanPlain();
    throwIfError();
    tokens_.push(plain);
}

pure nothrow @nogc:

/// Check if the next token is DIRECTIVE: ^ '%' ...
bool checkDirective() @safe
{
    return reader_.peekByte() == '%' && reader_.column == 0;
}

/// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n')
bool checkDocumentStart() @safe
{
    // Check one char first, then all 3, to prevent reading outside the buffer.
    return reader_.column     == 0     &&
           reader_.peekByte() == '-'   &&
           reader_.prefix(3)  == "---" &&
           searchAllWhitespace.canFind(reader_.peek(3));
}

/// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n')
bool checkDocumentEnd() @safe
{
    // Check one char first, then all 3, to prevent reading outside the buffer.
    return reader_.column     == 0     &&
           reader_.peekByte() == '.'   &&
           reader_.prefix(3)  == "..." &&
           searchAllWhitespace.canFind(reader_.peek(3));
}

/// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n')
bool checkBlockEntry() @safe
{
    return searchAllWhitespace.canFind(reader_.peek(1));
}

/// Check if the next token is KEY(flow context): '?'
///
/// or KEY(block context): '?' (' '|'\n')
bool checkKey() @safe
{
    return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
}

/// Check if the next token is VALUE(flow context): ':'
///
/// or VALUE(block context): ':' (' '|'\n')
bool checkValue() @safe
{
    return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
}

/// Check if the next token is a plain scalar.
///
/// A plain scalar may start with any non-space character except:
///   '-', '?', ':', ',', '[', ']', '{', '}',
///   '#', '&', '*', '!', '|', '>', '\'', '\"',
///   '%', '@', '`'.
///
/// It may also start with
///   '-', '?', ':'
/// if it is followed by a non-space character.
///
/// Note that we limit the last rule to the block context (except the
/// '-' character) because we want the flow context to be space
/// independent.
bool checkPlain() @safe
{
    const c = reader_.peek();
    mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
        searchPlainNotFirstChar;
    // Any character outside the indicator/whitespace set can start a plain scalar.
    if(!searchPlainNotFirstChar.canFind(c))
    {
        return true;
    }
    // Otherwise only '-' (any context) or '?'/':' (block context) qualify, and
    // only when directly followed by a non-space character.
    return !searchAllWhitespace.canFind(reader_.peek(1)) &&
           (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
}

/// Move to the next non-space character.
void findNextNonSpace() @safe
{
    for(; reader_.peekByte() == ' '; reader_.forward()) {}
}

/// Scan a string of alphanumeric or "-_" characters.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Params: name      = Description of what is being scanned, used in the error context.
///         startMark = Mark used as the context position of a possible error.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanAlphaNumericToSlice(string name)(const Mark startMark) @system
{
    // Count how many characters belong to the name without consuming them yet.
    size_t count = 0;
    dchar current = reader_.peek();
    while(current.isAlphaNum || current == '-' || current == '_')
    {
        ++count;
        current = reader_.peek(count);
    }

    if(count > 0)
    {
        reader_.sliceBuilder.write(reader_.get(count));
        return;
    }

    // Not a single valid character: report what was found instead.
    enum contextMsg = "While scanning " ~ name;
    error(contextMsg, startMark, expected("alphanumeric, '-' or '_'", current),
          reader_.mark);
}

/// Scan and throw away all characters until next line break.
void scanToNextBreak() @safe
{
    for(; !searchAllBreaks.canFind(reader_.peek()); reader_.forward()) {}
}

/// Scan all characters until next line break.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
void scanToNextBreakToSlice() @system
{
    // Measure first, then move the whole run into the slice at once.
    uint count = 0;
    for(; !searchAllBreaks.canFind(reader_.peek(count)); ++count) {}
    reader_.sliceBuilder.write(reader_.get(count));
}


/// Move to next token in the file/stream.
///
/// We ignore spaces, line breaks and comments.
/// If we find a line break in the block context, we set
/// `allowSimpleKey_` on.
///
/// We do not yet support BOM inside the stream as the
/// specification requires. Any such mark will be considered as a part
/// of the document.
void scanToNextToken() @safe
{
    // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
    //   Tabs cannot precede tokens
    //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
    //   KEY(block), VALUE(block), BLOCK-ENTRY
    // So the checking code is
    //   if <TAB>:
    //       allowSimpleKey_ = false
    // We also need to add the check for `allowSimpleKey_ == true` to
    // `unwindIndent` before issuing BLOCK-END.
    // Scanners for block, flow, and plain scalars need to be modified.

    while(true)
    {
        findNextNonSpace();

        // A comment runs to the end of the line.
        if(reader_.peekByte() == '#') { scanToNextBreak(); }

        // No line break consumed: we are at the start of the next token.
        if(scanLineBreak() == '\0') { break; }

        // A new line in block context may start a simple key.
        if(flowLevel_ == 0) { allowSimpleKey_ = true; }
    }
}

/// Scan directive token.
///
/// Returns: The directive token, or Token.init if an error was recorded.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanDirective() @trusted
{
    Mark startMark = reader_.mark;
    // Skip the '%'.
    reader_.forward();

    // Scan directive name
    reader_.sliceBuilder.begin();
    scanDirectiveNameToSlice(startMark);
    if(error_) { return Token.init; }
    const name = reader_.sliceBuilder.finish();

    // Scan directive value; reserved (unknown) directives get an empty value.
    reader_.sliceBuilder.begin();

    // Index where tag handle ends and suffix starts in a tag directive value.
    uint tagHandleEnd = uint.max;
    if(name == "YAML")     { scanYAMLDirectiveValueToSlice(startMark); }
    else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
    if(error_) { return Token.init; }
    char[] value = reader_.sliceBuilder.finish();

    Mark endMark = reader_.mark;

    const DirectiveType directive = name == "YAML" ? DirectiveType.YAML
                                  : name == "TAG"  ? DirectiveType.TAG
                                  :                  DirectiveType.Reserved;
    // The rest of a reserved directive's line is ignored.
    if(directive == DirectiveType.Reserved) { scanToNextBreak(); }

    scanDirectiveIgnoredLine(startMark);
    if(error_) { return Token.init; }

    return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
}

/// Scan name of a directive token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveNameToSlice(const Mark startMark) @system
{
    // Scan directive name.
    scanAlphaNumericToSlice!"a directive"(startMark);
    if(error_) { return; }

    // The name must be terminated by a space, a line break or the end of input.
    dchar next = reader_.peek();
    if(!" \0\n\r\u0085\u2028\u2029"d.canFind(next))
    {
        error("While scanning a directive", startMark,
              expected("alphanumeric, '-' or '_'", next), reader_.mark);
    }
}

/// Scan value of a YAML directive token. Writes major and minor version,
/// separated by '.', into the slice.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveValueToSlice(const Mark startMark) @system
{
    enum context = "While scanning a directive";

    findNextNonSpace();

    // Major version.
    scanYAMLDirectiveNumberToSlice(startMark);
    if(error_) { return; }

    if(reader_.peekByte() != '.')
    {
        error(context, startMark,
              expected("digit or '.'", reader_.peek()), reader_.mark);
        return;
    }
    // Skip the '.'.
    reader_.forward();
    reader_.sliceBuilder.write('.');

    // Minor version.
    scanYAMLDirectiveNumberToSlice(startMark);
    if(error_) { return; }

    // The version must be followed by a space, a line break or the end of input.
    if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
    error(context, startMark,
          expected("digit or '.'", reader_.peek()), reader_.mark);
}

/// Scan a number from a YAML directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanYAMLDirectiveNumberToSlice(const Mark startMark) @system
{
    if(!isDigit(reader_.peek()))
    {
        error("While scanning a directive", startMark,
              expected("digit", reader_.peek()), reader_.mark);
        return;
    }

    // The first character is known to be a digit (checked above), so start
    // counting from the second one.
    uint digits = 1;
    for(; reader_.peek(digits).isDigit; ++digits) {}

    reader_.sliceBuilder.write(reader_.get(digits));
}

/// Scan value of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Returns: Length of tag handle (which is before tag prefix) in scanned data,
///          or uint.max if scanning the handle failed.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
uint scanTagDirectiveValueToSlice(const Mark startMark) @system
{
    findNextNonSpace();

    // Measure how much the handle adds to the slice being built.
    const lengthBefore = reader_.sliceBuilder.length;
    scanTagDirectiveHandleToSlice(startMark);
    if(error_) { return uint.max; }
    const handleLength = cast(uint)(reader_.sliceBuilder.length - lengthBefore);

    // The prefix follows the handle in the same slice. An error recorded while
    // scanning it is left for the caller to detect through error_.
    findNextNonSpace();
    scanTagDirectivePrefixToSlice(startMark);

    return handleLength;
}

/// Scan handle of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagDirectiveHandleToSlice(const Mark startMark) @system
{
    scanTagHandleToSlice!"directive"(startMark);
    // Done if scanning already failed, or if the handle is properly followed by a space.
    if(error_ || reader_.peekByte() == ' ') { return; }

    error("While scanning a directive handle", startMark,
          expected("' '", reader_.peek()), reader_.mark);
}

/// Scan prefix of a tag directive.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagDirectivePrefixToSlice(const Mark startMark) @system
{
    scanTagURIToSlice!"directive"(startMark);
    // If scanning the URI already failed, keep that (more specific) error instead
    // of reporting a second one on top of it.
    if(error_) { return; }

    // The prefix must be terminated by a space, a line break or the end of input.
    if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
    error("While scanning a directive prefix", startMark,
          expected("' '", reader_.peek()), reader_.mark);
}

/// Scan (and ignore) ignored line after a directive.
///
/// Skips spaces and an optional '#' comment, then consumes the line break that
/// must end the line.
///
/// Params:  startMark = Mark used as the context mark in error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanDirectiveIgnoredLine(const Mark startMark) @safe
{
    findNextNonSpace();
    if(reader_.peekByte() == '#') { scanToNextBreak(); }
    if(searchAllBreaks.canFind(reader_.peek()))
    {
        scanLineBreak();
        return;
    }
    error("While scanning a directive", startMark,
          expected("comment or a line break", reader_.peek()), reader_.mark);
}


/// Scan an alias or an anchor.
///
/// The specification does not restrict characters for anchors and
/// aliases. This may lead to problems, for instance, the document:
///   [ *alias, value ]
/// can be interpreted in two ways, as
///   [ "value" ]
/// and
///   [ *alias , "value" ]
/// Therefore we restrict aliases to ASCII alphanumeric characters.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanAnchor(const TokenID id) @trusted
{
    const startMark = reader_.mark;
    // The indicator character that introduced the token; '*' selects the alias
    // wording for error messages below, anything else the anchor wording.
    const dchar i = reader_.get();

    reader_.sliceBuilder.begin();
    if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
    else         { scanAlphaNumericToSlice!"an anchor"(startMark); }
    // On error, value is discarded as we return immediately
    char[] value = reader_.sliceBuilder.finish();
    if(error_)   { return Token.init; }

    // The name must be followed by whitespace or one of these indicator
    // characters; anything else is reported as an invalid name character.
    if(!searchAllWhitespace.canFind(reader_.peek()) &&
       !"?:,]}%@"d.canFind(reader_.peekByte()))
    {
        enum anchorCtx = "While scanning an anchor";
        enum aliasCtx  = "While scanning an alias";
        error(i == '*' ? aliasCtx : anchorCtx, startMark,
              expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
        return Token.init;
    }

    // id (not the indicator character) decides which kind of token is built.
    if(id == TokenID.Alias)
    {
        return aliasToken(startMark, reader_.mark, value);
    }
    if(id == TokenID.Anchor)
    {
        return anchorToken(startMark, reader_.mark, value);
    }
    assert(false, "This code should never be reached");
}

/// Scan a tag token.
///
/// Three forms are handled, selected by the character after the current one:
/// '<' starts a URI that must be closed by '>', whitespace means a lone '!',
/// and anything else is a handle (if a second '!' appears before the next
/// whitespace) or a plain '!' prefix, followed by a URI.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanTag() @trusted
{
    const startMark = reader_.mark;
    // Look one character ahead of the current position (presumably the current
    // character is the '!' that introduced the tag - verify against caller).
    dchar c = reader_.peek(1);

    reader_.sliceBuilder.begin();
    // Only runs if an exception propagates; error_-style returns below leave
    // the slice builder as it is.
    scope(failure) { reader_.sliceBuilder.finish(); }
    // Index where tag handle ends and tag suffix starts in the tag value
    // (slice) we will produce.
    uint handleEnd;

    mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
    if(c == '<')
    {
        // Skip the two characters preceding the URI.
        reader_.forward(2);

        handleEnd = 0;
        scanTagURIToSlice!"tag"(startMark);
        if(error_) { return Token.init; }
        if(reader_.peekByte() != '>')
        {
            error("While scanning a tag", startMark,
                  expected("'>'", reader_.peek()), reader_.mark);
            return Token.init;
        }
        reader_.forward();
    }
    else if(searchAllWhitespace.canFind(c))
    {
        // Nothing follows the indicator: the tag value is just "!".
        reader_.forward();
        handleEnd = 0;
        reader_.sliceBuilder.write('!');
    }
    else
    {
        uint length = 1;
        bool useHandle = false;

        // Look ahead (without consuming) for a '!' before the next
        // space/line break/end of input; if found, the tag has a handle.
        while(!search.canFind(c))
        {
            if(c == '!')
            {
                useHandle = true;
                break;
            }
            ++length;
            c = reader_.peek(length);
        }

        if(useHandle)
        {
            scanTagHandleToSlice!"tag"(startMark);
            handleEnd = cast(uint)reader_.sliceBuilder.length;
            if(error_) { return Token.init; }
        }
        else
        {
            reader_.forward();
            reader_.sliceBuilder.write('!');
            handleEnd = cast(uint)reader_.sliceBuilder.length;
        }

        scanTagURIToSlice!"tag"(startMark);
        if(error_) { return Token.init; }
    }

    // A tag must be followed by a space, a line break or the end of input.
    if(search.canFind(reader_.peek()))
    {
        char[] slice = reader_.sliceBuilder.finish();
        return tagToken(startMark, reader_.mark, slice, handleEnd);
    }

    error("While scanning a tag", startMark, expected("' '", reader_.peek()),
          reader_.mark);
    return Token.init;
}

/// Scan a block scalar token with specified style.
///
/// Params:  style = Scalar style stored in the produced token. Line folding is
///                  applied only when this is ScalarStyle.Folded.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanBlockScalar(const ScalarStyle style) @trusted
{
    const startMark = reader_.mark;

    // Scan the header.
    reader_.forward();

    const indicators = scanBlockScalarIndicators(startMark);
    if(error_) { return Token.init; }

    const chomping   = indicators[0];
    const increment  = indicators[1];
    scanBlockScalarIgnoredLine(startMark);
    if(error_) { return Token.init; }

    // Determine the indentation level and go to the first non-empty line.
    Mark endMark;
    uint indent = max(1, indent_ + 1);

    reader_.sliceBuilder.begin();
    alias Transaction = SliceBuilder.Transaction;
    // Used to strip the last line breaks written to the slice at the end of the
    // scalar, which may be needed based on chomping.
    Transaction breaksTransaction = Transaction(reader_.sliceBuilder);
    // Read the first indentation/line breaks before the scalar.
    size_t startLen = reader_.sliceBuilder.length;
    // int.min is the value scanBlockScalarIndicators leaves in place when no
    // indentation indicator was given; the indentation is then detected.
    if(increment == int.min)
    {
        auto indentation = scanBlockScalarIndentationToSlice();
        endMark = indentation[1];
        indent  = max(indent, indentation[0]);
    }
    else
    {
        indent += increment - 1;
        endMark = scanBlockScalarBreaksToSlice(indent);
    }

    // int.max means there's no line break (int.max is outside UTF-32).
    dchar lineBreak = cast(dchar)int.max;

    // Scan the inner part of the block scalar.
    while(reader_.column == indent && reader_.peekByte() != '\0')
    {
        breaksTransaction.commit();
        const bool leadingNonSpace = !" \t"d.canFind(reader_.peekByte());
        // This is where the 'interesting' non-whitespace data gets read.
        scanToNextBreakToSlice();
        lineBreak = scanLineBreak();


        // This transaction serves to rollback data read in the
        // scanBlockScalarBreaksToSlice() call.
        breaksTransaction = Transaction(reader_.sliceBuilder);
        startLen = reader_.sliceBuilder.length;
        // The line breaks should actually be written _after_ the if() block
        // below. We work around that by inserting into the slice at startLen
        // (i.e. before the breaks scanned here) in that block, instead of
        // appending.
        endMark = scanBlockScalarBreaksToSlice(indent);

        // This will not run during the last iteration (see the if() vs the
        // while()), hence breaksTransaction rollback (which happens after this
        // loop) will never roll back data written in this if() block.
        if(reader_.column == indent && reader_.peekByte() != '\0')
        {
            // Unfortunately, folding rules are ambiguous.

            // This is the folding according to the specification:
            if(style == ScalarStyle.Folded && lineBreak == '\n' &&
               leadingNonSpace && !" \t"d.canFind(reader_.peekByte()))
            {
                // No breaks were scanned; no need to insert the space in the
                // middle of slice.
                if(startLen == reader_.sliceBuilder.length)
                {
                    reader_.sliceBuilder.write(' ');
                }
            }
            else
            {
                // We need to insert in the middle of the slice in case any line
                // breaks were scanned.
                reader_.sliceBuilder.insert(lineBreak, startLen);
            }

            ////this is Clark Evans's interpretation (also in the spec
            ////examples):
            //
            //if(style == ScalarStyle.Folded && lineBreak == '\n')
            //{
            //    if(startLen == endLen)
            //    {
            //        if(!" \t"d.canFind(reader_.peekByte()))
            //        {
            //            reader_.sliceBuilder.write(' ');
            //        }
            //        else
            //        {
            //            chunks ~= lineBreak;
            //        }
            //    }
            //}
            //else
            //{
            //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
            //}
        }
        else
        {
            break;
        }
    }

    // If chomping is Keep, we keep (commit) the last scanned line breaks
    // (which are at the end of the scalar). Otherwise we remove them (end the
    // transaction).
    if(chomping == Chomping.Keep)  { breaksTransaction.commit(); }
    else                           { breaksTransaction.__dtor(); }
    if(chomping != Chomping.Strip && lineBreak != int.max)
    {
        // If chomping is Keep, we keep the line break but the first line break
        // that isn't stripped (since chomping isn't Strip in this branch) must
        // be inserted _before_ the other line breaks.
        if(chomping == Chomping.Keep)
        {
            reader_.sliceBuilder.insert(lineBreak, startLen);
        }
        // If chomping is not Keep, breaksTransaction was cancelled so we can
        // directly write the first line break (as it isn't stripped - chomping
        // is not Strip)
        else
        {
            reader_.sliceBuilder.write(lineBreak);
        }
    }

    char[] slice = reader_.sliceBuilder.finish();
    return scalarToken(startMark, endMark, slice, style);
}

/// Scan chomping and indentation indicators of a scalar token.
///
/// Returns: Tuple of the chomping mode (Chomping.Clip if no chomping indicator
///          is present) and the indentation increment (int.min if no
///          indentation indicator is present). On error, returns
///          (Chomping.init, int.max).
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
{
    auto chomping = Chomping.Clip;
    int increment = int.min;
    dchar c       = reader_.peek();

    // Indicators can be in any order.
    if(getChomping(c, chomping))
    {
        getIncrement(c, increment, startMark);
        if(error_) { return tuple(Chomping.init, int.max); }
    }
    else
    {
        const gotIncrement = getIncrement(c, increment, startMark);
        if(error_)       { return tuple(Chomping.init, int.max); }
        if(gotIncrement) { getChomping(c, chomping); }
    }

    // c now holds the first character after the indicators; it must be a space,
    // a line break or the end of input.
    if(" \0\n\r\u0085\u2028\u2029"d.canFind(c))
    {
        return tuple(chomping, increment);
    }
    error("While scanning a block scalar", startMark,
          expected("chomping or indentation indicator", c), reader_.mark);
    return tuple(Chomping.init, int.max);
}

/// Get chomping indicator, if detected. Return false otherwise.
///
/// Helper of scanBlockScalarIndicators.
///
/// Params:
///
/// c        = The character that may be a chomping indicator ('+' or '-').
///            If an indicator is detected, this is updated to the next
///            character in the Reader.
/// chomping = Receives the chomping value, if an indicator is detected.
bool getChomping(ref dchar c, ref Chomping chomping) @safe
{
    const isKeep  = c == '+';
    const isStrip = c == '-';
    if(!isKeep && !isStrip) { return false; }

    if(isKeep) { chomping = Chomping.Keep; }
    else       { chomping = Chomping.Strip; }

    // Consume the indicator and hand the following character back to the caller.
    reader_.forward();
    c = reader_.peek();
    return true;
}

/// Get increment indicator, if detected. Return false otherwise.
///
/// Helper of scanBlockScalarIndicators.
///
/// Params:
///
/// c         = The character that may be an increment indicator (a digit).
///             If a valid increment indicator is detected, this is updated to
///             the next character in the Reader.
/// increment = Receives the value of the digit, if c is a digit.
/// startMark = Mark for error messages.
///
/// In case of an error (the digit is '0'), error_ is set. Use throwIfError() to
/// handle this.
bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe
{
    if(!c.isDigit) { return false; }

    // Numeric value of the digit.
    increment = c - '0';
    assert(increment < 10 && increment >= 0, "Digit has invalid value");

    // Zero is not a valid indentation indicator.
    if(increment <= 0)
    {
        error("While scanning a block scalar", startMark,
              expected("indentation indicator in range 1-9", "0"), reader_.mark);
        return false;
    }

    reader_.forward();
    c = reader_.peek();
    return true;
}

/// Scan (and ignore) ignored line in a block scalar.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanBlockScalarIgnoredLine(const Mark startMark) @safe
{
    findNextNonSpace();
    // An optional comment may follow the header; skip to the end of its line.
    if(reader_.peekByte() == '#') { scanToNextBreak(); }

    // Whatever preceded it, the line has to end with a line break.
    if(!searchAllBreaks.canFind(reader_.peek()))
    {
        error("While scanning a block scalar", startMark,
              expected("comment or line break", reader_.peek()), reader_.mark);
        return;
    }
    scanLineBreak();
}

/// Scan leading spaces and line breaks of a block scalar to detect indentation.
///
/// Line breaks are written into the slice; spaces are only consumed.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Returns: Tuple of the largest column reached while skipping spaces and the
///          mark after the last line break scanned (or the starting mark if
///          there was none).
Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @system
{
    uint deepestColumn;
    Mark lastBreakEnd = reader_.mark;

    while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
    {
        if(reader_.peekByte() == ' ')
        {
            reader_.forward();
            deepestColumn = max(reader_.column, deepestColumn);
        }
        else
        {
            const lineBreak = scanLineBreak();
            reader_.sliceBuilder.write(lineBreak);
            lastBreakEnd = reader_.mark;
        }
    }

    return tuple(deepestColumn, lastBreakEnd);
}

/// Scan line breaks at lower or specified indentation in a block scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Params:  indent = Column up to which leading spaces are skipped on each line.
///
/// Returns: Mark after the last line break scanned (or the starting mark if
///          there was none).
Mark scanBlockScalarBreaksToSlice(const uint indent) @trusted
{
    Mark lastBreakEnd = reader_.mark;

    bool atBreak;
    do
    {
        // Skip indentation spaces, but never past the indent column.
        while(reader_.column < indent && reader_.peekByte() == ' ')
        {
            reader_.forward();
        }

        atBreak = "\n\r\u0085\u2028\u2029"d.canFind(reader_.peek());
        if(atBreak)
        {
            const lineBreak = scanLineBreak();
            reader_.sliceBuilder.write(lineBreak);
            lastBreakEnd = reader_.mark;
        }
    }
    while(atBreak);

    return lastBreakEnd;
}

/// Scan a quoted flow scalar token with specified quotes.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanFlowScalar(const ScalarStyle quotes) @trusted
{
    const startMark = reader_.mark;
    // The opening quote character; scanning continues until it is seen again.
    const quote     = reader_.get();

    reader_.sliceBuilder.begin();
    // On any error return below, close the slice so it is not left in progress.
    scope(exit) if(error_) { reader_.sliceBuilder.finish(); }

    scanFlowScalarNonSpacesToSlice(quotes, startMark);
    if(error_) { return Token.init; }

    // Alternate between runs of spaces/line breaks and runs of other characters
    // until the closing quote is reached.
    while(reader_.peek() != quote)
    {
        scanFlowScalarSpacesToSlice(startMark);
        if(error_) { return Token.init; }
        scanFlowScalarNonSpacesToSlice(quotes, startMark);
        if(error_) { return Token.init; }
    }
    // Skip the closing quote.
    reader_.forward();

    auto slice = reader_.sliceBuilder.finish();
    return scalarToken(startMark, reader_.mark, slice, quotes);
}

/// Scan nonspace characters in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Params:  quotes    = Quoting style of the scalar being scanned; decides how
///                      quotes and backslashes are handled.
///          startMark = Mark used as the context mark in error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
    @system
{
    for(;;) with(ScalarStyle)
    {
        dchar c = reader_.peek();

        // Characters that end a run of "ordinary" characters: whitespace, line
        // breaks, end of input, both quote characters and the backslash.
        mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;

        size_t numCodePoints = 0;
        // This is an optimized way of writing:
        // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
        outer: for(size_t oldSliceLength;;)
        {
            // This will not necessarily make slice 32 chars longer, as not all
            // code points are 1 char.
            const char[] slice = reader_.slice(numCodePoints + 32);
            // The slice did not grow: there is no more input to look at.
            if(slice.length == oldSliceLength)
            {
                error("While reading a flow scalar", startMark,
                      "reached end of file", reader_.mark);
                return;
            }
            // Only examine the part of the slice not examined in previous passes.
            for(size_t i = oldSliceLength; i < slice.length;)
            {
                // slice is UTF-8 - need to decode
                const ch = slice[i] < 0x80 ? slice[i++] : decodeValidUTF8NoGC(slice, i);
                if(search.canFind(ch)) { break outer; }
                ++numCodePoints;
            }
            oldSliceLength = slice.length;
        }

        reader_.sliceBuilder.write(reader_.get(numCodePoints));

        c = reader_.peek();
        // '' inside a single quoted scalar is an escaped single quote.
        if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
        {
            reader_.forward(2);
            reader_.sliceBuilder.write('\'');
        }
        // The other style's quote (and, in single quoted scalars, the backslash)
        // is an ordinary character.
        else if((quotes == DoubleQuoted && c == '\'') ||
                (quotes == SingleQuoted && "\"\\"d.canFind(c)))
        {
            reader_.forward();
            reader_.sliceBuilder.write(c);
        }
        // Backslash escape in a double quoted scalar.
        else if(quotes == DoubleQuoted && c == '\\')
        {
            reader_.forward();
            c = reader_.peek();
            if(dyaml.escapes.escapes.canFind(c))
            {
                reader_.forward();
                // Escaping has been moved to Parser as it can't be done in
                // place (in a slice) in case of '\P' and '\L' (very uncommon,
                // but we don't want to break the spec)
                char[2] escapeSequence = ['\\', cast(char)c];
                reader_.sliceBuilder.write(escapeSequence);
            }
            else if(dyaml.escapes.escapeHexCodeList.canFind(c))
            {
                const hexLength = dyaml.escapes.escapeHexLength(c);
                reader_.forward();

                // All hexLength characters after the escape code must be hex digits.
                foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit)
                {
                    error("While scanning a double quoted scalar", startMark,
                          expected("escape sequence of hexadecimal numbers",
                                   reader_.peek(i)), reader_.mark);
                    return;
                }
                char[] hex = reader_.get(hexLength);
                char[2] escapeStart = ['\\', cast(char) c];
                reader_.sliceBuilder.write(escapeStart);
                reader_.sliceBuilder.write(hex);
                bool overflow;
                // Note: This is just error checking; Parser does the actual
                //       escaping (otherwise we could accidentally create an
                //       escape sequence here that wasn't in input, breaking the
                //       escaping code in parser, which is in parser because it
                //       can't always be done in place)
                parseNoGC!int(hex, 16u, overflow);
                if(overflow)
                {
                    error("While scanning a double quoted scalar", startMark,
                          "overflow when parsing an escape sequence of " ~
                          "hexadecimal numbers.", reader_.mark);
                    return;
                }
            }
            // Backslash directly followed by a line break: the break itself is
            // consumed without being written; following breaks are scanned.
            else if("\n\r\u0085\u2028\u2029"d.canFind(c))
            {
                scanLineBreak();
                scanFlowScalarBreaksToSlice(startMark);
                if(error_) { return; }
            }
            else
            {
                error("While scanning a double quoted scalar", startMark,
                      buildMsg("found unsupported escape character", c),
                      reader_.mark);
                return;
            }
        }
        // Anything else (whitespace, line break, closing quote) is left for
        // the caller.
        else { return; }
    }
}

/// Scan space characters in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// spaces into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanFlowScalarSpacesToSlice(const Mark startMark) @system
{
    // Increase length as long as we see whitespace.
    size_t length = 0;
    while(" \t"d.canFind(reader_.peekByte(length))) { ++length; }
    auto whitespaces = reader_.prefixBytes(length);

    // Can check the last byte without striding because '\0' is ASCII
    const c = reader_.peek(length);
    if(c == '\0')
    {
        error("While scanning a quoted scalar", startMark,
              "found unexpected end of buffer", reader_.mark);
        return;
    }

    // Spaces not followed by a line break.
    if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
    {
        reader_.forward(length);
        reader_.sliceBuilder.write(whitespaces);
        return;
    }

    // There's a line break after the spaces.
    reader_.forward(length);
    const lineBreak = scanLineBreak();

    // A '\n' break is either folded into a space or dropped (decided below);
    // any other break character is written as-is.
    if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }

    // If we have extra line breaks after the first, scan them into the
    // slice.
    const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
    if(error_) { return; }

    // No extra breaks, one normal line break. Replace it with a space.
    if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
}

/// Scan line breaks in a flow scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// line breaks into that slice.
///
/// Returns: true if at least one line break was scanned, false otherwise (also
///          false on error).
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
bool scanFlowScalarBreaksToSlice(const Mark startMark) @system
{
    // True if at least one line break was found.
    bool anyBreaks;
    for(;;)
    {
        // Instead of checking indentation, we check for document separators.
        const prefix = reader_.prefix(3);
        if((prefix == "---" || prefix == "...") &&
           searchAllWhitespace.canFind(reader_.peek(3)))
        {
            error("While scanning a quoted scalar", startMark,
                  "found unexpected document separator", reader_.mark);
            return false;
        }

        // Skip any whitespaces.
        while(" \t"d.canFind(reader_.peekByte())) { reader_.forward(); }

        // Encountered a non-whitespace non-linebreak character, so we're done.
        if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }

        const lineBreak = scanLineBreak();
        anyBreaks = true;
        reader_.sliceBuilder.write(lineBreak);
    }
    return anyBreaks;
}

/// Scan plain scalar token (no block, no quotes).
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
Token scanPlain() @trusted
{
    // We keep track of the allowSimpleKey_ flag here.
    // Indentation rules are loosened for the flow context
    const startMark = reader_.mark;
    Mark endMark = startMark;
    const indent = indent_ + 1;

    // We allow zero indentation for scalars, but then we need to check for
    // document separators at the beginning of the line.
    // if(indent == 0) { indent = 1; }

    reader_.sliceBuilder.begin();

    alias Transaction = SliceBuilder.Transaction;
    // Covers the spaces/line breaks scanned after each chunk of the scalar, so
    // that trailing ones can be dropped when the scalar ends.
    Transaction spacesTransaction;
    // Stop at a comment.
    while(reader_.peekByte() != '#')
    {
        // Scan the entire plain scalar.
        size_t length = 0;
        dchar c = void;
        // Moved the if() out of the loop for optimization.
        if(flowLevel_ == 0)
        {
            // Block context: stop at whitespace or at ':' followed by whitespace.
            c = reader_.peek(length);
            for(;;)
            {
                const cNext = reader_.peek(length + 1);
                if(searchAllWhitespace.canFind(c) ||
                   (c == ':' && searchAllWhitespace.canFind(cNext)))
                {
                    break;
                }
                ++length;
                c = cNext;
            }
        }
        else
        {
            // Flow context: stop at whitespace or at any flow indicator.
            for(;;)
            {
                c = reader_.peek(length);
                if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
                {
                    break;
                }
                ++length;
            }
        }

        // It's not clear what we should do with ':' in the flow context.
        if(flowLevel_ > 0 && c == ':' &&
           !searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
           !",[]{}"d.canFind(reader_.peek(length + 1)))
        {
            // This is an error; throw the slice away.
            spacesTransaction.commit();
            reader_.sliceBuilder.finish();
            reader_.forward(length);
            error("While scanning a plain scalar", startMark,
                  "found unexpected ':' . Please check " ~
                  "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
                  reader_.mark);
            return Token.init;
        }

        // Nothing more belongs to the scalar.
        if(length == 0) { break; }

        allowSimpleKey_ = false;

        reader_.sliceBuilder.write(reader_.get(length));

        endMark = reader_.mark;

        // More scalar content followed the previously scanned spaces, so keep
        // them, and start a new transaction for the spaces scanned next.
        spacesTransaction.commit();
        spacesTransaction = Transaction(reader_.sliceBuilder);

        const startLength = reader_.sliceBuilder.length;
        scanPlainSpacesToSlice(startMark);
        // Stop if no spaces were written, or (in block context) if the next
        // line is indented less than required.
        if(startLength == reader_.sliceBuilder.length ||
           (flowLevel_ == 0 && reader_.column < indent))
        {
            break;
        }
    }

    // End the last spaces transaction without committing it.
    spacesTransaction.__dtor();
    char[] slice = reader_.sliceBuilder.finish();

    return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
}

/// Scan spaces in a plain scalar.
///
/// Assumes that the caller is building a slice in Reader, and puts the spaces
/// into that slice.
///
/// Params:  startMark = Not used by this function.
void scanPlainSpacesToSlice(const Mark startMark) @system
{
    // The specification is really confusing about tabs in plain scalars.
    // We just forbid them completely. Do not use tabs in YAML!

    // Get as many plain spaces as there are.
    size_t length = 0;
    while(reader_.peekByte(length) == ' ') { ++length; }
    char[] whitespaces = reader_.prefixBytes(length);
    reader_.forward(length);

    dchar c = reader_.peek();
    mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
    // No newline after the spaces (if any)
    // (Excluding ' ' so we can use the same FastCharSearch as below)
    if(!search.canFind(c) && c != ' ')
    {
        // We have spaces, but no newline.
        if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
        return;
    }

    // Newline after the spaces (if any)
    const lineBreak = scanLineBreak();
    allowSimpleKey_ = true;

    // True if the reader is at a "---" or "..." followed by whitespace, a line
    // break or the end of input.
    static bool end(Reader reader_) @safe pure nothrow @nogc
    {
        const prefix = reader_.prefix(3);
        return ("---" == prefix || "..." == prefix)
                && " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
    }

    if(end(reader_)) { return; }

    bool extraBreaks = false;

    alias Transaction = SliceBuilder.Transaction;
    // Returning early below (without commit()) leaves this transaction
    // uncommitted.
    auto transaction = Transaction(reader_.sliceBuilder);
    if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
    while(search.canFind(reader_.peek()))
    {
        if(reader_.peekByte() == ' ') { reader_.forward(); }
        else
        {
            const lBreak = scanLineBreak();
            extraBreaks  = true;
            reader_.sliceBuilder.write(lBreak);

            if(end(reader_)) { return; }
        }
    }
    transaction.commit();

    // No line breaks, only a space.
    if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
}

/// Scan handle of a tag token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagHandleToSlice(string name)(const Mark startMark) @system
{
    dchar c = reader_.peek();
    enum contextMsg = "While scanning a " ~ name;
    // A handle always starts with '!'.
    if(c != '!')
    {
        error(contextMsg, startMark, expected("'!'", c), reader_.mark);
        return;
    }

    uint length = 1;
    c = reader_.peek(length);
    // '!' followed by a space is a complete handle on its own. Otherwise the
    // handle is '!', any number of alphanumerics/'-'/'_', and a closing '!'.
    if(c != ' ')
    {
        while(c.isAlphaNum || "-_"d.canFind(c))
        {
            ++length;
            c = reader_.peek(length);
        }
        if(c != '!')
        {
            // Move to the offending character so the error mark points at it.
            reader_.forward(length);
            error(contextMsg, startMark, expected("'!'", c), reader_.mark);
            return;
        }
        ++length;
    }

    reader_.sliceBuilder.write(reader_.get(length));
}

/// Scan URI in a tag token.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Params:  startMark = Mark used as the context mark in error messages.
///
/// In case of an error, error_ is set. Use throwIfError() to handle this.
void scanTagURIToSlice(string name)(const Mark startMark) @trusted
{
    // Note: we do not check if URI is well-formed.
    dchar c = reader_.peek();
    const startLen = reader_.sliceBuilder.length;
    {
        // Number of plain URI characters looked at but not yet consumed.
        uint length = 0;
        mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
        while(c.isAlphaNum || search.canFind(c))
        {
            if(c == '%')
            {
                // Flush the plain characters before the escape, then decode the
                // run of '%XX' escapes.
                auto chars = reader_.get(length);
                reader_.sliceBuilder.write(chars);
                length = 0;
                scanURIEscapesToSlice!name(startMark);
                if(error_) { return; }
            }
            else { ++length; }
            c = reader_.peek(length);
        }
        if(length > 0)
        {
            auto chars = reader_.get(length);
            reader_.sliceBuilder.write(chars);
            length = 0;
        }
    }
    // OK if we scanned something, error otherwise.
    if(reader_.sliceBuilder.length > startLen) { return; }

    enum contextMsg = "While parsing a " ~ name;
    error(contextMsg, startMark, expected("URI", c), reader_.mark);
}

// Not @nogc yet because std.utf.decode is not @nogc
/// Scan URI escape sequences.
///
/// Reads consecutive '%XX' escapes (XX being two hexadecimal digits), treats the
/// escaped bytes as UTF-8 and writes the decoded characters into the slice.
///
/// Assumes that the caller is building a slice in Reader, and puts the scanned
/// characters into that slice.
///
/// Params:  startMark = Mark used as the context mark in error messages.
///
/// In case of an error (a malformed escape or escaped bytes that are not valid
/// UTF-8), error_ is set. Use throwIfError() to handle this.
void scanURIEscapesToSlice(string name)(const Mark startMark) @system
{
    // URI escapes encode a UTF-8 string. We store UTF-8 code units here for
    // decoding into UTF-32.
    char[4] bytes;
    size_t bytesUsed;

    // Get one dchar by decoding data from bytes.
    //
    // This is probably slow, but simple and URI escapes are extremely uncommon
    // in YAML.
    //
    // Writes the decoded dchar into the slice and moves the remaining bytes to
    // the front of the buffer.
    //
    // Returns the number of bytes remaining in bytes on success,
    // size_t.max on failure.
    static size_t getDchar(char[] bytes, Reader reader_)
    {
        size_t nextChar;
        dchar c;
        if(bytes[0] < 0x80)
        {
            c = bytes[0];
            ++nextChar;
        }
        else
        {
            const decoded = decodeUTF8NoGC!(No.validated)(bytes[], nextChar);
            if(decoded.errorMessage !is null) { return size_t.max; }
            c = decoded.decoded;
        }
        reader_.sliceBuilder.write(c);
        if(bytes.length - nextChar > 0)
        {
            core.stdc.string.memmove(bytes.ptr, bytes.ptr + nextChar,
                                     bytes.length - nextChar);
        }
        return bytes.length - nextChar;
    }

    enum contextMsg = "While scanning a " ~ name;
    enum invalidUTF8Msg = "Invalid UTF-8 data encoded in URI escape sequence";
    while(reader_.peekByte() == '%')
    {
        reader_.forward();
        // The buffer is full; decode one character to make room.
        if(bytesUsed == bytes.length)
        {
            bytesUsed = getDchar(bytes[], reader_);
            if(bytesUsed == size_t.max)
            {
                error(contextMsg, startMark, invalidUTF8Msg, reader_.mark);
                return;
            }
        }

        // Converting 2 hexadecimal digits to a byte.
        uint value = 0;
        foreach(k; 0 .. 2)
        {
            const dchar c = reader_.peek(k);
            if(!c.isHexDigit)
            {
                auto msg = expected("URI escape sequence of 2 hexadecimal " ~
                                    "numbers", c);
                error(contextMsg, startMark, msg, reader_.mark);
                return;
            }

            // The subtractions are unsigned, so a character below the range
            // start wraps around and fails the '<' test.
            uint digit;
            if(c - '0' < 10)     { digit = c - '0'; }
            // Letters stand for the values 10-15, not 0-5.
            else if(c - 'A' < 6) { digit = 10 + (c - 'A'); }
            else if(c - 'a' < 6) { digit = 10 + (c - 'a'); }
            else                 { assert(false); }
            value = value * 16 + digit;
        }
        bytes[bytesUsed++] = cast(char)value;

        reader_.forward(2);
    }

    // Decode everything still buffered. Each getDchar call decodes exactly one
    // character, so keep going until the buffer is empty; a single call would
    // silently drop all escaped characters after the first one.
    while(bytesUsed > 0)
    {
        bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
        if(bytesUsed == size_t.max)
        {
            error(contextMsg, startMark, invalidUTF8Msg, reader_.mark);
            return;
        }
    }
}


/// Scan a line break, if any.
///
/// Consumes the line break at the current reader position and returns its
/// normalized form:
///   '\r\n'      :   '\n'
///   '\r'        :   '\n'
///   '\n'        :   '\n'
///   '\u0085'    :   '\n'
///   '\u2028'    :   '\u2028'
///   '\u2029'    :   '\u2029'
///   no break    :   '\0'   (nothing is consumed)
dchar scanLineBreak() @safe
{
    const firstByte = reader_.peekByte();

    // ASCII input: only '\n', '\r' and "\r\n" are line breaks.
    if(firstByte < 0x80)
    {
        if(firstByte != '\n' && firstByte != '\r') { return '\0'; }

        const isCRLF = reader_.prefix(2) == "\r\n";
        if(isCRLF) { reader_.forward(2); }
        else       { reader_.forward(); }
        return '\n';
    }

    // Non-ASCII input: NEL is normalized to '\n', LS/PS are returned unchanged.
    const next  = reader_.peek();
    const isNEL = next == '\x85';
    if(isNEL || next == '\u2028' || next == '\u2029')
    {
        reader_.forward();
        return isNEL ? '\n' : next;
    }
    return '\0';
}
}

private:

/// Convert a dchar[] to a string without throwing.
///
/// Any exception thrown by the conversion is turned into an assertion failure.
string utf32To8(C)(C[] str) @safe pure nothrow
    if(is(Unqual!C == dchar))
{
    try
    {
        return to!string(str);
    }
    catch(ConvException e)
    {
        assert(false, "Unexpected invalid UTF-32 string");
    }
    catch(Exception e)
    {
        assert(false, "Unexpected exception during UTF-8 encoding");
    }
}