1 /*
   2  * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import java.text.BreakIterator;
  29 import java.util.HashMap;
  30 import java.util.Map;
  31 
  32 import com.sun.source.doctree.AttributeTree.ValueKind;
  33 import com.sun.source.doctree.DocTree;
  34 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
  35 import com.sun.tools.javac.parser.Tokens.Comment;
  36 import com.sun.tools.javac.parser.Tokens.TokenKind;
  37 import com.sun.tools.javac.tree.DCTree;
  38 import com.sun.tools.javac.tree.DCTree.DCAttribute;
  39 import com.sun.tools.javac.tree.DCTree.DCDocComment;
  40 import com.sun.tools.javac.tree.DCTree.DCEndElement;
  41 import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
  42 import com.sun.tools.javac.tree.DCTree.DCErroneous;
  43 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
  44 import com.sun.tools.javac.tree.DCTree.DCReference;
  45 import com.sun.tools.javac.tree.DCTree.DCStartElement;
  46 import com.sun.tools.javac.tree.DCTree.DCText;
  47 import com.sun.tools.javac.tree.DocTreeMaker;
  48 import com.sun.tools.javac.tree.JCTree;
  49 import com.sun.tools.javac.util.DiagnosticSource;
  50 import com.sun.tools.javac.util.List;
  51 import com.sun.tools.javac.util.ListBuffer;
  52 import com.sun.tools.javac.util.Log;
  53 import com.sun.tools.javac.util.Name;
  54 import com.sun.tools.javac.util.Names;
  55 import com.sun.tools.javac.util.Position;
  56 import com.sun.tools.javac.util.StringUtils;
  57 
  58 import static com.sun.tools.javac.util.LayoutCharacters.*;
  59 
/**
 * Parser for documentation comments: reads the text of a comment and builds
 * the corresponding {@code DCTree} nodes.
 *
 *  <p><b>This is NOT part of any supported API.
 *  If you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */
  67 public class DocCommentParser {
  68     static class ParseException extends Exception {
  69         private static final long serialVersionUID = 0;
  70         ParseException(String key) {
  71             super(key);
  72         }
  73     }
  74 
    /**
     * The part of the input that {@code blockContent(Phase)} is reading.
     * {@code parse()} only reads a PREAMBLE and a POSTAMBLE when
     * {@code isFileContent} is set; otherwise only BODY is used.
     */
    private enum Phase {PREAMBLE, BODY, POSTAMBLE};

    /** Factory supplying the name table, the doc tree maker, the log and nested Java parsers. */
    final ParserFactory fac;
    /** Diagnostic source handed to the tree maker when an erroneous tree is created. */
    final DiagnosticSource diagSource;
    /** The comment whose text is parsed by {@code parse()}. */
    final Comment comment;
    /** Tree maker used to create every node; taken from {@code fac.docTreeMaker}. */
    final DocTreeMaker m;
    /** Name table, taken from {@code fac.names}. */
    final Names names;
    /**
     * When true, {@code parse()} also reads a preamble and a postamble, and
     * {@code blockContent} looks for the body start and end tags.
     */
    final boolean isFileContent;

    // NOTE(review): not referenced in the part of the file visible here.
    BreakIterator sentenceBreaker;

    /** The input buffer; {@code parse()} stores an EOI sentinel after the last input character. */
    protected char[] buf;
    /** Index in {@code buf} of the most recent character read. */
    protected int bp;
    /** Index of one past the last input character in {@code buf} (the position of the EOI sentinel). */
    protected int buflen;

    /** The current character, i.e. the one last read from {@code buf} by {@code nextChar()}.
     */
    protected char ch;

    /** Start index of the text run that has been read but not yet added to a tree list, or -1 if there is none. */
    int textStart = -1;
    /** Index of the last non-whitespace character of the pending text run, or -1 if none has been seen. */
    int lastNonWhite = -1;
    /**
     * True while only whitespace has been read since the last line terminator
     * (or since the start); an {@code @} read in that state starts a block tag.
     */
    boolean newline = true;

    /** Parsers for the known tags, looked up by tag name in {@code blockTag()} and {@code inlineTag()}. */
    Map<Name, TagParser> tagParsers;
 102 
 103     public DocCommentParser(ParserFactory fac, DiagnosticSource diagSource,
 104                             Comment comment, boolean isFileContent) {
 105         this.fac = fac;
 106         this.diagSource = diagSource;
 107         this.comment = comment;
 108         names = fac.names;
 109         this.isFileContent = isFileContent;
 110         m = fac.docTreeMaker;
 111         initTagParsers();
 112     }
 113 
 114     public DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
 115         this(fac, diagSource, comment, false);
 116     }
 117 
 118     public DocCommentParser(ParserFactory fac) {
 119         this(fac, null, null, false);
 120     }
 121 
 122     public DCDocComment parse() {
 123         String c = comment.getText();
 124         buf = new char[c.length() + 1];
 125         c.getChars(0, c.length(), buf, 0);
 126         buf[buf.length - 1] = EOI;
 127         buflen = buf.length - 1;
 128         bp = -1;
 129         nextChar();
 130 
 131         List<DCTree> preamble = isFileContent ? blockContent(Phase.PREAMBLE) : List.nil();
 132         List<DCTree> body = blockContent(Phase.BODY);
 133         List<DCTree> tags = blockTags();
 134         List<DCTree> postamble = isFileContent ? blockContent(Phase.POSTAMBLE) : List.nil();
 135 
 136         int pos = Position.NOPOS;
 137         if (!preamble.isEmpty())
 138             pos = preamble.head.pos;
 139         else if (!body.isEmpty())
 140             pos = body.head.pos;
 141         else if (!tags.isEmpty())
 142             pos = tags.head.pos;
 143         else if (!postamble.isEmpty())
 144             pos = postamble.head.pos;
 145 
 146         DCDocComment dc = m.at(pos).newDocCommentTree(comment, body, tags, preamble, postamble);
 147         return dc;
 148     }
 149 
 150     void nextChar() {
 151         ch = buf[bp < buflen ? ++bp : buflen];
 152         switch (ch) {
 153             case '\f': case '\n': case '\r':
 154                 newline = true;
 155         }
 156     }
 157 
 158     protected List<DCTree> blockContent() {
 159         return blockContent(Phase.BODY);
 160     }
 161 
 162     /**
 163      * Read block content, consisting of text, html and inline tags.
 164      * Terminated by the end of input, or the beginning of the next block tag:
 165      * i.e. @ as the first non-whitespace character on a line.
 166      */
 167     @SuppressWarnings("fallthrough")
 168     protected List<DCTree> blockContent(Phase phase) {
 169         ListBuffer<DCTree> trees = new ListBuffer<>();
 170         textStart = -1;
 171 
 172         loop:
 173         while (bp < buflen) {
 174             switch (ch) {
 175                 case '\n': case '\r': case '\f':
 176                     newline = true;
 177                     // fallthrough
 178 
 179                 case ' ': case '\t':
 180                     nextChar();
 181                     break;
 182 
 183                 case '&':
 184                     entity(trees);
 185                     break;
 186 
 187                 case '<':
 188                     newline = false;
 189                     if (isFileContent) {
 190                         switch (phase) {
 191                             case PREAMBLE:
 192                                 if (peek("body")) {
 193                                     trees.add(html());
 194                                     if (textStart == -1) {
 195                                         textStart = bp;
 196                                         lastNonWhite = -1;
 197                                     }
 198                                     // mark this as the start, for processing purposes
 199                                     newline = true;
 200                                     break loop;
 201                                 }
 202                                 break;
 203                             case BODY:
 204                                 if (peek("/body")) {
 205                                     addPendingText(trees, lastNonWhite);
 206                                     break loop;
 207                                 }
 208                                 break;
 209                             default:
 210                                 // fallthrough
 211                         }
 212                     }
 213                     addPendingText(trees, bp - 1);
 214                     trees.add(html());
 215 
 216                     if (phase == Phase.PREAMBLE || phase == Phase.POSTAMBLE) {
 217                         break; // Ignore newlines after html tags, in the meta content
 218                     }
 219                     if (textStart == -1) {
 220                         textStart = bp;
 221                         lastNonWhite = -1;
 222                     }
 223                     break;
 224 
 225                 case '>':
 226                     newline = false;
 227                     addPendingText(trees, bp - 1);
 228                     trees.add(m.at(bp).newErroneousTree(newString(bp, bp + 1), diagSource, "dc.bad.gt"));
 229                     nextChar();
 230                     if (textStart == -1) {
 231                         textStart = bp;
 232                         lastNonWhite = -1;
 233                     }
 234                     break;
 235 
 236                 case '{':
 237                     inlineTag(trees);
 238                     break;
 239 
 240                 case '@':
 241                     if (newline) {
 242                         addPendingText(trees, lastNonWhite);
 243                         break loop;
 244                     }
 245                     // fallthrough
 246 
 247                 default:
 248                     newline = false;
 249                     if (textStart == -1)
 250                         textStart = bp;
 251                     lastNonWhite = bp;
 252                     nextChar();
 253             }
 254         }
 255 
 256         if (lastNonWhite != -1)
 257             addPendingText(trees, lastNonWhite);
 258 
 259         return trees.toList();
 260     }
 261 
 262     /**
 263      * Read a series of block tags, including their content.
 264      * Standard tags parse their content appropriately.
 265      * Non-standard tags are represented by {@link UnknownBlockTag}.
 266      */
 267     protected List<DCTree> blockTags() {
 268         ListBuffer<DCTree> tags = new ListBuffer<>();
 269         while (ch == '@')
 270             tags.add(blockTag());
 271         return tags.toList();
 272     }
 273 
 274     /**
 275      * Read a single block tag, including its content.
 276      * Standard tags parse their content appropriately.
 277      * Non-standard tags are represented by {@link UnknownBlockTag}.
 278      */
 279     protected DCTree blockTag() {
 280         int p = bp;
 281         try {
 282             nextChar();
 283             if (isIdentifierStart(ch)) {
 284                 Name name = readTagName();
 285                 TagParser tp = tagParsers.get(name);
 286                 if (tp == null) {
 287                     List<DCTree> content = blockContent();
 288                     return m.at(p).newUnknownBlockTagTree(name, content);
 289                 } else {
 290                     switch (tp.getKind()) {
 291                         case BLOCK:
 292                             return tp.parse(p);
 293                         case INLINE:
 294                             return erroneous("dc.bad.inline.tag", p);
 295                     }
 296                 }
 297             }
 298             blockContent();
 299 
 300             return erroneous("dc.no.tag.name", p);
 301         } catch (ParseException e) {
 302             blockContent();
 303             return erroneous(e.getMessage(), p);
 304         }
 305     }
 306 
 307     protected void inlineTag(ListBuffer<DCTree> list) {
 308         newline = false;
 309         nextChar();
 310         if (ch == '@') {
 311             addPendingText(list, bp - 2);
 312             list.add(inlineTag());
 313             textStart = bp;
 314             lastNonWhite = -1;
 315         } else {
 316             if (textStart == -1)
 317                 textStart = bp - 1;
 318             lastNonWhite = bp;
 319         }
 320     }
 321 
 322     /**
 323      * Read a single inline tag, including its content.
 324      * Standard tags parse their content appropriately.
 325      * Non-standard tags are represented by {@link UnknownBlockTag}.
 326      * Malformed tags may be returned as {@link Erroneous}.
 327      */
 328     protected DCTree inlineTag() {
 329         int p = bp - 1;
 330         try {
 331             nextChar();
 332             if (isIdentifierStart(ch)) {
 333                 Name name = readTagName();
 334                 TagParser tp = tagParsers.get(name);
 335 
 336                 if (tp == null) {
 337                     skipWhitespace();
 338                     DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
 339                     if (text != null) {
 340                         nextChar();
 341                         return m.at(p).newUnknownInlineTagTree(name, List.of(text)).setEndPos(bp);
 342                     }
 343                 } else {
 344                     if (!tp.retainWhiteSpace) {
 345                         skipWhitespace();
 346                     }
 347                     if (tp.getKind() == TagParser.Kind.INLINE) {
 348                         DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
 349                         if (tree != null) {
 350                             return tree.setEndPos(bp);
 351                         }
 352                     } else { // handle block tags (ex: @see) in inline content
 353                         inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content
 354                         nextChar();
 355                     }
 356                 }
 357             }
 358             return erroneous("dc.no.tag.name", p);
 359         } catch (ParseException e) {
 360             return erroneous(e.getMessage(), p);
 361         }
 362     }
 363 
 364     private static enum WhitespaceRetentionPolicy {
 365         RETAIN_ALL,
 366         REMOVE_FIRST_SPACE,
 367         REMOVE_ALL
 368     }
 369 
 370     /**
 371      * Read plain text content of an inline tag.
 372      * Matching pairs of { } are skipped; the text is terminated by the first
 373      * unmatched }. It is an error if the beginning of the next tag is detected.
 374      */
 375     private DCTree inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
 376         switch (whitespacePolicy) {
 377             case REMOVE_ALL:
 378                 skipWhitespace();
 379                 break;
 380             case REMOVE_FIRST_SPACE:
 381                 if (ch == ' ')
 382                     nextChar();
 383                 break;
 384             case RETAIN_ALL:
 385             default:
 386                 // do nothing
 387                 break;
 388 
 389         }
 390         int pos = bp;
 391         int depth = 1;
 392 
 393         loop:
 394         while (bp < buflen) {
 395             switch (ch) {
 396                 case '\n': case '\r': case '\f':
 397                     newline = true;
 398                     break;
 399 
 400                 case ' ': case '\t':
 401                     break;
 402 
 403                 case '{':
 404                     newline = false;
 405                     lastNonWhite = bp;
 406                     depth++;
 407                     break;
 408 
 409                 case '}':
 410                     if (--depth == 0) {
 411                         return m.at(pos).newTextTree(newString(pos, bp));
 412                     }
 413                     newline = false;
 414                     lastNonWhite = bp;
 415                     break;
 416 
 417                 case '@':
 418                     if (newline)
 419                         break loop;
 420                     newline = false;
 421                     lastNonWhite = bp;
 422                     break;
 423 
 424                 default:
 425                     newline = false;
 426                     lastNonWhite = bp;
 427                     break;
 428             }
 429             nextChar();
 430         }
 431         throw new ParseException("dc.unterminated.inline.tag");
 432     }
 433 
 434     /**
 435      * Read Java class name, possibly followed by member
 436      * Matching pairs of {@literal < >} are skipped. The text is terminated by the first
 437      * unmatched }. It is an error if the beginning of the next tag is detected.
 438      */
 439     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
 440     // TODO: improve quality of parse to forbid bad constructions.
 441     // TODO: update to use ReferenceParser
 442     @SuppressWarnings("fallthrough")
 443     protected DCReference reference(boolean allowMember) throws ParseException {
 444         int pos = bp;
 445         int depth = 0;
 446 
 447         // scan to find the end of the signature, by looking for the first
 448         // whitespace not enclosed in () or <>, or the end of the tag
 449         loop:
 450         while (bp < buflen) {
 451             switch (ch) {
 452                 case '\n': case '\r': case '\f':
 453                     newline = true;
 454                     // fallthrough
 455 
 456                 case ' ': case '\t':
 457                     if (depth == 0)
 458                         break loop;
 459                     break;
 460 
 461                 case '(':
 462                 case '<':
 463                     newline = false;
 464                     depth++;
 465                     break;
 466 
 467                 case ')':
 468                 case '>':
 469                     newline = false;
 470                     --depth;
 471                     break;
 472 
 473                 case '}':
 474                     if (bp == pos)
 475                         return null;
 476                     newline = false;
 477                     break loop;
 478 
 479                 case '@':
 480                     if (newline)
 481                         break loop;
 482                     // fallthrough
 483 
 484                 default:
 485                     newline = false;
 486 
 487             }
 488             nextChar();
 489         }
 490 
 491         if (depth != 0)
 492             throw new ParseException("dc.unterminated.signature");
 493 
 494         String sig = newString(pos, bp);
 495 
 496         // Break sig apart into qualifiedExpr member paramTypes.
 497         JCTree qualExpr;
 498         Name member;
 499         List<JCTree> paramTypes;
 500 
 501         Log.DeferredDiagnosticHandler deferredDiagnosticHandler
 502                 = new Log.DeferredDiagnosticHandler(fac.log);
 503 
 504         try {
 505             int hash = sig.indexOf("#");
 506             int lparen = sig.indexOf("(", hash + 1);
 507             if (hash == -1) {
 508                 if (lparen == -1) {
 509                     qualExpr = parseType(sig);
 510                     member = null;
 511                 } else {
 512                     qualExpr = null;
 513                     member = parseMember(sig.substring(0, lparen));
 514                 }
 515             } else {
 516                 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
 517                 if (lparen == -1)
 518                     member = parseMember(sig.substring(hash + 1));
 519                 else
 520                     member = parseMember(sig.substring(hash + 1, lparen));
 521             }
 522 
 523             if (lparen < 0) {
 524                 paramTypes = null;
 525             } else {
 526                 int rparen = sig.indexOf(")", lparen);
 527                 if (rparen != sig.length() - 1)
 528                     throw new ParseException("dc.ref.bad.parens");
 529                 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
 530             }
 531 
 532             if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
 533                 throw new ParseException("dc.ref.syntax.error");
 534 
 535         } finally {
 536             fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
 537         }
 538 
 539         return m.at(pos).newReferenceTree(sig, qualExpr, member, paramTypes).setEndPos(bp);
 540     }
 541 
 542     JCTree parseType(String s) throws ParseException {
 543         JavacParser p = fac.newParser(s, false, false, false);
 544         JCTree tree = p.parseType();
 545         if (p.token().kind != TokenKind.EOF)
 546             throw new ParseException("dc.ref.unexpected.input");
 547         return tree;
 548     }
 549 
 550     Name parseMember(String s) throws ParseException {
 551         JavacParser p = fac.newParser(s, false, false, false);
 552         Name name = p.ident();
 553         if (p.token().kind != TokenKind.EOF)
 554             throw new ParseException("dc.ref.unexpected.input");
 555         return name;
 556     }
 557 
 558     List<JCTree> parseParams(String s) throws ParseException {
 559         if (s.trim().isEmpty())
 560             return List.nil();
 561 
 562         JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
 563         ListBuffer<JCTree> paramTypes = new ListBuffer<>();
 564         paramTypes.add(p.parseType());
 565 
 566         if (p.token().kind == TokenKind.IDENTIFIER)
 567             p.nextToken();
 568 
 569         while (p.token().kind == TokenKind.COMMA) {
 570             p.nextToken();
 571             paramTypes.add(p.parseType());
 572 
 573             if (p.token().kind == TokenKind.IDENTIFIER)
 574                 p.nextToken();
 575         }
 576 
 577         if (p.token().kind != TokenKind.EOF)
 578             throw new ParseException("dc.ref.unexpected.input");
 579 
 580         return paramTypes.toList();
 581     }
 582 
 583     /**
 584      * Read Java identifier
 585      * Matching pairs of { } are skipped; the text is terminated by the first
 586      * unmatched }. It is an error if the beginning of the next tag is detected.
 587      */
 588     @SuppressWarnings("fallthrough")
 589     protected DCIdentifier identifier() throws ParseException {
 590         skipWhitespace();
 591         int pos = bp;
 592 
 593         if (isJavaIdentifierStart(ch)) {
 594             Name name = readJavaIdentifier();
 595             return m.at(pos).newIdentifierTree(name);
 596         }
 597 
 598         throw new ParseException("dc.identifier.expected");
 599     }
 600 
 601     /**
 602      * Read a quoted string.
 603      * It is an error if the beginning of the next tag is detected.
 604      */
 605     @SuppressWarnings("fallthrough")
 606     protected DCText quotedString() {
 607         int pos = bp;
 608         nextChar();
 609 
 610         loop:
 611         while (bp < buflen) {
 612             switch (ch) {
 613                 case '\n': case '\r': case '\f':
 614                     newline = true;
 615                     break;
 616 
 617                 case ' ': case '\t':
 618                     break;
 619 
 620                 case '"':
 621                     nextChar();
 622                     // trim trailing white-space?
 623                     return m.at(pos).newTextTree(newString(pos, bp));
 624 
 625                 case '@':
 626                     if (newline)
 627                         break loop;
 628 
 629             }
 630             nextChar();
 631         }
 632         return null;
 633     }
 634 
 635     /**
 636      * Read a term ie. one word.
 637      * It is an error if the beginning of the next tag is detected.
 638      */
 639     @SuppressWarnings("fallthrough")
 640     protected DCText inlineWord() {
 641         int pos = bp;
 642         int depth = 0;
 643         loop:
 644         while (bp < buflen) {
 645             switch (ch) {
 646                 case '\n':
 647                     newline = true;
 648                     // fallthrough
 649 
 650                 case '\r': case '\f': case ' ': case '\t':
 651                     return m.at(pos).newTextTree(newString(pos, bp));
 652 
 653                 case '@':
 654                     if (newline)
 655                         break loop;
 656 
 657                 case '{':
 658                     depth++;
 659                     break;
 660 
 661                 case '}':
 662                     if (depth == 0 || --depth == 0)
 663                         return m.at(pos).newTextTree(newString(pos, bp));
 664                     break;
 665             }
 666             newline = false;
 667             nextChar();
 668         }
 669         return null;
 670     }
 671 
 672     /**
 673      * Read general text content of an inline tag, including HTML entities and elements.
 674      * Matching pairs of { } are skipped; the text is terminated by the first
 675      * unmatched }. It is an error if the beginning of the next tag is detected.
 676      */
 677     @SuppressWarnings("fallthrough")
 678     private List<DCTree> inlineContent() {
 679         ListBuffer<DCTree> trees = new ListBuffer<>();
 680 
 681         skipWhitespace();
 682         int pos = bp;
 683         int depth = 1;
 684         textStart = -1;
 685 
 686         loop:
 687         while (bp < buflen) {
 688 
 689             switch (ch) {
 690                 case '\n': case '\r': case '\f':
 691                     newline = true;
 692                     // fall through
 693 
 694                 case ' ': case '\t':
 695                     nextChar();
 696                     break;
 697 
 698                 case '&':
 699                     entity(trees);
 700                     break;
 701 
 702                 case '<':
 703                     newline = false;
 704                     addPendingText(trees, bp - 1);
 705                     trees.add(html());
 706                     break;
 707 
 708                 case '{':
 709                     if (textStart == -1)
 710                         textStart = bp;
 711                     newline = false;
 712                     depth++;
 713                     nextChar();
 714                     break;
 715 
 716                 case '}':
 717                     newline = false;
 718                     if (--depth == 0) {
 719                         addPendingText(trees, bp - 1);
 720                         nextChar();
 721                         return trees.toList();
 722                     }
 723                     nextChar();
 724                     break;
 725 
 726                 case '@':
 727                     if (newline)
 728                         break loop;
 729                     // fallthrough
 730 
 731                 default:
 732                     if (textStart == -1)
 733                         textStart = bp;
 734                     nextChar();
 735                     break;
 736             }
 737         }
 738 
 739         return List.of(erroneous("dc.unterminated.inline.tag", pos));
 740     }
 741 
 742     protected void entity(ListBuffer<DCTree> list) {
 743         newline = false;
 744         addPendingText(list, bp - 1);
 745         list.add(entity());
 746         if (textStart == -1) {
 747             textStart = bp;
 748             lastNonWhite = -1;
 749         }
 750     }
 751 
 752     /**
 753      * Read an HTML entity.
 754      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
 755      */
 756     protected DCTree entity() {
 757         int p = bp;
 758         nextChar();
 759         Name name = null;
 760         if (ch == '#') {
 761             int namep = bp;
 762             nextChar();
 763             if (isDecimalDigit(ch)) {
 764                 nextChar();
 765                 while (isDecimalDigit(ch))
 766                     nextChar();
 767                 name = names.fromChars(buf, namep, bp - namep);
 768             } else if (ch == 'x' || ch == 'X') {
 769                 nextChar();
 770                 if (isHexDigit(ch)) {
 771                     nextChar();
 772                     while (isHexDigit(ch))
 773                         nextChar();
 774                     name = names.fromChars(buf, namep, bp - namep);
 775                 }
 776             }
 777         } else if (isIdentifierStart(ch)) {
 778             name = readIdentifier();
 779         }
 780 
 781         if (name == null)
 782             return erroneous("dc.bad.entity", p);
 783         else {
 784             if (ch != ';')
 785                 return erroneous("dc.missing.semicolon", p);
 786             nextChar();
 787             return m.at(p).newEntityTree(name);
 788         }
 789     }
 790 
 791     boolean peek(String s) {
 792         final int savedpos = bp;
 793         try {
 794             if (ch == '<')
 795                 nextChar();
 796 
 797             if (ch == '/') {
 798                 if (s.charAt(0) != ch) {
 799                     return false;
 800                 } else {
 801                     s = s.substring(1, s.length());
 802                     nextChar();
 803                 }
 804             }
 805 
 806             if (isIdentifierStart(ch)) {
 807                 Name name = readIdentifier();
 808                 return StringUtils.toLowerCase(name.toString()).equals(s);
 809             }
 810             return false;
 811         } finally {
 812             bp = savedpos;
 813             ch = buf[bp];
 814         }
 815     }
 816 
 817     /**
 818      * Read the start or end of an HTML tag, or an HTML comment
 819      * {@literal <identifier attrs> } or {@literal </identifier> }
 820      */
 821     private DCTree html() {
 822         int p = bp;
 823         nextChar();
 824         if (isIdentifierStart(ch)) {
 825             Name name = readIdentifier();
 826             List<DCTree> attrs = htmlAttrs();
 827             if (attrs != null) {
 828                 boolean selfClosing = false;
 829                 if (ch == '/') {
 830                     nextChar();
 831                     selfClosing = true;
 832                 }
 833                 if (ch == '>') {
 834                     nextChar();
 835                     DCTree dctree = m.at(p).newStartElementTree(name, attrs, selfClosing).setEndPos(bp);
 836                     return dctree;
 837                 }
 838             }
 839         } else if (ch == '/') {
 840             nextChar();
 841             if (isIdentifierStart(ch)) {
 842                 Name name = readIdentifier();
 843                 skipWhitespace();
 844                 if (ch == '>') {
 845                     nextChar();
 846                     return m.at(p).newEndElementTree(name);
 847                 }
 848             }
 849         } else if (ch == '!') {
 850             nextChar();
 851             if (ch == '-') {
 852                 nextChar();
 853                 if (ch == '-') {
 854                     nextChar();
 855                     while (bp < buflen) {
 856                         int dash = 0;
 857                         while (ch == '-') {
 858                             dash++;
 859                             nextChar();
 860                         }
 861                         // Strictly speaking, a comment should not contain "--"
 862                         // so dash > 2 is an error, dash == 2 implies ch == '>'
 863                         // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
 864                         // for more details.
 865                         if (dash >= 2 && ch == '>') {
 866                             nextChar();
 867                             return m.at(p).newCommentTree(newString(p, bp));
 868                         }
 869 
 870                         nextChar();
 871                     }
 872                 }
 873             } else if (isIdentifierStart(ch) && peek("doctype")) {
 874                 readIdentifier();
 875                 nextChar();
 876                 skipWhitespace();
 877                 int d = bp;
 878                 while (bp < buflen) {
 879                     if (ch == '>') {
 880                         int mark = bp;
 881                         nextChar();
 882                         return m.at(d).newDocTypeTree(newString(d, mark));
 883                     }
 884                     nextChar();
 885                 }
 886             }
 887         }
 888 
 889         bp = p + 1;
 890         ch = buf[bp];
 891         return erroneous("dc.malformed.html", p);
 892     }
 893 
    /**
     * Read a series of HTML attributes, terminated by {@literal > }.
     * Each attribute is of the form {@literal identifier[=value] }.
     * "value" may be unquoted, single-quoted, or double-quoted.
     *
     * Reading stops at the first character that is not an identifier start.
     * If a quoted value runs into an {@code @} while the {@code newline} flag
     * is set, an erroneous "dc.unterminated.string" node is appended and no
     * further attributes are read.
     *
     * @return the attributes read so far, in source order; as written, this
     *         method always returns a list and never {@code null}
     */
    protected List<DCTree> htmlAttrs() {
        ListBuffer<DCTree> attrs = new ListBuffer<>();
        skipWhitespace();

        loop:
        while (isIdentifierStart(ch)) {
            int namePos = bp;
            Name name = readAttributeName();
            skipWhitespace();
            // An attribute without "=value" keeps a null value and kind EMPTY.
            List<DCTree> value = null;
            ValueKind vkind = ValueKind.EMPTY;
            if (ch == '=') {
                ListBuffer<DCTree> v = new ListBuffer<>();
                nextChar();
                skipWhitespace();
                if (ch == '\'' || ch == '"') {
                    vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
                    char quote = ch;
                    nextChar();
                    // Pending plain text starts just after the opening quote.
                    textStart = bp;
                    while (bp < buflen && ch != quote) {
                        // NOTE(review): '@' with the newline flag set presumably
                        // marks the start of a block tag -- confirm.
                        if (newline && ch == '@') {
                            attrs.add(erroneous("dc.unterminated.string", namePos));
                            // No point trying to read more.
                            // In fact, all attrs get discarded by the caller
                            // and superseded by a malformed.html node because
                            // the html tag itself is not terminated correctly.
                            break loop;
                        }
                        attrValueChar(v);
                    }
                    // Flush text up to the character before the closing quote,
                    // then step over the quote itself.
                    addPendingText(v, bp - 1);
                    nextChar();
                } else {
                    vkind = ValueKind.UNQUOTED;
                    textStart = bp;
                    while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
                        attrValueChar(v);
                    }
                    // The terminator is not part of the value and is not consumed here.
                    addPendingText(v, bp - 1);
                }
                skipWhitespace();
                value = v.toList();
            }
            DCAttribute attr = m.at(namePos).newAttributeTree(name, vkind, value);
            attrs.add(attr);
        }

        return attrs.toList();
    }
 949 
 950     protected void attrValueChar(ListBuffer<DCTree> list) {
 951         switch (ch) {
 952             case '&':
 953                 entity(list);
 954                 break;
 955 
 956             case '{':
 957                 inlineTag(list);
 958                 break;
 959 
 960             default:
 961                 nextChar();
 962         }
 963     }
 964 
 965     protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
 966         if (textStart != -1) {
 967             if (textStart <= textEnd) {
 968                 list.add(m.at(textStart).newTextTree(newString(textStart, textEnd + 1)));
 969             }
 970             textStart = -1;
 971         }
 972     }
 973 
 974     protected DCErroneous erroneous(String code, int pos) {
 975         int i = bp - 1;
 976         loop:
 977         while (i > pos) {
 978             switch (buf[i]) {
 979                 case '\f': case '\n': case '\r':
 980                     newline = true;
 981                     break;
 982                 case '\t': case ' ':
 983                     break;
 984                 default:
 985                     break loop;
 986             }
 987             i--;
 988         }
 989         textStart = -1;
 990         return m.at(pos).newErroneousTree(newString(pos, i + 1), diagSource, code);
 991     }
 992 
 993     protected boolean isIdentifierStart(char ch) {
 994         return Character.isUnicodeIdentifierStart(ch);
 995     }
 996 
 997     protected Name readIdentifier() {
 998         int start = bp;
 999         nextChar();
1000         while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
1001             nextChar();
1002         return names.fromChars(buf, start, bp - start);
1003     }
1004 
1005     protected Name readAttributeName() {
1006         int start = bp;
1007         nextChar();
1008         while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
1009             nextChar();
1010         return names.fromChars(buf, start, bp - start);
1011     }
1012 
1013     protected Name readTagName() {
1014         int start = bp;
1015         nextChar();
1016         while (bp < buflen
1017                 && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
1018                 || ch == '-' || ch == ':')) {
1019             nextChar();
1020         }
1021         return names.fromChars(buf, start, bp - start);
1022     }
1023 
1024     protected boolean isJavaIdentifierStart(char ch) {
1025         return Character.isJavaIdentifierStart(ch);
1026     }
1027 
1028     protected Name readJavaIdentifier() {
1029         int start = bp;
1030         nextChar();
1031         while (bp < buflen && Character.isJavaIdentifierPart(ch))
1032             nextChar();
1033         return names.fromChars(buf, start, bp - start);
1034     }
1035 
1036     protected Name readSystemPropertyName() {
1037         int pos = bp;
1038         nextChar();
1039         while (bp < buflen && Character.isUnicodeIdentifierPart(ch) || ch == '.')
1040             nextChar();
1041         return names.fromChars(buf, pos, bp - pos);
1042     }
1043 
1044     protected boolean isDecimalDigit(char ch) {
1045         return ('0' <= ch && ch <= '9');
1046     }
1047 
1048     protected boolean isHexDigit(char ch) {
1049         return ('0' <= ch && ch <= '9')
1050                 || ('a' <= ch && ch <= 'f')
1051                 || ('A' <= ch && ch <= 'F');
1052     }
1053 
1054     protected boolean isUnquotedAttrValueTerminator(char ch) {
1055         switch (ch) {
1056             case '\f': case '\n': case '\r': case '\t':
1057             case ' ':
1058             case '"': case '\'': case '`':
1059             case '=': case '<': case '>':
1060                 return true;
1061             default:
1062                 return false;
1063         }
1064     }
1065 
1066     protected boolean isWhitespace(char ch) {
1067         return Character.isWhitespace(ch);
1068     }
1069 
1070     protected void skipWhitespace() {
1071         while (isWhitespace(ch)) {
1072             nextChar();
1073         }
1074     }
1075 
1076     /**
1077      * @param start position of first character of string
1078      * @param end position of character beyond last character to be included
1079      */
1080     String newString(int start, int end) {
1081         return new String(buf, start, end - start);
1082     }
1083 
1084     static abstract class TagParser {
1085         enum Kind { INLINE, BLOCK }
1086 
1087         final Kind kind;
1088         final DCTree.Kind treeKind;
1089         final boolean retainWhiteSpace;
1090 
1091 
1092         TagParser(Kind k, DCTree.Kind tk) {
1093             kind = k;
1094             treeKind = tk;
1095             retainWhiteSpace = false;
1096         }
1097 
1098         TagParser(Kind k, DCTree.Kind tk, boolean retainWhiteSpace) {
1099             kind = k;
1100             treeKind = tk;
1101             this.retainWhiteSpace = retainWhiteSpace;
1102         }
1103 
1104         Kind getKind() {
1105             return kind;
1106         }
1107 
1108         DCTree.Kind getTreeKind() {
1109             return treeKind;
1110         }
1111 
1112         abstract DCTree parse(int pos) throws ParseException;
1113     }
1114 
    /**
     * Build the table of tag parsers.
     * One parser is created per supported tag, and each is then registered
     * in {@code tagParsers} under the tag name of its tree kind.
     * In the comments below, inline tags are written in braces and block
     * tags without, matching the {@code Kind} passed to each parser.
     *
     * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/tools/unix/javadoc.html#CHDJGIJB">Javadoc Tags</a>
     */
    private void initTagParsers() {
        TagParser[] parsers = {
            // @author name-text
            new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
                public DCTree parse(int pos) {
                    List<DCTree> name = blockContent();
                    return m.at(pos).newAuthorTree(name);
                }
            },

            // {@code text}
            new TagParser(Kind.INLINE, DCTree.Kind.CODE, true) {
                public DCTree parse(int pos) throws ParseException {
                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
                    // NOTE(review): presumably steps over the closing '}' -- confirm against inlineText
                    nextChar();
                    return m.at(pos).newCodeTree((DCText) text);
                }
            },

            // @deprecated deprecated-text
            new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
                public DCTree parse(int pos) {
                    List<DCTree> reason = blockContent();
                    return m.at(pos).newDeprecatedTree(reason);
                }
            },

            // {@docRoot}
            new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
                public DCTree parse(int pos) throws ParseException {
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).newDocRootTree();
                    }
                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @exception class-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(false);
                    List<DCTree> description = blockContent();
                    return m.at(pos).newExceptionTree(ref, description);
                }
            },

            // @hidden hidden-text
            new TagParser(Kind.BLOCK, DCTree.Kind.HIDDEN) {
                public DCTree parse(int pos) {
                    List<DCTree> reason = blockContent();
                    return m.at(pos).newHiddenTree(reason);
                }
            },

            // {@index search-term options-description}
            new TagParser(Kind.INLINE, DCTree.Kind.INDEX) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    if (ch == '}') {
                        throw new ParseException("dc.no.content");
                    }
                    // The search term is either a quoted string or a single word.
                    DCTree term = ch == '"' ? quotedString() : inlineWord();
                    if (term == null) {
                        throw new ParseException("dc.no.content");
                    }
                    skipWhitespace();
                    // The description is optional; without one, only the closing '}' is consumed.
                    List<DCTree> description = List.nil();
                    if (ch != '}') {
                        description = inlineContent();
                    } else {
                        nextChar();
                    }
                    return m.at(pos).newIndexTree(term, description);
                }
            },

            // {@inheritDoc}
            new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
                public DCTree parse(int pos) throws ParseException {
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).newInheritDocTree();
                    }
                    inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // {@link package.class#member label}
            new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    List<DCTree> label = inlineContent();
                    return m.at(pos).newLinkTree(ref, label);
                }
            },

            // {@linkplain package.class#member label}
            new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    List<DCTree> label = inlineContent();
                    return m.at(pos).newLinkPlainTree(ref, label);
                }
            },

            // {@literal text}
            new TagParser(Kind.INLINE, DCTree.Kind.LITERAL, true) {
                public DCTree parse(int pos) throws ParseException {
                    DCTree text = inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
                    // NOTE(review): presumably steps over the closing '}' -- confirm against inlineText
                    nextChar();
                    return m.at(pos).newLiteralTree((DCText) text);
                }
            },

            // @getter text (AccessorParser registers this as a BLOCK tag)
            new AccessorParser(DCTree.Kind.GETTER),

            // @setter text (AccessorParser registers this as a BLOCK tag)
            new AccessorParser(DCTree.Kind.SETTER),

            // @param parameter-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();

                    // A name written as <T> denotes a type parameter.
                    boolean typaram = false;
                    if (ch == '<') {
                        typaram = true;
                        nextChar();
                    }

                    DCIdentifier id = identifier();

                    if (typaram) {
                        if (ch != '>')
                            throw new ParseException("dc.gt.expected");
                        nextChar();
                    }

                    skipWhitespace();
                    List<DCTree> desc = blockContent();
                    return m.at(pos).newParamTree(typaram, id, desc);
                }
            },

            // @provides service-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.PROVIDES) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(true);
                    List<DCTree> description = blockContent();
                    return m.at(pos).newProvidesTree(ref, description);
                }
            },

            // @return description
            new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).newReturnTree(description);
                }
            },

            // @see reference | quoted-string | HTML
            new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    // Every case that does not return or throw falls out of the
                    // switch to the "dc.unexpected.content" exception below.
                    switch (ch) {
                        case '"':
                            DCText string = quotedString();
                            if (string != null) {
                                skipWhitespace();
                                // Accept the string only if nothing but another '@'
                                // or the end of the buffer follows it.
                                if (ch == '@'
                                        || ch == EOI && bp == buf.length - 1) {
                                    return m.at(pos).newSeeTree(List.<DCTree>of(string));
                                }
                            }
                            break;

                        case '<':
                            List<DCTree> html = blockContent();
                            if (html != null)
                                return m.at(pos).newSeeTree(html);
                            break;

                        case '@':
                            if (newline)
                                throw new ParseException("dc.no.content");
                            break;

                        case EOI:
                            // NOTE(review): the position check presumably distinguishes the
                            // end-of-buffer marker from an EOI character inside the comment -- confirm
                            if (bp == buf.length - 1)
                                throw new ParseException("dc.no.content");
                            break;

                        default:
                            if (isJavaIdentifierStart(ch) || ch == '#') {
                                DCReference ref = reference(true);
                                List<DCTree> description = blockContent();
                                return m.at(pos).newSeeTree(description.prepend(ref));
                            }
                    }
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @serialData data-description
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).newSerialDataTree(description);
                }
            },

            // @serialField field-name field-type description
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCIdentifier name = identifier();
                    skipWhitespace();
                    DCReference type = reference(false);
                    // The description stays null unless whitespace follows the type.
                    List<DCTree> description = null;
                    if (isWhitespace(ch)) {
                        skipWhitespace();
                        description = blockContent();
                    }
                    return m.at(pos).newSerialFieldTree(name, type, description);
                }
            },

            // @serial field-description | include | exclude
            new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).newSerialTree(description);
                }
            },

            // @since since-text
            new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).newSinceTree(description);
                }
            },

            // {@summary summary-text}
            new TagParser(Kind.INLINE, DCTree.Kind.SUMMARY) {
                public DCTree parse(int pos) throws ParseException {
                    List<DCTree> summary = inlineContent();
                    return m.at(pos).newSummaryTree(summary);
                }
            },

            // {@systemProperty property-name}
            new TagParser(Kind.INLINE, DCTree.Kind.SYSTEM_PROPERTY) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    if (ch == '}') {
                        throw new ParseException("dc.no.content");
                    }
                    Name propertyName = readSystemPropertyName();
                    if (propertyName == null) {
                        throw new ParseException("dc.no.content");
                    }
                    skipWhitespace();
                    // Only the closing '}' may follow the property name.
                    if (ch != '}') {
                        nextChar();
                        throw new ParseException("dc.unexpected.content");
                    } else {
                        nextChar();
                        return m.at(pos).newSystemPropertyTree(propertyName);
                    }
                }
            },

            // @throws class-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(false);
                    List<DCTree> description = blockContent();
                    return m.at(pos).newThrowsTree(ref, description);
                }
            },

            // @uses service-name description
            new TagParser(Kind.BLOCK, DCTree.Kind.USES) {
                public DCTree parse(int pos) throws ParseException {
                    skipWhitespace();
                    DCReference ref = reference(true);
                    List<DCTree> description = blockContent();
                    return m.at(pos).newUsesTree(ref, description);
                }
            },

            // {@value package.class#field}
            new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
                public DCTree parse(int pos) throws ParseException {
                    DCReference ref = reference(true);
                    skipWhitespace();
                    if (ch == '}') {
                        nextChar();
                        return m.at(pos).newValueTree(ref);
                    }
                    nextChar();
                    throw new ParseException("dc.unexpected.content");
                }
            },

            // @version version-text
            new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
                public DCTree parse(int pos) {
                    List<DCTree> description = blockContent();
                    return m.at(pos).newVersionTree(description);
                }
            },
        };

        // Index the parsers by the tag name of the tree kind each one produces.
        tagParsers = new HashMap<>();
        for (TagParser p: parsers)
            tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

    }
1448 
1449     class AccessorParser extends TagParser {
1450         AccessorParser(DocTree.Kind kind) {
1451             super(Kind.BLOCK, kind, true);
1452         }
1453 
1454         public DCTree parse(int pos) throws ParseException {
1455             List<DCTree> desc = blockContent();
1456             return m.at(pos).newAccessorTree(treeKind, desc);
1457         }
1458     }
1459 }