Class Parser

  • All Implemented Interfaces:
    IParse

    public class Parser
    extends java.lang.Object
    implements IParse
    Parses input text to a tree of nodes.

    Start with the builder() method, configure the parser and build it. Example:

    
     Parser parser = Parser.builder().build();
     Node document = parser.parse("input text");
     
    • Field Detail

      • EXTENSIONS

        public static final DataKey<java.util.Collection<Extension>> EXTENSIONS
      • ASTERISK_DELIMITER_PROCESSOR

        public static final DataKey<java.lang.Boolean> ASTERISK_DELIMITER_PROCESSOR
      • TRACK_DOCUMENT_LINES

        public static final DataKey<java.lang.Boolean> TRACK_DOCUMENT_LINES
      • BLOCK_QUOTE_PARSER

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_PARSER
      • BLOCK_QUOTE_EXTEND_TO_BLANK_LINE

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_EXTEND_TO_BLANK_LINE
      • BLOCK_QUOTE_IGNORE_BLANK_LINE

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_IGNORE_BLANK_LINE
      • BLOCK_QUOTE_ALLOW_LEADING_SPACE

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_ALLOW_LEADING_SPACE
      • BLOCK_QUOTE_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_INTERRUPTS_PARAGRAPH
      • BLOCK_QUOTE_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_INTERRUPTS_ITEM_PARAGRAPH
      • BLOCK_QUOTE_WITH_LEAD_SPACES_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> BLOCK_QUOTE_WITH_LEAD_SPACES_INTERRUPTS_ITEM_PARAGRAPH
      • FENCED_CODE_BLOCK_PARSER

        public static final DataKey<java.lang.Boolean> FENCED_CODE_BLOCK_PARSER
      • MATCH_CLOSING_FENCE_CHARACTERS

        public static final DataKey<java.lang.Boolean> MATCH_CLOSING_FENCE_CHARACTERS
      • FENCED_CODE_CONTENT_BLOCK

        public static final DataKey<java.lang.Boolean> FENCED_CODE_CONTENT_BLOCK
      • CODE_SOFT_LINE_BREAKS

        public static final DataKey<java.lang.Boolean> CODE_SOFT_LINE_BREAKS
      • HARD_LINE_BREAK_LIMIT

        public static final DataKey<java.lang.Boolean> HARD_LINE_BREAK_LIMIT
      • HEADING_PARSER

        public static final DataKey<java.lang.Boolean> HEADING_PARSER
      • HEADING_SETEXT_MARKER_LENGTH

        public static final DataKey<java.lang.Integer> HEADING_SETEXT_MARKER_LENGTH
      • HEADING_NO_ATX_SPACE

        public static final DataKey<java.lang.Boolean> HEADING_NO_ATX_SPACE
      • ESCAPE_HEADING_NO_ATX_SPACE

        public static final DataKey<java.lang.Boolean> ESCAPE_HEADING_NO_ATX_SPACE
      • HEADING_NO_EMPTY_HEADING_WITHOUT_SPACE

        public static final DataKey<java.lang.Boolean> HEADING_NO_EMPTY_HEADING_WITHOUT_SPACE
      • HEADING_NO_LEAD_SPACE

        public static final DataKey<java.lang.Boolean> HEADING_NO_LEAD_SPACE
      • HEADING_CAN_INTERRUPT_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> HEADING_CAN_INTERRUPT_ITEM_PARAGRAPH
      • HTML_BLOCK_PARSER

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_PARSER
      • HTML_COMMENT_BLOCKS_INTERRUPT_PARAGRAPH

        public static final DataKey<java.lang.Boolean> HTML_COMMENT_BLOCKS_INTERRUPT_PARAGRAPH
      • HTML_FOR_TRANSLATOR

        public static final DataKey<java.lang.Boolean> HTML_FOR_TRANSLATOR
      • INLINE_DELIMITER_DIRECTIONAL_PUNCTUATIONS

        public static final DataKey<java.lang.Boolean> INLINE_DELIMITER_DIRECTIONAL_PUNCTUATIONS
      • INDENTED_CODE_BLOCK_PARSER

        public static final DataKey<java.lang.Boolean> INDENTED_CODE_BLOCK_PARSER
      • INDENTED_CODE_NO_TRAILING_BLANK_LINES

        public static final DataKey<java.lang.Boolean> INDENTED_CODE_NO_TRAILING_BLANK_LINES
      • INTELLIJ_DUMMY_IDENTIFIER

        public static final DataKey<java.lang.Boolean> INTELLIJ_DUMMY_IDENTIFIER
      • MATCH_NESTED_LINK_REFS_FIRST

        public static final DataKey<java.lang.Boolean> MATCH_NESTED_LINK_REFS_FIRST
      • PARSE_INNER_HTML_COMMENTS

        public static final DataKey<java.lang.Boolean> PARSE_INNER_HTML_COMMENTS
      • PARSE_MULTI_LINE_IMAGE_URLS

        public static final DataKey<java.lang.Boolean> PARSE_MULTI_LINE_IMAGE_URLS
      • PARSE_JEKYLL_MACROS_IN_URLS

        public static final DataKey<java.lang.Boolean> PARSE_JEKYLL_MACROS_IN_URLS
      • SPACE_IN_LINK_URLS

        public static final DataKey<java.lang.Boolean> SPACE_IN_LINK_URLS
      • SPACE_IN_LINK_ELEMENTS

        public static final DataKey<java.lang.Boolean> SPACE_IN_LINK_ELEMENTS
      • WWW_AUTO_LINK_ELEMENT

        public static final DataKey<java.lang.Boolean> WWW_AUTO_LINK_ELEMENT
      • LINK_TEXT_PRIORITY_OVER_LINK_REF

        public static final DataKey<java.lang.Boolean> LINK_TEXT_PRIORITY_OVER_LINK_REF
      • REFERENCE_PARAGRAPH_PRE_PROCESSOR

        public static final DataKey<java.lang.Boolean> REFERENCE_PARAGRAPH_PRE_PROCESSOR
      • THEMATIC_BREAK_PARSER

        public static final DataKey<java.lang.Boolean> THEMATIC_BREAK_PARSER
      • THEMATIC_BREAK_RELAXED_START

        public static final DataKey<java.lang.Boolean> THEMATIC_BREAK_RELAXED_START
      • UNDERSCORE_DELIMITER_PROCESSOR

        public static final DataKey<java.lang.Boolean> UNDERSCORE_DELIMITER_PROCESSOR
      • BLANK_LINES_IN_AST

        public static final DataKey<java.lang.Boolean> BLANK_LINES_IN_AST
      • USE_HARDCODED_LINK_ADDRESS_PARSER

        public static final DataKey<java.lang.Boolean> USE_HARDCODED_LINK_ADDRESS_PARSER
      • STRONG_WRAPS_EMPHASIS

        public static final DataKey<java.lang.Boolean> STRONG_WRAPS_EMPHASIS
        Default is false; when true, makes parsing CommonMark Spec 0.27 compliant.
      • LINKS_ALLOW_MATCHED_PARENTHESES

        public static final DataKey<java.lang.Boolean> LINKS_ALLOW_MATCHED_PARENTHESES
        Default is true; when false, makes parsing CommonMark Spec 0.27 compliant.
      • LIST_BLOCK_PARSER

        public static final DataKey<java.lang.Boolean> LIST_BLOCK_PARSER
      • HTML_BLOCK_DEEP_PARSER

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSER
      • HTML_BLOCK_DEEP_PARSE_NON_BLOCK

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_NON_BLOCK
      • HTML_BLOCK_COMMENT_ONLY_FULL_LINE

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_COMMENT_ONLY_FULL_LINE
      • HTML_BLOCK_START_ONLY_ON_BLOCK_TAGS

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_START_ONLY_ON_BLOCK_TAGS
      • HTML_BLOCK_TAGS

        public static final DataKey<java.util.List<java.lang.String>> HTML_BLOCK_TAGS
      • HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS
        Blank line interrupts HTML block when not in raw tag, otherwise only when closed
      • HTML_BLOCK_DEEP_PARSE_FIRST_OPEN_TAG_ON_ONE_LINE

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_FIRST_OPEN_TAG_ON_ONE_LINE
        Open tags must be contained on one line
      • HTML_BLOCK_DEEP_PARSE_MARKDOWN_INTERRUPTS_CLOSED

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_MARKDOWN_INTERRUPTS_CLOSED
        Other markdown elements can interrupt a closed block without an intervening blank line
      • HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS_PARTIAL_TAG

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS_PARTIAL_TAG
        Blank line interrupts a partially open tag, i.e. <TAG without a corresponding >
      • HTML_BLOCK_DEEP_PARSE_INDENTED_CODE_INTERRUPTS

        public static final DataKey<java.lang.Boolean> HTML_BLOCK_DEEP_PARSE_INDENTED_CODE_INTERRUPTS
        Indented code can interrupt HTML block
      • HTML_ALLOW_NAME_SPACE

        public static final DataKey<java.lang.Boolean> HTML_ALLOW_NAME_SPACE
        Namespaces are allowed in HTML elements; default is false for backward compatibility
      • TRANSLATION_HTML_BLOCK_TAG_PATTERN

        public static final DataKey<java.lang.String> TRANSLATION_HTML_BLOCK_TAG_PATTERN
        Used by formatter for translation parsing
      • TRANSLATION_HTML_INLINE_TAG_PATTERN

        public static final DataKey<java.lang.String> TRANSLATION_HTML_INLINE_TAG_PATTERN
      • TRANSLATION_AUTOLINK_TAG_PATTERN

        public static final DataKey<java.lang.String> TRANSLATION_AUTOLINK_TAG_PATTERN
      • LISTS_CODE_INDENT

        public static final DataKey<java.lang.Integer> LISTS_CODE_INDENT
      • LISTS_ITEM_INDENT

        public static final DataKey<java.lang.Integer> LISTS_ITEM_INDENT
      • LISTS_NEW_ITEM_CODE_INDENT

        public static final DataKey<java.lang.Integer> LISTS_NEW_ITEM_CODE_INDENT
      • LISTS_ITEM_MARKER_SPACE

        public static final DataKey<java.lang.Boolean> LISTS_ITEM_MARKER_SPACE
      • LISTS_ITEM_MARKER_SUFFIXES

        public static final DataKey<java.lang.String[]> LISTS_ITEM_MARKER_SUFFIXES
      • LISTS_NUMBERED_ITEM_MARKER_SUFFIXED

        public static final DataKey<java.lang.Boolean> LISTS_NUMBERED_ITEM_MARKER_SUFFIXED
      • LISTS_AUTO_LOOSE

        public static final DataKey<java.lang.Boolean> LISTS_AUTO_LOOSE
      • LISTS_AUTO_LOOSE_ONE_LEVEL_LISTS

        public static final DataKey<java.lang.Boolean> LISTS_AUTO_LOOSE_ONE_LEVEL_LISTS
      • LISTS_LOOSE_WHEN_PREV_HAS_TRAILING_BLANK_LINE

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_PREV_HAS_TRAILING_BLANK_LINE
      • LISTS_LOOSE_WHEN_LAST_ITEM_PREV_HAS_TRAILING_BLANK_LINE

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_LAST_ITEM_PREV_HAS_TRAILING_BLANK_LINE
      • LISTS_LOOSE_WHEN_HAS_NON_LIST_CHILDREN

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_HAS_NON_LIST_CHILDREN
      • LISTS_LOOSE_WHEN_BLANK_LINE_FOLLOWS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_BLANK_LINE_FOLLOWS_ITEM_PARAGRAPH
      • LISTS_LOOSE_WHEN_HAS_LOOSE_SUB_ITEM

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_HAS_LOOSE_SUB_ITEM
      • LISTS_LOOSE_WHEN_HAS_TRAILING_BLANK_LINE

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_HAS_TRAILING_BLANK_LINE
      • LISTS_LOOSE_WHEN_CONTAINS_BLANK_LINE

        public static final DataKey<java.lang.Boolean> LISTS_LOOSE_WHEN_CONTAINS_BLANK_LINE
      • LISTS_DELIMITER_MISMATCH_TO_NEW_LIST

        public static final DataKey<java.lang.Boolean> LISTS_DELIMITER_MISMATCH_TO_NEW_LIST
      • LISTS_END_ON_DOUBLE_BLANK

        public static final DataKey<java.lang.Boolean> LISTS_END_ON_DOUBLE_BLANK
      • LISTS_ITEM_TYPE_MISMATCH_TO_NEW_LIST

        public static final DataKey<java.lang.Boolean> LISTS_ITEM_TYPE_MISMATCH_TO_NEW_LIST
      • LISTS_ITEM_TYPE_MISMATCH_TO_SUB_LIST

        public static final DataKey<java.lang.Boolean> LISTS_ITEM_TYPE_MISMATCH_TO_SUB_LIST
      • LISTS_ORDERED_ITEM_DOT_ONLY

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_ITEM_DOT_ONLY
      • LISTS_ORDERED_LIST_MANUAL_START

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_LIST_MANUAL_START
      • LISTS_ITEM_CONTENT_AFTER_SUFFIX

        public static final DataKey<java.lang.Boolean> LISTS_ITEM_CONTENT_AFTER_SUFFIX
      • LISTS_BULLET_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_BULLET_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_ORDERED_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH
      • LISTS_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_BULLET_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_BULLET_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_ORDERED_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_EMPTY_ORDERED_NON_ONE_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

        public static final DataKey<java.lang.Boolean> LISTS_EMPTY_ORDERED_NON_ONE_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
      • LISTS_ITEM_PREFIX_CHARS

        public static final DataKey<java.lang.String> LISTS_ITEM_PREFIX_CHARS
      • CODE_BLOCK_INDENT

        public static final DataKey<java.lang.Integer> CODE_BLOCK_INDENT
    • Method Detail

      • builder

        public static Parser.Builder builder()
        Create a new builder for configuring a Parser.
        Returns:
        a builder
      • parse

        @NotNull
        public Document parse​(@NotNull
                              BasedSequence input)
        Parse the specified input text into a tree of nodes.

        Note that this method is thread-safe (a new parser state is used for each invocation).

        Specified by:
        parse in interface IParse
        Parameters:
        input - the text to parse
        Returns:
        the root node
      • parse

        @NotNull
        public Document parse​(@NotNull
                              java.lang.String input)
        Parse the specified input text into a tree of nodes.

        Note that this method is thread-safe (a new parser state is used for each invocation).

        Specified by:
        parse in interface IParse
        Parameters:
        input - the text to parse
        Returns:
        the root node
      • parseReader

        @NotNull
        public Document parseReader​(@NotNull
                                    java.io.Reader input)
                             throws java.io.IOException
        Parse the specified reader into a tree of nodes. The caller is responsible for closing the reader.

        Note that this method is thread-safe (a new parser state is used for each invocation).

        Specified by:
        parseReader in interface IParse
        Parameters:
        input - the reader to parse
        Returns:
        the root node
        Throws:
        java.io.IOException - when reading throws an exception
      • getOptions

        @NotNull
        public DataHolder getOptions()
        Description copied from interface: IParse
        Get Options for parsing
        Specified by:
        getOptions in interface IParse
        Returns:
        DataHolder for options
      • transferReferences

        public boolean transferReferences​(@NotNull
                                          Document document,
                                          @NotNull
                                          Document included,
                                          java.lang.Boolean onlyIfUndefined)
        Description copied from interface: IParse
        Transfer reference definitions between documents
        Specified by:
        transferReferences in interface IParse
        Parameters:
        document - destination document
        included - source document
        onlyIfUndefined - true to transfer only references not already defined in the destination document, false to transfer all, null to use the repository's KEEP_TYPE to make the determination (if KEEP_FIRST, then only transfer if undefined)
        Returns:
        true if any references were transferred
      • transferReferences

        public static <T extends Node> boolean transferReferences​(NodeRepository<T> destination,
                                                                  NodeRepository<T> included,
                                                                  boolean onlyIfUndefined)
      • addExtensions

        public static MutableDataHolder addExtensions​(MutableDataHolder options,
                                                      Extension... extensions)
        Add extension(s) to the extension list
        Parameters:
        options - mutable options holding existing extensions
        extensions - extension(s) to add
        Returns:
        mutable options
      • removeExtensions

        public static MutableDataHolder removeExtensions​(MutableDataHolder options,
                                                         java.lang.Class... extensions)
        Remove extension(s) of given class from the extension list
        Parameters:
        options - mutable options holding existing extensions
        extensions - extension classes to remove
        Returns:
        mutable options