Class Parser

java.lang.Object
com.vladsch.flexmark.parser.Parser
All Implemented Interfaces:
IParse

public class Parser extends Object implements IParse
Parses input text to a tree of nodes.

Start with the builder() method, configure the parser and build it. Example:


 Parser parser = Parser.builder().build();
 Node document = parser.parse("input text");
 
  • Field Details

    • EXTENSIONS

      public static final DataKey<Collection<Extension>> EXTENSIONS
    • REFERENCES_KEEP

      public static final DataKey<KeepType> REFERENCES_KEEP
    • REFERENCES

      public static final DataKey<ReferenceRepository> REFERENCES
    • ASTERISK_DELIMITER_PROCESSOR

      public static final DataKey<Boolean> ASTERISK_DELIMITER_PROCESSOR
    • TRACK_DOCUMENT_LINES

      public static final DataKey<Boolean> TRACK_DOCUMENT_LINES
    • BLOCK_QUOTE_PARSER

      public static final DataKey<Boolean> BLOCK_QUOTE_PARSER
    • BLOCK_QUOTE_EXTEND_TO_BLANK_LINE

      public static final DataKey<Boolean> BLOCK_QUOTE_EXTEND_TO_BLANK_LINE
    • BLOCK_QUOTE_IGNORE_BLANK_LINE

      public static final DataKey<Boolean> BLOCK_QUOTE_IGNORE_BLANK_LINE
    • BLOCK_QUOTE_ALLOW_LEADING_SPACE

      public static final DataKey<Boolean> BLOCK_QUOTE_ALLOW_LEADING_SPACE
    • BLOCK_QUOTE_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> BLOCK_QUOTE_INTERRUPTS_PARAGRAPH
    • BLOCK_QUOTE_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> BLOCK_QUOTE_INTERRUPTS_ITEM_PARAGRAPH
    • BLOCK_QUOTE_WITH_LEAD_SPACES_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> BLOCK_QUOTE_WITH_LEAD_SPACES_INTERRUPTS_ITEM_PARAGRAPH
    • FENCED_CODE_BLOCK_PARSER

      public static final DataKey<Boolean> FENCED_CODE_BLOCK_PARSER
    • MATCH_CLOSING_FENCE_CHARACTERS

      public static final DataKey<Boolean> MATCH_CLOSING_FENCE_CHARACTERS
    • FENCED_CODE_CONTENT_BLOCK

      public static final DataKey<Boolean> FENCED_CODE_CONTENT_BLOCK
    • CODE_SOFT_LINE_BREAKS

      public static final DataKey<Boolean> CODE_SOFT_LINE_BREAKS
    • HARD_LINE_BREAK_LIMIT

      public static final DataKey<Boolean> HARD_LINE_BREAK_LIMIT
    • HEADING_PARSER

      public static final DataKey<Boolean> HEADING_PARSER
    • HEADING_SETEXT_MARKER_LENGTH

      public static final DataKey<Integer> HEADING_SETEXT_MARKER_LENGTH
    • HEADING_NO_ATX_SPACE

      public static final DataKey<Boolean> HEADING_NO_ATX_SPACE
    • ESCAPE_HEADING_NO_ATX_SPACE

      public static final DataKey<Boolean> ESCAPE_HEADING_NO_ATX_SPACE
    • HEADING_NO_EMPTY_HEADING_WITHOUT_SPACE

      public static final DataKey<Boolean> HEADING_NO_EMPTY_HEADING_WITHOUT_SPACE
    • HEADING_NO_LEAD_SPACE

      public static final DataKey<Boolean> HEADING_NO_LEAD_SPACE
    • HEADING_CAN_INTERRUPT_ITEM_PARAGRAPH

      public static final DataKey<Boolean> HEADING_CAN_INTERRUPT_ITEM_PARAGRAPH
    • HTML_BLOCK_PARSER

      public static final DataKey<Boolean> HTML_BLOCK_PARSER
    • HTML_COMMENT_BLOCKS_INTERRUPT_PARAGRAPH

      public static final DataKey<Boolean> HTML_COMMENT_BLOCKS_INTERRUPT_PARAGRAPH
    • HTML_FOR_TRANSLATOR

      public static final DataKey<Boolean> HTML_FOR_TRANSLATOR
    • INLINE_DELIMITER_DIRECTIONAL_PUNCTUATIONS

      public static final DataKey<Boolean> INLINE_DELIMITER_DIRECTIONAL_PUNCTUATIONS
    • INDENTED_CODE_BLOCK_PARSER

      public static final DataKey<Boolean> INDENTED_CODE_BLOCK_PARSER
    • INDENTED_CODE_NO_TRAILING_BLANK_LINES

      public static final DataKey<Boolean> INDENTED_CODE_NO_TRAILING_BLANK_LINES
    • INTELLIJ_DUMMY_IDENTIFIER

      public static final DataKey<Boolean> INTELLIJ_DUMMY_IDENTIFIER
    • PARSE_INNER_HTML_COMMENTS

      public static final DataKey<Boolean> PARSE_INNER_HTML_COMMENTS
    • PARSE_MULTI_LINE_IMAGE_URLS

      public static final DataKey<Boolean> PARSE_MULTI_LINE_IMAGE_URLS
    • PARSE_JEKYLL_MACROS_IN_URLS

      public static final DataKey<Boolean> PARSE_JEKYLL_MACROS_IN_URLS
    • REFERENCE_PARAGRAPH_PRE_PROCESSOR

      public static final DataKey<Boolean> REFERENCE_PARAGRAPH_PRE_PROCESSOR
    • THEMATIC_BREAK_PARSER

      public static final DataKey<Boolean> THEMATIC_BREAK_PARSER
    • THEMATIC_BREAK_RELAXED_START

      public static final DataKey<Boolean> THEMATIC_BREAK_RELAXED_START
    • UNDERSCORE_DELIMITER_PROCESSOR

      public static final DataKey<Boolean> UNDERSCORE_DELIMITER_PROCESSOR
    • BLANK_LINES_IN_AST

      public static final DataKey<Boolean> BLANK_LINES_IN_AST
    • STRONG_WRAPS_EMPHASIS

      public static final DataKey<Boolean> STRONG_WRAPS_EMPHASIS
      STRONG_WRAPS_EMPHASIS defaults to false; when true, makes parsing compliant with CommonMark Spec 0.27
    • LIST_BLOCK_PARSER

      public static final DataKey<Boolean> LIST_BLOCK_PARSER
    • PARSER_EMULATION_PROFILE

      public static final DataKey<ParserEmulationProfile> PARSER_EMULATION_PROFILE
    • HTML_BLOCK_DEEP_PARSER

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSER
    • HTML_BLOCK_DEEP_PARSE_NON_BLOCK

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_NON_BLOCK
    • HTML_BLOCK_COMMENT_ONLY_FULL_LINE

      public static final DataKey<Boolean> HTML_BLOCK_COMMENT_ONLY_FULL_LINE
    • HTML_BLOCK_START_ONLY_ON_BLOCK_TAGS

      public static final DataKey<Boolean> HTML_BLOCK_START_ONLY_ON_BLOCK_TAGS
    • HTML_BLOCK_TAGS

      public static final DataKey<List<String>> HTML_BLOCK_TAGS
    • HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS
      Blank line interrupts HTML block when not in raw tag, otherwise only when closed
    • HTML_BLOCK_DEEP_PARSE_FIRST_OPEN_TAG_ON_ONE_LINE

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_FIRST_OPEN_TAG_ON_ONE_LINE
      Open tags must be contained on one line
    • HTML_BLOCK_DEEP_PARSE_MARKDOWN_INTERRUPTS_CLOSED

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_MARKDOWN_INTERRUPTS_CLOSED
      Other markdown elements can interrupt a closed block without an intervening blank line
    • HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS_PARTIAL_TAG

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_BLANK_LINE_INTERRUPTS_PARTIAL_TAG
      Blank line interrupts a partially open tag, i.e. <TAG without a corresponding >
    • HTML_BLOCK_DEEP_PARSE_INDENTED_CODE_INTERRUPTS

      public static final DataKey<Boolean> HTML_BLOCK_DEEP_PARSE_INDENTED_CODE_INTERRUPTS
      Indented code can interrupt HTML block
    • HTML_ALLOW_NAME_SPACE

      public static final DataKey<Boolean> HTML_ALLOW_NAME_SPACE
      Namespaces are allowed in HTML elements; default false for backward compatibility
    • TRANSLATION_HTML_BLOCK_TAG_PATTERN

      public static final DataKey<String> TRANSLATION_HTML_BLOCK_TAG_PATTERN
      Used by formatter for translation parsing
    • TRANSLATION_HTML_INLINE_TAG_PATTERN

      public static final DataKey<String> TRANSLATION_HTML_INLINE_TAG_PATTERN
    • LISTS_CODE_INDENT

      public static final DataKey<Integer> LISTS_CODE_INDENT
    • LISTS_ITEM_INDENT

      public static final DataKey<Integer> LISTS_ITEM_INDENT
    • LISTS_NEW_ITEM_CODE_INDENT

      public static final DataKey<Integer> LISTS_NEW_ITEM_CODE_INDENT
    • LISTS_ITEM_MARKER_SPACE

      public static final DataKey<Boolean> LISTS_ITEM_MARKER_SPACE
    • LISTS_ITEM_MARKER_SUFFIXES

      public static final DataKey<String[]> LISTS_ITEM_MARKER_SUFFIXES
    • LISTS_NUMBERED_ITEM_MARKER_SUFFIXED

      public static final DataKey<Boolean> LISTS_NUMBERED_ITEM_MARKER_SUFFIXED
    • LISTS_AUTO_LOOSE

      public static final DataKey<Boolean> LISTS_AUTO_LOOSE
    • LISTS_AUTO_LOOSE_ONE_LEVEL_LISTS

      public static final DataKey<Boolean> LISTS_AUTO_LOOSE_ONE_LEVEL_LISTS
    • LISTS_LOOSE_WHEN_PREV_HAS_TRAILING_BLANK_LINE

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_PREV_HAS_TRAILING_BLANK_LINE
    • LISTS_LOOSE_WHEN_LAST_ITEM_PREV_HAS_TRAILING_BLANK_LINE

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_LAST_ITEM_PREV_HAS_TRAILING_BLANK_LINE
    • LISTS_LOOSE_WHEN_HAS_NON_LIST_CHILDREN

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_HAS_NON_LIST_CHILDREN
    • LISTS_LOOSE_WHEN_BLANK_LINE_FOLLOWS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_BLANK_LINE_FOLLOWS_ITEM_PARAGRAPH
    • LISTS_LOOSE_WHEN_HAS_LOOSE_SUB_ITEM

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_HAS_LOOSE_SUB_ITEM
    • LISTS_LOOSE_WHEN_HAS_TRAILING_BLANK_LINE

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_HAS_TRAILING_BLANK_LINE
    • LISTS_LOOSE_WHEN_CONTAINS_BLANK_LINE

      public static final DataKey<Boolean> LISTS_LOOSE_WHEN_CONTAINS_BLANK_LINE
    • LISTS_DELIMITER_MISMATCH_TO_NEW_LIST

      public static final DataKey<Boolean> LISTS_DELIMITER_MISMATCH_TO_NEW_LIST
    • LISTS_END_ON_DOUBLE_BLANK

      public static final DataKey<Boolean> LISTS_END_ON_DOUBLE_BLANK
    • LISTS_ITEM_TYPE_MISMATCH_TO_NEW_LIST

      public static final DataKey<Boolean> LISTS_ITEM_TYPE_MISMATCH_TO_NEW_LIST
    • LISTS_ITEM_TYPE_MISMATCH_TO_SUB_LIST

      public static final DataKey<Boolean> LISTS_ITEM_TYPE_MISMATCH_TO_SUB_LIST
    • LISTS_ORDERED_ITEM_DOT_ONLY

      public static final DataKey<Boolean> LISTS_ORDERED_ITEM_DOT_ONLY
    • LISTS_ORDERED_LIST_MANUAL_START

      public static final DataKey<Boolean> LISTS_ORDERED_LIST_MANUAL_START
    • LISTS_ITEM_CONTENT_AFTER_SUFFIX

      public static final DataKey<Boolean> LISTS_ITEM_CONTENT_AFTER_SUFFIX
    • LISTS_BULLET_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_BULLET_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_ORDERED_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_ORDERED_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_PARAGRAPH
    • LISTS_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_BULLET_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_NON_ONE_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_BULLET_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_BULLET_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_ORDERED_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_EMPTY_ORDERED_NON_ONE_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH

      public static final DataKey<Boolean> LISTS_EMPTY_ORDERED_NON_ONE_SUB_ITEM_INTERRUPTS_ITEM_PARAGRAPH
    • LISTS_ITEM_PREFIX_CHARS

      public static final DataKey<String> LISTS_ITEM_PREFIX_CHARS
    • SPECIAL_LEAD_IN_HANDLERS

      public static final DataKey<List<SpecialLeadInHandler>> SPECIAL_LEAD_IN_HANDLERS
    • CODE_BLOCK_INDENT

      public static final DataKey<Integer> CODE_BLOCK_INDENT
    • blockParserFactories

      private final List<CustomBlockParserFactory> blockParserFactories
    • delimiterProcessors

      private final Map<Character,DelimiterProcessor> delimiterProcessors
    • delimiterCharacters

      private final BitSet delimiterCharacters
    • specialCharacters

      private final BitSet specialCharacters
    • postProcessorDependencies

      private final List<PostProcessorManager.PostProcessorDependencyStage> postProcessorDependencies
    • paragraphPreProcessorFactories

      private final List<List<ParagraphPreProcessorFactory>> paragraphPreProcessorFactories
    • blockPreProcessorDependencies

      private final List<List<BlockPreProcessorFactory>> blockPreProcessorDependencies
    • linkRefProcessors

      private final LinkRefProcessorData linkRefProcessors
    • inlineParserExtensionFactories

      private final List<InlineParserExtensionFactory> inlineParserExtensionFactories
    • inlineParserFactory

      private final InlineParserFactory inlineParserFactory
    • options

      @NotNull private final @NotNull DataHolder options
  • Constructor Details

  • Method Details

    • builder

      public static Parser.Builder builder()
      Create a new builder for configuring a Parser.
      Returns:
      a builder
    • builder

      public static Parser.Builder builder(DataHolder options)
    • parse

      @NotNull public @NotNull Document parse(@NotNull @NotNull BasedSequence input)
      Parse the specified input text into a tree of nodes.

      Note that this method is thread-safe (a new parser state is used for each invocation).

      Specified by:
      parse in interface IParse
      Parameters:
      input - the text to parse
      Returns:
      the root node
    • parse

      @NotNull public @NotNull Document parse(@NotNull @NotNull String input)
      Parse the specified input text into a tree of nodes.

      Note that this method is thread-safe (a new parser state is used for each invocation).

      Specified by:
      parse in interface IParse
      Parameters:
      input - the text to parse
      Returns:
      the root node
    • parseReader

      @NotNull public @NotNull Document parseReader(@NotNull @NotNull Reader input) throws IOException
      Parse the specified reader into a tree of nodes. The caller is responsible for closing the reader.

      Note that this method is thread-safe (a new parser state is used for each invocation).

      Specified by:
      parseReader in interface IParse
      Parameters:
      input - the reader to parse
      Returns:
      the root node
      Throws:
      IOException - when reading throws an exception
    • postProcess

      private Document postProcess(Document document)
    • getOptions

      @NotNull public @NotNull DataHolder getOptions()
      Description copied from interface: IParse
      Get Options for parsing
      Specified by:
      getOptions in interface IParse
      Returns:
      DataHolder for options
    • transferReferences

      public boolean transferReferences(@NotNull @NotNull Document document, @NotNull @NotNull Document included, Boolean onlyIfUndefined)
      Description copied from interface: IParse
      Transfer reference definitions between documents
      Specified by:
      transferReferences in interface IParse
      Parameters:
      document - destination document
      included - source document
      onlyIfUndefined - true to transfer only references not already defined in the destination document, false to transfer all, null to use the repository's KEEP_TYPE to make the determination (if KEEP_FIRST, then transfer only if undefined)
      Returns:
      true if any references were transferred
    • transferReferences

      public static <T extends Node> boolean transferReferences(NodeRepository<T> destination, NodeRepository<T> included, boolean onlyIfUndefined)
    • addExtensions

      public static MutableDataHolder addExtensions(MutableDataHolder options, Extension... extensions)
      Add extension(s) to the extension list
      Parameters:
      options - mutable options holding existing extensions
      extensions - extension(s) to add
      Returns:
      mutable options
    • removeExtensions

      public static MutableDataHolder removeExtensions(MutableDataHolder options, Class... extensions)
      Remove extension(s) of given class from the extension list
      Parameters:
      options - mutable options holding existing extensions
      extensions - extension classes to remove
      Returns:
      mutable options