syntax .html-entity

# Entry state of the character-reference sub-syntax. The states that
# call it do so from a `char "&"` test, so the "&" is already consumed.
state entity
    # A letter opens a named reference; it is buffered for the list
    # lookup done in state "name".
    char -b A-Za-z name
    # "#" opens a numeric reference.
    char "#" hash
    # Anything else: recolor the previous byte as an error and leave
    # the sub-syntax. Only a non-newline byte is itself marked as error.
    recolor error 1
    char "\n" END
    eat END error

# After "&#": either the first decimal digit or the x/X hex marker must
# follow; any other byte is an error and ends the reference.
state hash entity
    char 0-9 dec
    char Xx x
    eat END error

# After the hex marker: at least one hexadecimal digit is required.
state x entity
    char 0-9A-Fa-f hex
    eat END error

# Remaining digits of a hexadecimal reference.
state hex entity
    # ";" completes the reference.
    char ";" END entity
    char 0-9A-Fa-f this
    # Any other byte is an error and ends the reference.
    eat END error

# Remaining digits of a decimal reference.
state dec entity
    # ";" completes the reference.
    char ";" END entity
    char 0-9 this
    # Any other byte is an error and ends the reference.
    eat END error

# Body of a named reference. Every byte is buffered so that the whole
# name can be looked up in the "entity" list once it ends.
state name entity
    char -b 0-9A-Za-z this
    # Names found in the list continue where the closing ";" is optional.
    inlist entity optional-semicolon
    # All other names continue where ";" is mandatory. NOTE(review): -b
    # presumably keeps the buffered name available to the `recolor error`
    # in state "semicolon" - confirm against dte-syntax(5).
    noeat -b semicolon

# Reached for names found in the "entity" list: a ";" is consumed when
# present, otherwise the sub-syntax ends without consuming anything.
state optional-semicolon
    char ';' END entity
    noeat END

# Reached for names that are not in the "entity" list: the reference is
# only accepted when a ";" follows.
state semicolon
    char ';' END entity
    # No ";": recolor the buffered bytes as an error, then consume the
    # offending byte as an error too and leave the sub-syntax.
    recolor error
    eat END error

# Names that are accepted without a trailing ";": state "name" sends a
# name found in this list to "optional-semicolon" and every other name
# to "semicolon", where a missing ";" is coloured as an error.
# Generated by tools/get-html-entities.sh
list entity \
    aacute Aacute acirc Acirc acute aelig AElig agrave Agrave amp AMP \
    aring Aring atilde Atilde auml Auml brvbar ccedil Ccedil cedil cent \
    copy COPY curren deg divide eacute Eacute ecirc Ecirc egrave Egrave \
    eth ETH euml Euml frac12 frac14 frac34 gt GT iacute Iacute icirc \
    Icirc iexcl igrave Igrave iquest iuml Iuml laquo lt LT macr micro \
    middot nbsp not ntilde Ntilde oacute Oacute ocirc Ocirc ograve \
    Ograve ordf ordm oslash Oslash otilde Otilde ouml Ouml para plusmn \
    pound quot QUOT raquo reg REG sect shy sup1 sup2 sup3 szlig thorn \
    THORN times uacute Uacute ucirc Ucirc ugrave Ugrave uml uuml Uuml \
    yacute Yacute yen yuml

syntax html

# TODO: Allow comments before doctype
# TODO: Don't highlight tags inside RCDATA elements (e.g. style, script, etc.)

# Initial state: skip leading whitespace, hand a "<!" construct to
# "doctype", and pass anything else to "text" without consuming it.
state start text
    char "\n\t " this
    str '<!' doctype
    noeat text

# Inside a "<!" construct recognised by "start"; coloured as a comment.
state doctype comment
    # A "<" before the closing ">" is flagged and ends the construct.
    char "<" text error
    # ">" closes the construct.
    char ">" text comment
    eat this

# Character data between tags: dispatches to comments, tags and
# character references, and consumes everything else as plain text.
state text
    str "<!--" comment
    char < tag-start
    # A stray ">" is flagged, but highlighting stays in this state.
    # Going back to "start" instead would re-arm its `str "<!"` test, so
    # a "<!-- ... -->" following the stray ">" would be taken for a
    # doctype: ended at its first ">" and with any "<" inside it flagged.
    char > this error
    char "&" .html-entity:this
    eat this

# Inside "<!-- ... -->": everything up to and including the closing
# "-->" is coloured as a comment. The bare "--" ends option parsing so
# that the pattern, which starts with "-", is not read as a flag.
state comment
    str -- "-->" text comment
    eat this

# Directly after "<".
state tag-start tag
    # "<>" (optionally with whitespace in between) closes immediately.
    char ">" text tag
    # "/" makes this a closing tag.
    char "/" close-tag
    char "\n\t " this
    # First byte of the element name; buffered for the list lookups in
    # state "tag-name".
    char -b 0-9A-Za-z_ tag-name
    eat this error

# Directly after "</".
state close-tag tag
    char "\n\t " this
    # First byte of the element name; buffered for the list lookups in
    # state "close-tag-name".
    char -b 0-9A-Za-z_ close-tag-name
    # Anything else is an error and drops back to plain text.
    eat text error

# Element name of an opening tag. The name is buffered and, once it
# ends, looked up in the "tag" and "tag-deprecated" lists. All three
# outcomes continue in "attrs"; they differ only in the emitted colour.
state tag-name tag-unknown
    char -b 0-9A-Za-z_ this
    inlist tag attrs
    inlist tag-deprecated attrs
    # Name is in neither list.
    noeat attrs

# Element name of a closing tag. The name is buffered and, once it
# ends, looked up in the "tag" and "tag-deprecated" lists. All three
# outcomes continue in "close-tag-end".
state close-tag-name tag-unknown
    char -b 0-9A-Za-z_ this
    inlist tag close-tag-end
    inlist tag-deprecated close-tag-end
    # Name is in neither list.
    noeat close-tag-end

# After the element name of a closing tag: only whitespace and the
# final ">" are accepted.
state close-tag-end tag
    char ">" text tag
    char "\n\t " this
    # Anything else is an error and drops back to plain text.
    eat text error

# Between the element name and the end of an opening tag.
state attrs code
    # ">" ends the tag.
    char ">" text tag
    # "/" may only be followed by ">" (checked in "short-close").
    char "/" short-close
    # A letter starts an attribute name.
    char -b A-Za-z attr-name
    char "\n\t " this
    eat this error

# After "/" inside an opening tag: ">" must follow directly. Any other
# byte is an error, and highlighting drops back to plain text.
state short-close tag
    char ">" text tag
    eat text error

# Attribute name; entered from "attrs" on the first letter.
state attr-name attr
    # Digits are accepted after the first character, so that names such
    # as "x1" or "data-v2" stay in one piece. Without them the digit
    # falls through to "attrs", where it, the "=" and the opening quote
    # after it are all consumed by the error branch.
    char a-zA-Z0-9:_- this
    char = attr-eq
    # Name without a value: back to "attrs" without consuming anything.
    noeat attrs

# Directly after "=": a quote character opens the matching string
# state. Anything else is handed back to "attrs" unconsumed.
state attr-eq attr
    char "'" sq
    char '"' dq
    noeat attrs

# Double-quoted attribute value.
state dq string
    # Character references are highlighted inside the value.
    char "&" .html-entity:this
    # A newline ends the value without a closing quote.
    char "\n" attrs
    # Closing quote.
    char '"' attrs string
    eat this

# Single-quoted attribute value.
state sq string
    # Character references are highlighted inside the value.
    char "&" .html-entity:this
    # A newline ends the value without a closing quote.
    char "\n" attrs
    # Closing quote.
    char "'" attrs string
    eat this

# Element names matched by the `inlist tag` tests in "tag-name" and
# "close-tag-name". Two groups, separated by the lone continuation line;
# each group is laid out alphabetically.
list -i tag \
    a abbr address area \
    b base bdo blockquote body br button \
    caption cite code col colgroup \
    dd del dfn div dl dt \
    em fieldset form \
    h1 h2 h3 h4 h5 h6 head hr html \
    i iframe img input ins \
    kbd label legend li link \
    map meta noscript \
    object ol optgroup option \
    p param pre q \
    samp script select small span strong style sub sup \
    table tbody td textarea tfoot th thead title tr \
    ul var \
\
    article aside audio \
    bdi canvas command \
    data datalist details dialog \
    embed figcaption figure footer \
    header hgroup \
    main mark menu meter \
    nav output picture progress \
    rp rt ruby \
    s section slot source summary \
    template time track \
    u video wbr

# Element names matched by the `inlist tag-deprecated` tests in
# "tag-name" and "close-tag-name"; laid out alphabetically.
list -i tag-deprecated \
    acronym applet \
    basefont bgsound big blink \
    center dir \
    font frame frameset \
    isindex keygen listing \
    marquee menuitem multicol \
    nextid nobr noembed noframes \
    plaintext rb rtc \
    spacer strike tt xmp

# Default colours for the emit names that are specific to this file:
# each line maps the name(s) on the right to the colour on the left.
default special entity
default error tag-deprecated
default code tag-unknown
