• Top
    • Documentation
    • Books
    • Boolean-reasoning
    • Projects
    • Debugging
    • Std
    • Community
    • Proof-automation
    • ACL2
    • Macro-libraries
    • Interfacing-tools
    • Hardware-verification
    • Software-verification
      • Kestrel-books
        • Crypto-hdwallet
        • Apt
        • Error-checking
        • Fty-extensions
        • Isar
        • Kestrel-utilities
        • Set
        • Soft
        • C
          • Syntax-for-tools
            • Disambiguator
            • Abstract-syntax
            • Parser
              • Parse-exprs/decls/stmts
              • Check-full-ppnumber
              • Read-char
              • Parstate
              • Lex-identifier/keyword
                • Lex-oct-iconst-/-dec-fconst
                • Lex-lexeme
                • Parse-external-declaration
                • Parse-cast-expression
                • Lex-isuffix-if-present
                • Parse-expression-or-type-name
                • Parse-postfix-expression
                • Lex-hex-iconst/fconst
                • Lex-dec-iconst/fconst
                • Lex-block-comment
                • Lex-escape-sequence
                • Read-token
                • Token
                • Lex-*-hexadecimal-digit
                • Lex-*-c-char
                • Lex-*-s-char
                • Parse-specifier/qualifier
                • Lex-*-digit
                • Lex-line-comment
                • Char-to-msg
                • Parse-primary-expression
                • Parse-declaration-specifier
                • Lex-iconst/fconst
                • Parse-external-declaration-list
                • Parse-declaration-specifiers
                • Lex-dec-expo-if-present
                • Init-parstate
                • Reread-to-token
                • Parse-asm-name-specifier
                • Lex-character-constant
                • Parse-expression-rest
                • Parstate$
                • Lex-dec-fconst
                • Token-unary-expression-start-p
                • Parse-declarator-or-abstract-declarator
                • Parse-asm-goto-labels
                • Parse-asm-clobbers
                • Lex-stringlit
                • Lex-non-octal-digit
                • Position
                • Lexeme
                • Lex-fsuffix-if-present
                • Parse-translation-unit
                • Lex-hexadecimal-digit
                • Token-type-specifier-keyword-p
                • Token-postfix-expression-rest-start-p
                • To-parstate$
                • Parse-?-asm-name-specifier
                • Parse-postfix-expression-rest
                • Parse-expression
                • Make-expr-unary-with-preinc/predec-ops
                • Lex-sign-if-present
                • Lex-dec-expo
                • Lex-bin-expo
                • Unread-to-token
                • Parse-*-stringlit
                • Parse-statement
                • Parse-fileset
                • Token-option
                • Lexeme-option
                • Token-struct-declaration-start-p
                • Parse-*-attribute-specifier
                • Parse-initializer-list
                • Parse-file
                • Parse-pointer
                • Parse-array/function-declarator
                • Unread-chars
                • Span
                • Read-stringlit
                • Lex-hex-quad
                • Unread-tokens
                • Token-type-qualifier-p
                • Read-identifier
                • Parse-*-asm-qualifier
                • Unread-char
                • Parse-attribute-name
                • Parse-argument-expressions
                • Make-expr-cast/add-or-cast/sub-ambig
                • Read-punctuator
                • Parse-struct-or-union-specifier
                • Parse-assignment-expression
                • Parse-asm-clobber
                • Token-specifier/qualifier-start-p
                • Token-primary-expression-start-p
                • Token-function-specifier-p
                • Reterr-msg
                • Read-keyword
                • Parse-*-increment/decrement
                • Parse-direct-abstract-declarator
                • Parse-declaration-or-statement
                • Char+position
                • Unread-token
                • Token+span
                • Token-expression-start-p
                • Update-parstate->tokens-unread
                • Update-parstate->chars-unread
                • Token-to-msg
                • Update-parstate->tokens-read
                • Update-parstate->tokens-length
                • Update-parstate->position
                • Update-parstate->chars-read
                • Parse-parameter-declaration
                • Parse-argument-expressions-rest
                • Update-parstate->chars-length
                • Token-to-type-specifier-keyword
                • Update-parstate->bytes
                • Parse-unary-expression
                • Parse-generic-associations-rest
                • Parse-conditional-expression
                • Update-parstate->size
                • Update-parstate->gcc
                • Parsize
                • Parse-direct-abstract-declarator-rest
                • Token-declaration-specifier-start-p
                • Parse-designator-list
                • Token-designation?-initializer-start-p
                • Token-abstract-declarator-start-p
                • Parse-?-asm-output-operands
                • Parse-?-asm-input-operands
                • Parse-struct-declaration
                • Parse-specifier-qualifier-list
                • Parse-parameter-declaration-list
                • Parse-constant-expression
                • Token-type-specifier-start-p
                • Token-type-qualifier-or-attribute-specifier-start-p
                • Parse-static-assert-declaration
                • Parse-fileset-loop
                • Parse-direct-declarator
                • Parse-declaration
                • Parse-attribute-parameters
                • Token-unary-operator-p
                • Token-to-unary-operator
                • Token-to-type-qualifier
                • Token-storage-class-specifier-p
                • Token-punctuatorp
                • Token-direct-abstract-declarator-start-p
                • Token-declarator-start-p
                • Parse-direct-declarator-rest
                • Token-to-storage-class-specifier
                • Token-to-assignment-operator
                • Token-to-asm-qualifier
                • Token-struct-declarator-start-p
                • Token-keywordp
                • Token-initializer-start-p
                • Token-direct-declarator-start-p
                • Token-assignment-operator-p
                • Parse-type-qualifier-and-attribute-specifier-list
                • Parse-enumerator-list
                • Parse-designation?-initializer
                • Parse-compound-literal
                • Parse-block-item
                • Token-type-name-start-p
                • Token-to-function-specifier
                • Token-preinc/predec-operator-p
                • Token-multiplicative-operator-p
                • Token-designator-start-p
                • Token-designation-start-p
                • Parstate->tokens-unread
                • Parstate->chars-unread
                • Parstate->bytes
                • Parse-initializer
                • Parse-generic-association
                • Parse-declaration-list
                • Parse-attribute-specifier
                • Parse-asm-output-operands
                • Token-to-relational-operator
                • Token-to-preinc/predec-operator
                • Token-to-multiplicative-operator
                • Token-relational-operator-p
                • Token-equality-operator-p
                • Token-asm-qualifier-p
                • Token-additive-operator-p
                • Span-join
                • Parstate->tokens-read
                • Parstate->tokens-length
                • Parstate->size
                • Parstate->chars-read
                • Parstate->chars-length
                • Parse-asm-statement
                • Parse-asm-input-operands
                • Update-parstate->token
                • Update-parstate->char
                • Token-to-equality-operator
                • Token-to-additive-operator
                • Token-shift-operator-p
                • To-parstate$-tokens-unread
                • To-parstate$-chars-unread
                • Position-inc-line
                • Position-inc-column
                • Parstate->gcc
                • Parse-type-name
                • Parse-struct-declarator-list
                • Parse-struct-declaration-list
                • Parse-relational-expression-rest
                • Parse-multiplicative-expression-rest
                • Parse-logical-or-expression-rest
                • Parse-logical-and-expression-rest
                • Parse-inclusive-or-expression-rest
                • Parse-exclusive-or-expression-rest
                • Parse-equality-expression-rest
                • Parse-array/function-abstract-declarator
                • Parse-additive-expression-rest
                • Token-to-shift-operator
                • To-parstate$-tokens-read
                • Parse-struct-declarator
                • Parse-shift-expression-rest
                • Parse-member-designor
                • Parse-init-declarator-list
                • Parse-init-declarator
                • Parse-and-expression-rest
                • Parse-alignment-specifier
                • To-parstate$-chars-read
                • Position-to-msg
                • Parstate->token
                • Parstate->char
                • Parse-shift-expression
                • Parse-relational-expression
                • Parse-multiplicative-expression
                • Parse-logical-or-expression
                • Parse-logical-and-expression
                • Parse-inclusive-or-expression
                • Parse-exclusive-or-expression
                • Parse-equality-expression
                • Parse-enum-specifier
                • Parse-block-item-list
                • Parse-attribute-list
                • Parse-and-expression
                • Parse-additive-expression
                • Parse-abstract-declarator
                • Char+position-list
                • Token+span-list
                • Span-to-msg
                • Parstate-fix
                • Parse-member-designor-rest
                • Position-init
                • Parstate->position
                • Parse-designator
                • Parse-declarator
                • Parse-attribute
                • Irr-token
                • Irr-span
                • Irr-position
                • Irr-lexeme
                • Token-list
                • Parse-type-qualifier-or-attribute-specifier
                • Parse-enumerator
                • Parse-asm-output-operand
                • Parse-asm-input-operand
              • Validator
              • Printer
              • Formalized-subset
              • Mapping-to-language-definition
              • Input-files
              • Defpred
              • Output-files
              • Abstract-syntax-operations
              • Validation-information
              • Implementation-environments
              • Concrete-syntax
              • Ascii-identifiers
              • Unambiguity
              • Preprocessing
              • Abstraction-mapping
            • Atc
            • Language
            • Representation
            • Transformation-tools
            • Insertion-sort
            • Pack
          • Bv
          • Imp-language
          • Event-macros
          • Java
          • Bitcoin
          • Ethereum
          • Yul
          • Zcash
          • ACL2-programming-language
          • Prime-fields
          • Json
          • Syntheto
          • File-io-light
          • Cryptography
          • Number-theory
          • Lists-light
          • Axe
          • Builtins
          • Solidity
          • Helpers
          • Htclient
          • Typed-lists-light
          • Arithmetic-light
        • X86isa
        • Axe
        • Execloader
      • Math
      • Testing-utilities
    • Parser

    Lex-identifier/keyword

    Lex an identifier or keyword.

    Signature
    (lex-identifier/keyword first-char first-pos parstate) 
      → 
    (mv erp lexeme span new-parstate)
    Arguments
    first-char — Guard (unsigned-byte-p 8 first-char).
    first-pos — Guard (positionp first-pos).
    parstate — Guard (parstatep parstate).
    Returns
    lexeme — Type (lexemep lexeme).
    span — Type (spanp span).
    new-parstate — Type (parstatep new-parstate), given (parstatep parstate).

    This is called after the first character of the identifier or keyword has been already read; that character is passed to this function. The position of that character is also passed as input.

    Since grammatically keywords are identifiers, we just lex grammatical identifiers, but return a keyword lexeme if the grammatical identifier matches a keyword. If GCC extensions are supported, we check the grammatical identifier against some additional keywords; see the ABNF grammar rule for gcc-keyword.

    Given that the first character (a letter or underscore) has already been read, it remains to read zero or more letters, digits, and underscores. These are read in a loop, which stops when no letter, digit, or underscore is read. The stopping happens if there is no next character (i.e. end of file), or the next character is something else; in the latter case, the character is unread, because it could be part of the next lexeme. If successful, the loop returns a list of characters (natural numbers), which the caller combines with the first character to form a string. This is an ASCII string by construction, so the characters all satisfy (unsigned-byte-p 7), but we use (unsigned-byte-p 8) in the guard of this function and in the return type of the loop, because nats=>string has that as guard (more precisely, lists of that). If the ASCII string is a keyword, we return a keyword token. Otherwise, we return an identifier token.

    Definitions and Theorems

    Function: lex-identifier/keyword-loop

    (defun lex-identifier/keyword-loop (pos-so-far parstate)
      (declare (xargs :stobjs (parstate)))
      (declare (xargs :guard (and (positionp pos-so-far)
                                  (parstatep parstate))))
      (let ((__function__ 'lex-identifier/keyword-loop))
        (declare (ignorable __function__))
        (b* (((reterr) nil (irr-position) parstate)
             ((erp char pos parstate)
              (read-char parstate))
             ((when (not char))
              (retok nil (position-fix pos-so-far)
                     parstate))
             ((unless (or (and (<= (char-code #\A) char)
                               (<= char (char-code #\Z)))
                          (and (<= (char-code #\a) char)
                               (<= char (char-code #\z)))
                          (and (<= (char-code #\0) char)
                               (<= char (char-code #\9)))
                          (= char (char-code #\_))))
              (b* ((parstate (unread-char parstate)))
                (retok nil (position-fix pos-so-far)
                       parstate)))
             ((erp chars last-pos parstate)
              (lex-identifier/keyword-loop pos parstate)))
          (retok (cons char chars)
                 last-pos parstate))))

    Theorem: return-type-of-lex-identifier/keyword-loop.chars

    (defthm return-type-of-lex-identifier/keyword-loop.chars
      (b* (((mv acl2::?erp
                ?chars ?last-pos ?new-parstate)
            (lex-identifier/keyword-loop pos-so-far parstate)))
        (unsigned-byte-listp 8 chars))
      :rule-classes :rewrite)

    Theorem: positionp-of-lex-identifier/keyword-loop.last-pos

    (defthm positionp-of-lex-identifier/keyword-loop.last-pos
      (b* (((mv acl2::?erp
                ?chars ?last-pos ?new-parstate)
            (lex-identifier/keyword-loop pos-so-far parstate)))
        (positionp last-pos))
      :rule-classes :rewrite)

    Theorem: parstatep-of-lex-identifier/keyword-loop.new-parstate

    (defthm parstatep-of-lex-identifier/keyword-loop.new-parstate
      (implies (parstatep parstate)
               (b* (((mv acl2::?erp
                         ?chars ?last-pos ?new-parstate)
                     (lex-identifier/keyword-loop pos-so-far parstate)))
                 (parstatep new-parstate)))
      :rule-classes :rewrite)

    Theorem: parsize-of-lex-identifier/keyword-loop-<=

    (defthm parsize-of-lex-identifier/keyword-loop-<=
      (b* (((mv acl2::?erp
                ?chars ?last-pos ?new-parstate)
            (lex-identifier/keyword-loop pos-so-far parstate)))
        (<= (parsize new-parstate)
            (parsize parstate)))
      :rule-classes :linear)

    Function: lex-identifier/keyword

    (defun lex-identifier/keyword (first-char first-pos parstate)
     (declare (xargs :stobjs (parstate)))
     (declare (xargs :guard (and (unsigned-byte-p 8 first-char)
                                 (positionp first-pos)
                                 (parstatep parstate))))
     (declare (xargs :guard (or (and (<= (char-code #\A) first-char)
                                     (<= first-char (char-code #\Z)))
                                (and (<= (char-code #\a) first-char)
                                     (<= first-char (char-code #\z)))
                                (= first-char (char-code #\_)))))
     (let ((__function__ 'lex-identifier/keyword))
      (declare (ignorable __function__))
      (b* (((reterr)
            (irr-lexeme)
            (irr-span)
            parstate)
           ((erp rest-chars last-pos parstate)
            (lex-identifier/keyword-loop first-pos parstate))
           (span (make-span :start first-pos
                            :end last-pos))
           (chars (cons first-char rest-chars))
           (string (nats=>string chars)))
       (if
        (or
         (member-equal string
                       '("auto" "break" "case"
                                "char" "const" "continue" "default" "do"
                                "double" "else" "enum" "extern" "float"
                                "for" "goto" "if" "inline" "int" "long"
                                "register" "restrict" "return" "short"
                                "signed" "sizeof" "static" "struct"
                                "switch" "typedef" "union" "unsigned"
                                "void" "volatile" "while" "_Alignas"
                                "_Alignof" "_Atomic" "_Bool" "_Complex"
                                "_Generic" "_Imaginary" "_Noreturn"
                                "_Static_assert" "_Thread_local"))
         (and (parstate->gcc parstate)
              (member-equal
                   string
                   '("__alignof" "__alignof__" "asm" "__asm"
                                 "__asm__" "__attribute" "__attribute__"
                                 "__auto_type" "__builtin_offsetof"
                                 "__builtin_types_compatible_p"
                                 "__builtin_va_arg"
                                 "__builtin_va_list" "__declspec"
                                 "__extension__" "_Float32" "_Float32x"
                                 "_Float64" "_Float64x" "_Float128"
                                 "_Float128x" "__inline" "__inline__"
                                 "__int128" "__restrict" "__restrict__"
                                 "__signed" "__signed__" "__stdcall"
                                 "typeof" "__typeof" "__typeof__"
                                 "__volatile" "__volatile__"))))
        (retok (lexeme-token (token-keyword string))
               span parstate)
        (retok (lexeme-token (token-ident (ident string)))
               span parstate)))))

    Theorem: lexemep-of-lex-identifier/keyword.lexeme

    (defthm lexemep-of-lex-identifier/keyword.lexeme
      (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate)
            (lex-identifier/keyword first-char first-pos parstate)))
        (lexemep lexeme))
      :rule-classes :rewrite)

    Theorem: spanp-of-lex-identifier/keyword.span

    (defthm spanp-of-lex-identifier/keyword.span
      (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate)
            (lex-identifier/keyword first-char first-pos parstate)))
        (spanp span))
      :rule-classes :rewrite)

    Theorem: parstatep-of-lex-identifier/keyword.new-parstate

    (defthm parstatep-of-lex-identifier/keyword.new-parstate
     (implies
          (parstatep parstate)
          (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate)
                (lex-identifier/keyword first-char first-pos parstate)))
            (parstatep new-parstate)))
     :rule-classes :rewrite)

    Theorem: parsize-of-lex-identifier/keyword-uncond

    (defthm parsize-of-lex-identifier/keyword-uncond
      (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate)
            (lex-identifier/keyword first-char first-pos parstate)))
        (<= (parsize new-parstate)
            (parsize parstate)))
      :rule-classes :linear)