Bugs Catalog
AI/BL_Tokenizing_Machine/Kernel.h
Copyright © 2011, Reusable Software Research Group, The Ohio State University

//  /*----------------------------------------------------------------------*\
//  |   Abstract Instance : BL_Tokenizing_Machine_Kernel
//  \*----------------------------------------------------------------------*/

#ifndef AI_BL_TOKENIZING_MACHINE_KERNEL
#define AI_BL_TOKENIZING_MACHINE_KERNEL 1

///------------------------------------------------------------------------
/// Interface -------------------------------------------------------------
///------------------------------------------------------------------------

enumeration Token_Types
{
    KEYWORD,
    IDENTIFIER,
    CONDITION,
    WHITE_SPACE,
    COMMENT,
    ERROR
};
    
///------------------------------------------------------------------------

abstract_instance
class BL_Tokenizing_Machine_Kernel
{
public:
    
    /*!
        //  some general operations on strings
        
        math definition LAST_OF (
                s: string of character
            ): string of character satisfies
            if s = empty_string
                then LAST_OF (s) = empty_string
            else there exists c: character, rest_of_s: string of character
                     (s = rest_of_s * <c> and
                      LAST_OF (s) = <c>)

        math definition ALL_BUT_LAST_OF (
                s: string of character
            ): string of character satisfies
            s = ALL_BUT_LAST_OF (s) * LAST_OF (s)

        math definition PREFIX (
                s_set: set of string of character
            ): set of string of character is
            {x: string of character where
                 (there exists y: string of character
                      (x * y is in s_set))}

        // domain specific definitions

        math definition IS_KEYWORD (
                s: string of character
            ): boolean is
            s is in {"BEGIN", "DO", "ELSE", "END", "INSTRUCTION",
                     "IF", "IS", "PROGRAM", "THEN", "WHILE"}

        math definition IS_CONDITION_NAME (
                s: string of character
            ): boolean is
            s is in {"next-is-empty", "next-is-not-empty",
                     "next-is-enemy", "next-is-not-enemy",
                     "next-is-friend", "next-is-not-friend",
                     "next-is-wall", "next-is-not-wall",
                     "random", "true"}

        math definition IS_LETTER (
                c: character
            ): boolean is
            c is in {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
                     'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
                     's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A',
                     'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                     'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
                     'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}

        math definition IS_DIGIT (
                c: character
            ): boolean is
            c is in {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}

        math definition IS_IDENTIFIER (
                s: string of character
            ): boolean is
            there exists c: character, rest: string of character
                (s = <c> * rest and
                 IS_LETTER (c) and
                 for all x: character where (x is in elements (rest))
                     (IS_LETTER (x) or IS_DIGIT (x) or x = '-')) and
            not IS_KEYWORD (s) and
            not IS_CONDITION_NAME (s)

        math definition IS_WHITE_SPACE (
                s : string of character
            ): boolean is
            s /= empty_string  and
            elements (s) is subset of {'\n', '\t', ' '}
                
        math definition IS_COMMENT (
                s : string of character
            ): boolean is
            first (s) = '#' and
            '\n' is not in elements (s)
                
        math definition OK_STRINGS: set of string of character is
            {s: string of character where (IS_KEYWORD (s))} union
            {s: string of character where (IS_IDENTIFIER (s))} union
            {s: string of character where (IS_CONDITION_NAME (s))} union
            {s: string of character where (IS_WHITE_SPACE (s))} union
            {s: string of character where (IS_COMMENT (s))}

        math definition IS_COMPLETE_TOKEN_TEXT (
                s: string of character
                c: character
            ): boolean is
            (s is in OK_STRINGS and s * <c> is not in OK_STRINGS) or
            (<c> is in PREFIX (OK_STRINGS) and
             s * <c> is not in PREFIX (OK_STRINGS))

        math definition WHICH_KIND (
                s: string of character
            ): integer satisfies
            if IS_KEYWORD (s)
            then
                WHICH_KIND (s) = KEYWORD
            else if IS_IDENTIFIER (s)
            then
                WHICH_KIND (s) = IDENTIFIER
            else if IS_CONDITION_NAME (s)
            then
                WHICH_KIND (s) = CONDITION
            else if IS_WHITE_SPACE (s)
            then
                WHICH_KIND (s) = WHITE_SPACE
            else if IS_COMMENT (s)
            then
                WHICH_KIND (s) = COMMENT
            else
                WHICH_KIND (s) = ERROR

        math definition REMOVE_SEPARATORS (
                s: string of string of character
            ): string of string of character satisfies
            if s = empty_string
            then
                REMOVE_SEPARATORS (s) = empty_string
            else
                there exists t: string of character,
                             rest: string of string of character
                    (s = <t> * rest and
                     if IS_WHITE_SPACE (t) or IS_COMMENT (t)
                     then
                         REMOVE_SEPARATORS (s) = REMOVE_SEPARATORS (rest)
                     else
                         REMOVE_SEPARATORS (s) =
                             <t> * REMOVE_SEPARATORS (rest))

        math definition TOKENIZE_PROGRAM_TEXT (
                s: string of character
            ): string of string of character satisfies
            if s = empty_string
            then
                TOKENIZE_PROGRAM_TEXT (s) = empty_string
            else
                there exists t, rest: string of character
                    (s = t * rest and
                     t /= empty_string and
                     (if rest /= empty_string
                      then
                          there exists c: character,
                                       rest_of_rest: string of character
                              (rest = <c> * rest_of_rest and
                               IS_COMPLETE_TOKEN_TEXT (t, c))) and
                     (for all p, q: string of character, c: character
                      where (t = p * <c> * q)
                          (not IS_COMPLETE_TOKEN_TEXT (p, c))) and
                     TOKENIZE_PROGRAM_TEXT (s) =
                         <t> * TOKENIZE_PROGRAM_TEXT (rest))

        math subtype BL_TOKENIZING_MACHINE_MODEL is (
                buffer: string of character
                ready_to_dispense: boolean
            )
            exemplar m
            constraint
                m.ready_to_dispense =
                    there exists c: character
                       (LAST_OF (m.buffer) = <c> and
                        IS_COMPLETE_TOKEN_TEXT (ALL_BUT_LAST_OF (m.buffer), c))
    !*/

    standard_abstract_operations (BL_Tokenizing_Machine_Kernel);
    /*!
        BL_Tokenizing_Machine_Kernel is modeled by BL_TOKENIZING_MACHINE_MODEL
        initialization ensures
            self.buffer = empty_string and
            self.ready_to_dispense = false
    !*/

    procedure Insert (
            preserves Character ch
        ) is_abstract;
    /*!
	requires
	    self.ready_to_dispense = false
	ensures
	    self.buffer = #self.buffer * <ch> and
	    self.ready_to_dispense =
			IS_COMPLETE_TOKEN_TEXT (#self.buffer, ch)
    !*/

    procedure Dispense (
            produces Text& token_text,
            produces Integer& token_kind
        ) is_abstract;
    /*!
	requires
	    self.ready_to_dispense = true
	ensures
	    token_text = ALL_BUT_LAST_OF (#self.buffer) and
	    token_kind = WHICH_KIND (token_text) and
	    self.buffer = LAST_OF (#self.buffer) and
	    self.ready_to_dispense = false
    !*/

    procedure Flush_A_Token (
            produces Text& token_text,
            produces Integer& token_kind
        ) is_abstract;
    /*!
	requires
	    self.ready_to_dispense = false
	ensures
	    token_text = #self.buffer and
	    token_kind = WHICH_KIND (token_text) and
	    self.buffer = empty_string and
	    self.ready_to_dispense = false
    !*/

    function Boolean Is_Ready_To_Dispense ( ) is_abstract;
    /*!
	ensures
	    Is_Ready_To_Dispense = self.ready_to_dispense
    !*/

    function Integer Size ( ) is_abstract;
    /*!
	ensures
	    Size = |self.buffer|
    !*/

};

#endif // AI_BL_TOKENIZING_MACHINE_KERNEL

Last modified: Wed Feb 16 12:17:11 EST 2011