%{
/* Lexical analyzer for our project compiler for C. Note that CONSTANTS
 * are all positive in order to avoid confusion (to prevent a-1 from
 * being interpreted as NAME CONSTANT rather than NAME MINUS CONSTANT).
 * This file is modelled after Holub's "COMPILER DESIGN IN C", p. 829.
 */

#include "types.h"
#include "y.tab.h"
#include "tree.h"
#include "symtbl.h"
#include <stdlib.h>

#define   YYDEBUG

#undef    YY_BREAK
#define   YY_BREAK ;

%}

let           [_a-zA-Z]
alnum         [_a-zA-Z0-9]
h             [0-9a-fA-F]
o             [0-7]
d             [0-9]
	/* One integer-suffix letter; the integer rules apply it at most once. */
suffix        [UuLl]
white         [ ]
	/* Character allowed after the backslash of a simple escape.          */
escape        [abfnrtv\\\'\"\?]
	/* One to three octal digits.                                         */
octal         {o}({o}{o}?)?
	/* x or X followed by one to three hex digits.  The class must be     */
	/* written [xX]: inside brackets a vertical bar is an ordinary        */
	/* character, not alternation, so [x|X] would also accept it.         */
hex           [xX]{h}({h}{h}?)?
escape_seq    {escape}|{octal}|{hex}

%%
"/*"                         {
                               /* Skip a C comment: the opening delimiter has been
                                * matched, so read characters with input() until a
                                * star immediately followed by a slash is seen.  The
                                * loop also stops when input() returns 0 or EOF.    */
                               int c, prev = 0;

                               while ((c = input()) && (c != EOF)) {
                                   if (c == '/' && prev == '*')
                                       break;
                                   prev = c;
                               }
                               if (c == 0 || c == EOF)
                                   printf("End of file in comment \n");

                               /* YY_BREAK is redefined to a bare semicolon at the top
                                * of this file, so the scanner does not leave the
                                * action switch by itself.  Without this explicit
                                * break, control would run on into the action of the
                                * next rule (the string constant rule) and hand the
                                * parser a STRING token after every comment.        */
                               break;
                             }
\"(\\.|[^\\\"\n\r])*\"      {  /* Complete string literal on a single line.  A backslash
				* always consumes the character that follows it, so an
				* escaped quote does not end the literal and an escaped
				* backslash cannot hide the closing quote.             */
			       yylval.info.tree_info = create_node("STRING CONST");
			       add_child(yylval.info.tree_info,create_node(yytext));
                               return  STRING; }
\"(\\.|[^\\\"\n\r])*\\?[\r\n] {
			      /* Literal still open when the line ends (optionally
			       * with a trailing backslash).  The match stops at the
			       * first line terminator, so following lines are not
			       * swallowed.  The node text is yytext exactly as
			       * matched, i.e. it ends with that line terminator.   */
                              printf("Adding missing quote to string constant\n");
			      yylval.info.tree_info = create_node("STRING CONST");
			      add_child(yylval.info.tree_info,create_node(yytext));
                               return  STRING; }

'.'                     |    /* 'a','b', etc                              */
'\\{escape}'            |    /* '\t','\f', etc                            */
'\\{octal}'             |    /* '\123', '\12', '\1'                       */
'\\{hex}'                  {  /* Hex escapes with one to three digits, plus the
			       * three alternatives listed above.  The action has
			       * to open on the same line as the last pattern: an
			       * action that starts on the following line is not
			       * attached to the rule by lex/flex.                 */
			      yylval.info.tree_info = create_node("CHAR CONST");
			      add_child(yylval.info.tree_info,create_node(yytext));
			      return CHARACTER_CONSTANT; }
0{o}*{suffix}?              |    /* zero and octal: 0, 01, 012, 012L         */
0[xX]{h}+{suffix}?          |    /* hex, either case of x: 0x1, 0X12, 0x12L  */
[1-9]{d}*{suffix}?          {    /* decimal: 1, 42, 42u                      */
                              yylval.info.tree_info = create_node("INTEGER CONST");
                              add_child(yylval.info.tree_info, create_node(yytext));
                              return  INTEGER_CONSTANT;
                            }

({d}+\.{d}*|{d}*\.{d}+)([eE][-+]?{d}+)?[lLfF]?  |    /* has a decimal point; exponent optional */
{d}+[eE][-+]?{d}+[lLfF]?                        {    /* digits only; exponent required         */
                              yylval.info.tree_info = create_node("FLOAT CONST");
                              add_child(yylval.info.tree_info, create_node(yytext));
                              return  FLOATING_CONSTANT;
                            }

"("      { yylval.info.tree_info = create_node("LEFT_PAREN");      return LEFT_PARENTH; }
")"      { yylval.info.tree_info = create_node("RIGHT_PAREN");     return RIGHT_PARENTH; }
"{"      { yylval.info.tree_info = create_node("LEFT_BRACE");      return LEFT_BANANA; }
"}"      { yylval.info.tree_info = create_node("RIGHT_BRACE");     return RIGHT_BANANA; }
"["      { yylval.info.tree_info = create_node("LEFT_BRACKET");    return LEFT_BRACKET; }
"]"      { yylval.info.tree_info = create_node("RIGHT_BRACKET");   return RIGHT_BRACKET; }

	/* member access, increment/decrement and unary operators */
"->"     { yylval.info.tree_info = create_node("ARROW");           return ARROW; }
"."      { yylval.info.tree_info = create_node("POINT");           return POINT; }
"++"     { yylval.info.tree_info = create_node("PLUS_PLUS");       return PLUS_PLUS; }
"--"     { yylval.info.tree_info = create_node("MINUS_MINUS");     return MINUS_MINUS; }
"~"      { yylval.info.tree_info = create_node("TILDE");           return TILDE; }
"!"      { yylval.info.tree_info = create_node("EXCLAMATION");     return EXCLAMATION; }

	/* arithmetic and shift operators */
"*"      { yylval.info.tree_info = create_node("ASTERISK");        return ASTERISK; }
"/"      { yylval.info.tree_info = create_node("SLASH");           return SLASH; }
"%"      { yylval.info.tree_info = create_node("PERCENT");         return PERCENT; }
"+"      { yylval.info.tree_info = create_node("PLUS");            return PLUS; }
"-"      { yylval.info.tree_info = create_node("MINUS");           return MOINUS; }
">>"     { yylval.info.tree_info = create_node("RIGHT_SHIFT");     return RIGHT_SHIFT; }
"<<"     { yylval.info.tree_info = create_node("LEFT_SHIFT");      return LEFT_SHIFT; }

	/* relational and equality operators */
"<"      { yylval.info.tree_info = create_node("LESS_THAN");       return LESS; }
">"      { yylval.info.tree_info = create_node("GREATER_THAN");    return MORE; }
"<="     { yylval.info.tree_info = create_node("LESS_EQUAL");      return LESS_EQUAL; }
">="     { yylval.info.tree_info = create_node("GREATER_EQUAL");   return GREATER_EQUAL; }
"!="     { yylval.info.tree_info = create_node("NOT_EQUAL");       return NOT_EQUAL; }
"=="     { yylval.info.tree_info = create_node("EQUAL_EQUAL");     return EQUAL_EQUAL; }

	/* assignment operators */
"-="     { yylval.info.tree_info = create_node("MINUS_EQUAL");     return MINUS_EQUAL; }
"/="     { yylval.info.tree_info = create_node("DIVIDE_EQUAL");    return DIVIDE_EQUAL; }
"%="     { yylval.info.tree_info = create_node("REMAINDER_EQUAL"); return REMAINDER_EQUAL; }
"+="     { yylval.info.tree_info = create_node("PLUS_EQUAL");      return PLUS_EQUAL; }
"&="     { yylval.info.tree_info = create_node("AND_EQUAL");       return AND_EQUAL; }
"|="     { yylval.info.tree_info = create_node("OR_EQUAL");        return OR_EQUAL; }
"^="     { yylval.info.tree_info = create_node("XOR_EQUAL");       return XOR_EQUAL; }
"*="     { yylval.info.tree_info = create_node("TIMES_EQUAL");     return MULTIPLY_EQUAL; }
"<<="    { yylval.info.tree_info = create_node("LSHIFT_EQUAL");    return LEFT_SHIFT_EQUAL; }
">>="    { yylval.info.tree_info = create_node("RSHIFT_EQUAL");    return RIGHT_SHIFT_EQUAL; }
"="      { yylval.info.tree_info = create_node("EQUAL");           return EQUAL; }

	/* bitwise and logical operators */
"&"      { yylval.info.tree_info = create_node("AMPERSAND");       return AMPERSAND; }
"^"      { yylval.info.tree_info = create_node("CIRCUMFLEX");      return CIRCONFLEX; }
"|"      { yylval.info.tree_info = create_node("OR");              return OR; }
"&&"     { yylval.info.tree_info = create_node("AND_AND");         return AND_AND; }
"||"     { yylval.info.tree_info = create_node("OR_OR");           return OR_OR; }

	/* remaining punctuation */
"?"      { yylval.info.tree_info = create_node("QUESTION");        return QUESTION; }
":"      { yylval.info.tree_info = create_node("COLON");           return COLON; }
","      { yylval.info.tree_info = create_node("COMMA");           return COMMA; }
";"      { yylval.info.tree_info = create_node("SEMICOLON");       return SEMICOLON; }
"..."    { yylval.info.tree_info = create_node("ELLIPSIS");        return ELLIPSIS; }
{let}{alnum}*               {
                              /* Keyword, typedef name or plain identifier.
                               * id_or_keyword() returns the keyword token (and
                               * has then already stored a tree node in yylval)
                               * or IDENTIFIER.  For IDENTIFIER the name is looked
                               * up in the symbol table, inserted on first sight,
                               * and its use count is updated.                    */
                              int id_or_keyword(char *lex);   /* defined after the rules */
                              struct SymbolTableNode *sym;
                              int tok;

                              tok = id_or_keyword(yytext);
                              if (tok == IDENTIFIER) {
                                  /* Convert the result straight to a pointer: going
                                   * through an int variable first drops the upper
                                   * half of the address where pointers are wider
                                   * than int.
                                   * NOTE(review): assumes Lookup/Insert are declared
                                   * in symtbl.h with a pointer-sized return type. */
                                  sym = (struct SymbolTableNode *) Lookup(yytext);
                                  if (sym == NULL) {
                                      sym = (struct SymbolTableNode *) Insert(yytext);
                                      sym->count = 1;
                                  } else {
                                      sym->count = sym->count + 1;
                                  }

                                  /* see if we've got a typedef name, and */
                                  /* build a tree for the token.          */
                                  if (sym->symtype == TYPEDEF_T) {
                                      tok = TYPEDEF_NAME;
                                      yylval.info.tree_info = create_node("TYPEDEF_NAME");
                                  } else {
                                      yylval.info.tree_info = create_node("IDENTIFIER");
                                  }
                                  add_child(yylval.info.tree_info, create_node(yytext));
                                  yylval.info.symptr = sym;
                              }
                              return tok;
                            }
[ \r\n\t]+                  { break;     }/* ignore white space                  */
.                           { /* Anything no other rule accepts: report and skip it.
			       * The byte is converted through unsigned char so
			       * that values of 0x80 and above print as two hex
			       * digits instead of being sign-extended.            */
			      printf("Illegal character <%s> \n",yytext);
			      printf("   (that is to say, %x)\n",
				     (unsigned int) (unsigned char) yytext[0]);
                              break;    }
%%

/* ---------------------------------------------------------------------  */

/* One row of the keyword table: a spelling and the token it stands for. */
typedef struct
{
    char *name;    /* keyword text; the key compared by cmp_str()        */
    int   val;     /* token value; what id_or_keyword() returns for it   */
} KWORD;

/* Alphabetic keywords and their tokens.
 *
 * id_or_keyword() searches this table with bsearch() using cmp_str(), so
 * the rows must stay in ascending strcmp() order of the name field.
 */
KWORD  KTab[] =
{
    { "auto",       AUTO      },
    { "break",      BREAK     },
    { "case",       CASE      },
    { "char",       CHAR      },
    { "const",      CONST     },
    { "continue",   CONTINUE  },
    { "default",    DEFAULT   },
    { "do",         DO        },
    { "double",     DOUBLE    },
    { "else",       ELSE      },
    { "enum",       ENUM      },
    { "extern",     EXTERN    },
    { "float",      FLOAT     },
    { "for",        FOR       },
    { "goto",       GOTO      },
    { "if",         IF        },
    { "int",        INT       },
    { "long",       LONG      },
    { "neighbour",  NEIGHBOUR },
    { "plural",     PLURAL    },
    { "receive",    RECEIVE   },
    { "register",   REGISTER  },
    { "return",     RETURN    },
    { "send",       SEND      },
    { "shared",     SHARED    },
    { "short",      SHORT     },
    { "signed",     SIGNED    },
    { "sizeof",     SIZEOF    },
    { "static",     STATIC    },
    { "struct",     STRUCT    },
    { "switch",     SWITCH    },
    { "typedef",    TYPEDEF   },
    { "union",      C_UNION   },
    { "unsigned",   UNSIGNED  },
    { "vector",     VECTOR    },
    { "void",       VOID      },
    { "volatile",   VOLATILE  },
    { "while",      WHILE     },
};

/* Order two KWORD entries by name.
 *
 * This is the comparison callback handed to bsearch() in id_or_keyword(),
 * so it has the signature bsearch() calls through: two const void
 * pointers, each addressing a KWORD.  Returns the strcmp() result of the
 * two name fields (negative, zero or positive).
 */
int  cmp_str (const void *a, const void *b)
{
    const KWORD *lhs = a;
    const KWORD *rhs = b;

    return strcmp(lhs->name, rhs->name);
}

/* Order two KWORD entries by token value.
 *
 * Returns a negative number, zero or a positive number when a's val is
 * less than, equal to or greater than b's, which is the three-way result
 * qsort() and bsearch() require from a comparison callback.  (A result
 * that is only ever 0 or 1 cannot express "less than".)  The subtraction
 * of two comparisons avoids the overflow that a->val - b->val could hit.
 *
 * NOTE(review): no caller is visible in this file; confirm nothing relies
 * on the result being 0 whenever a's val is smaller.
 */
int  cmp_val (const void *a, const void *b)
{
    const KWORD *lhs = a;
    const KWORD *rhs = b;

    return (lhs->val > rhs->val) - (lhs->val < rhs->val);
}

int  id_or_keyword (lex)     /* do a binary search for a possible keyword*/
char  *lex;                  /* in KTab. Return the token if it is in the*/
{                            /* table, IDENTIFIER otherwise.             */
    KWORD  *p;
    KWORD  dummy;

    dummy.name = lex;
    p = (KWORD *) bsearch(&dummy,KTab, sizeof(KTab) / sizeof(KWORD),
                   sizeof(KWORD), cmp_str);

    /* build a node for keywords; identifiers and typedef names will get */
    /* build by the pattern matching code above.			 */
    if (p != NULL) {
	yylval.info.tree_info = create_node(p->name);
    }
    return(p ? p->val : IDENTIFIER);

}

#include "symtbl.c"
