/* proj1.yac */
/* Author: Edward A. Green */
/* Description: YACC portion of Compiler Design (66.648): Project 1 */

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* The parse stack type is a pointer to void so that a rule can return */
/* either of two kinds of item: a string or a list of strings (STR_LST). */
typedef void *STK_TYP;
#define YYSTYPE STK_TYP

/* A handy macro for printing quads: five tab-separated string fields */
/* (the label and fields 1-4) followed by a newline, written to stdout. */
#define PRINT_QUAD(a,b,c,d,e)	printf("%s\t%s\t%s\t%s\t%s\n",a,b,c,d,e)

/* Scratch string pointers shared by the grammar actions. */
char *temp, *temp1, *temp2;

/* Symbol table structure, kept as a doubly linked list (stack). */
/* Symbol types are: */
/*    int, real, int ary, real ary, unknown, untyped procedure or program. */
typedef struct SYM
  {
  char *block;           /* name of enclosing block */
  char *name;            /* name of this symbol */
  char type;             /* i,r,I,R,u,' ': codes for symbol types */
  int  start, end;       /* array bounds */
  int  offset;           /* location of this symbol */
  int  nparms;           /* number of formal parameters for funcs, procs */
  struct SYM  *parmlist; /* list of formal parameters (NULL for variables) */
  struct SYM  *next;     /* pointer to next symbol in the list */
  struct SYM  *last;     /* pointer to last symbol in the list */
  } SYM;

/* pointer to the base of the entire symbol table */
SYM  *prog = NULL;

/* pointer to the symbol of the subprogram currently being parsed: */
SYM  *subprog = NULL;

/* current top of symbol stack */
SYM  *top  = NULL;

/* miscellaneous symbol variables */
SYM  *sym1 = NULL;
SYM  *sym2 = NULL;

/* counter for offset locations for variables */
int offset = 0;

/* Forward declarations of the helpers defined after the grammar.  The */
/* grammar actions call them before their definitions are seen; without */
/* these the compiler assumes they return int, which is wrong for the */
/* pointer-returning ones.  Old-style (unprototyped) declarations are */
/* used so they match the K&R-style definitions. */
/* NOTE(review): strsave() is also called from the actions but is not */
/* defined in this file (presumably it comes from lex.yy.c); it needs a */
/* matching declaration too -- confirm its signature before adding one. */
char *next_tv();
char *next_lbl();
void push_lbl();
char *pop_lbl();
SYM *new_sym();
SYM *find_sym();
void add_sym();
void print_sym();
void free_sym_chain();

/* A type for a list of strings: a pointer to this type may also be */
/* passed on the parse stack. */
typedef struct STR_LST
  {
  char *str1;               /* the string carried by this node */
  struct STR_LST *next;     /* following node in the list */
  } STR_LST;
STR_LST *lbl_stk = NULL;    /* top of the stack used by push_lbl()/pop_lbl() */
STR_LST *list1, *list2;     /* scratch list pointers shared by the grammar actions */

int tv_cnt = 0;    /* temporary variable counter */
int lbl_cnt = 0;   /* label counter */
%}
%token PGM VAR ARY OF INT REAL FUNC PROC BGN END IF THEN ELSE
%token WHILE DO NOT DIV MOD AND OR ID NUM DD ASGN NE LE GE LP RP
%token SC PE CO CL LB RB PL MI EQ LT GT MU DI
%left PL MI OR
%left MU DI DIV MOD AND
%left UMIN
%start prog
%%
prog	:	PGM ID LP idlist 
			{
		/* Mid-rule action: runs after the program header's */
		/* identifier list has been parsed. */
		/* generate the program symbol (bottom of the stack) */
			prog = new_sym();
			top = prog;
			top->block=strsave($2);
			top->name=strsave($2);
		/* A non-NULL parmlist marks a program/subprogram symbol; it */
		/* points at the symbol itself until a parameter is added. */
			top->parmlist=top;
		/* process STR_LST structure returned from idlist */
		/* add parameters returned from idlist to sym. table */
		/* 1st: transform the circular list to a linear list */
		/* ($4 is the last node, so $4->next is the first one) */
			list1=((STR_LST *)$4)->next;
			((STR_LST *)$4)->next=NULL;
			while (list1!=NULL)
			  {
		/* Push one symbol of unknown type ('u') per identifier. */
			  sym1 = new_sym();
			  if (top->parmlist==top) top->parmlist=sym1;
			  top->next = sym1;
			  sym1->last = top;
			  top=sym1;
			  top->block = strsave(prog->block);
		/* The symbol takes over the identifier string; only the */
		/* list node itself is freed below. */
			  top->name = list1->str1;
			  top->type = 'u';
			  prog->nparms++;
			  list2=list1;
			  list1=list2->next;
			  free(list2);
			  }
			}
		RP SC decs spdecs 
			{
		/* All symbols have been collected for the program. */
		/* Report them now, then label the main program entry. */
			print_sym(prog);
			PRINT_QUAD("_start","NOP","","","");
			}
		cpdstmt PE 
			{
		/* The main program body ends with a call to _exit. */
			PRINT_QUAD("","CALL","","","_exit");
			}
	;
idlist
		/* Yields a circularly linked STR_LST holding the identifiers */
		/* seen so far.  The value of the rule addresses the LAST */
		/* identifier, so its next field leads to the first one. */
	:	ID
			{
		/* A ring of one: the node is its own successor. */
			list1=(STR_LST *)malloc(sizeof *list1);
			list1->next = list1;
			list1->str1 = $1;
			$$ = list1;
			}
	|	idlist CO ID
			{
		/* Splice the new node in behind the old tail; the new */
		/* node inherits the link back to the head of the ring. */
			STR_LST *tail = (STR_LST *)$1;
			list1=(STR_LST *)malloc(sizeof *list1);
			list1->str1=$3;
			list1->next=tail->next;
			tail->next=list1;
			$$=list1;
			}
	;
decs
		/* Process the idlists; stack the identifiers on the symbol */
		/* stack with the type picked up here. */
	:
			{$$ = NULL;}
	|	decs VAR idlist CL type SC
			{
		/* 1st: transform the circular list into a linear linked list */
		/* ($3 is the last node, so $3->next is the first one) */
			list1=((STR_LST *)$3)->next;
			((STR_LST *)$3)->next=NULL;
			while (list1!=NULL)
			  {
		/* add_sym() keeps the identifier string but only reads */
		/* the type string, so just the list node is freed here. */
			  add_sym(list1->str1,$5);
			  list2=list1;
			  list1=list2->next;
			  free(list2);
			  }
		/* The type string was allocated by the type/stype rules */
		/* and nothing retains it once every identifier has been */
		/* declared; release it instead of leaking it. */
			free($5);
			}
	;
type
		/* Yields a freshly allocated string describing the type: */
		/* either the simple type's own string, or for arrays the */
		/* text "ary <low> <high> <simple type>". */
	:	stype	
			{ $$=$1; }
	|	ARY LB NUM DD NUM RB OF stype
			{
		/* Room for the keyword, both bounds, the element type, */
		/* plus slack for the separators and the terminator. */
			$$=(void *)malloc(strlen("ary ")+strlen($3)+strlen($5)+
					  strlen($8)+10);
			sprintf($$,"ary %s %s %s",$3,$5,$8);
			}
	;
stype
		/* Yields a freshly allocated copy of the simple type's */
		/* name: "int" or "real". */
	:	INT
			{
			$$=(char *)malloc(sizeof "int");
			strcpy($$,"int");
			}
	|	REAL
			{
			$$=(char *)malloc(sizeof "real");
			strcpy($$,"real");
			}
	;
spdecs
		/* Zero or more subprogram declarations, each followed by SC. */
	:
	|	spdecs spdec SC
	;
spdec
		/* One subprogram: its heading, its local declarations and */
		/* its body. */
	:	sphead decs
			{
		/* print out the subprogram's symbol table */
			print_sym(subprog);
		/* The entry point is labelled with the subprogram's own */
		/* name (an earlier variant used a generated label from */
		/* next_lbl() instead). */
			PRINT_QUAD(subprog->name,"NOP","","","");
			}
		 cpdstmt
			{
			int i;
		/* End the subroutine with a return statement */
			PRINT_QUAD("","RTN","","","");
		/* Adjust symbol table so the next subroutine or the */
		/* main program can be parsed next.  Drop local variables */
		/* while keeping parameter data. */
		/* The subprogram's own symbol becomes the stack top again. */
			top=subprog;
		/* Walk subprog forward over the nparms parameter symbols; */
		/* afterwards it addresses the last parameter (or the */
		/* subprogram symbol itself when there are none). */
			for (i=0; i<top->nparms; i++)
			  {
			  subprog=subprog->next;
			  }
		/* Everything past that point is a local variable. */
			sym1=subprog->next;
		/* Cut the forward links: from the subprogram symbol (the */
		/* parameters stay reachable through its parmlist) and from */
		/* the last parameter to the locals, then free the locals. */
			top->next=NULL;
			subprog->next=NULL;
			free_sym_chain(sym1);
			}
	;
sphead
		/* Subprogram heading.  The symbol is pushed in a mid-rule */
		/* action so that it is already on the stack (and subprog */
		/* already set) when the parameters in args are processed. */
	:	FUNC ID
			{
		/* Set up a new symbol for the function.  Update the */
		/* subprogram symbol pointer. */
			subprog=new_sym();
			top->next=subprog;
			subprog->last=top;
			top=subprog;
			top->block=strsave($2);
			top->name=strsave($2);
			top->offset=offset;
		/* Non-NULL parmlist marks this as a subprogram symbol. */
			top->parmlist=top;
			}
		args CL stype SC
			{
		/* Set up the return value type for the subprogram. */
		/* The mid-rule action above occupies position 3, so the */
		/* stype string is $6; its first letter is the type code. */
			subprog->type=((char *)$6)[0];
			}
	|	PROC ID
			{
		/* Set up a new symbol for the procedure.  Update the */
		/* subprogram symbol pointer. */
			subprog=new_sym();
			top->next=subprog;
			subprog->last=top;
			top=subprog;
			top->block=strsave($2);
			top->name=strsave($2);
			top->offset=offset;
		/* Make the parmlist not NULL.  One way procedures and */
		/* functions are distinguished from other symbols is that */
		/* the parmlist pointer is not NULL, even though there may */
		/* be no parameter list. */
			top->parmlist=top;
			}
		args SC
	;
args
		/* Optional parenthesised parameter list.  The empty */
		/* alternative has no action, so it assigns no value. */
	:
	|	LP parlist RP
			{
		/* Pass the value of parlist through. */
			$$=$2;
			}
	;
parlist
		/* Formal parameter groups, separated by SC.  Each group is */
		/* an idlist with a type; every identifier becomes a symbol */
		/* on the stack and is counted in subprog->nparms. */
	:	idlist CL type 
			{
		/* Process the identifier list.  Make symbols for the */
		/* parameters, linking them to both the stack top and */
		/* the subprogram parameter list. */
		/* ($1 is the last node, so $1->next is the first one) */
			list1=((STR_LST *)$1)->next;
			((STR_LST *)$1)->next=NULL;
			while (list1!=NULL)
			  {
			  add_sym(list1->str1,$3);
			  subprog->nparms++;
			  list2=list1;
			  list1=list1->next;
			  free(list2);
			  }
		/* This will be the first parameter in the list.  Set up */
		/* the parameter pointer to this parameter. */
			subprog->parmlist=subprog->next;
		/* add_sym() only reads the type string; release it now */
		/* that the whole group has been declared. */
			free($3);
			$$=NULL;
			}
	|	parlist SC idlist CL type 
			{
		/* Process the identifier list.  Make symbols for the */
		/* parameters, linking them to both the stack top and */
		/* the subprogram parameter list. */
			list1=((STR_LST *)$3)->next;
			((STR_LST *)$3)->next=NULL;
			while (list1!=NULL)
			  {
			  add_sym(list1->str1,$5);
			  subprog->nparms++;
			  list2=list1;
			  list1=list1->next;
			  free(list2);
			  }
		/* Release this group's type string as well. */
			free($5);
			$$=NULL;
			}
	;
cpdstmt
		/* Compound statement: BGN ... END around optional statements. */
	:	BGN opstmt END
	;
opstmt
		/* Either nothing or a statement list. */
	:
	|	stmtlist
	;
stmtlist
		/* One or more statements separated by SC. */
	:	stmt
	|	stmtlist SC stmt
	;
stmt
		/* A single statement.  Assignments and while loops emit */
		/* their quads here; the other forms have their own rules. */
	:	ID LB exp RB ASGN exp
			{
		/* Assignment to an array element. */
			PRINT_QUAD("","[]=",$1,$3,$6);
			}
	|	ID ASGN exp
			{
		/* Assignment to a scalar. */
			PRINT_QUAD("","MOV",$3,"",$1);
			}
	|	procstmt
	|	cpdstmt
	|	ifstmt
	|	WHILE
			{
		/* Label loop top and stack the label on the label stack. */
		/* push_lbl() stores its own copy of the string, so the */
		/* one from next_lbl() is released once it is printed. */
			temp=next_lbl();
			push_lbl(temp);
			PRINT_QUAD(temp,"NOP","","","");
			free(temp);
			}
		exp 
			{
		/* Check for end of loop.  Stack the branch label. */
		/* ($3 is the loop condition: the mid-rule action above */
		/* occupies position 2.) */
			temp=next_lbl();
			push_lbl(temp);
			PRINT_QUAD("","JZ",$3,"",temp);
			free(temp);
			}
		DO stmt
			{
		/* Pop the 2 saved labels.  Unconditional jump to the */
		/* 1st label, label a NOP statement with the second. */
		/* temp1 is the exit label (pushed last), temp2 the loop top. */
			temp1=pop_lbl();
			temp2=pop_lbl();
			PRINT_QUAD("","JMP","","",temp2);
			PRINT_QUAD(temp1,"NOP","","","");
			free(temp1);
			free(temp2);
			}
	;
ifstmt
		/* if-then with an optional else part (estmt).  One label */
		/* is always left on the label stack for estmt to place. */
	:	IF exp
			{
		/* Handle labels for the ifstmt: jump past the "then" */
		/* part when the condition is zero.  push_lbl() stores its */
		/* own copy, so the original string is released here. */
			temp=next_lbl();
			push_lbl(temp);
			PRINT_QUAD("","JZ",$2,"",temp);
			free(temp);
			}
		THEN stmt
			{
		/* End of the "then" part: jump to the end-of-if label, */
		/* place the "condition false" label here, and leave the */
		/* end-of-if label on the stack for estmt. */
			temp1=next_lbl();
			PRINT_QUAD("","JMP","","",temp1);
			temp=pop_lbl();
			PRINT_QUAD(temp,"NOP","","","");
			push_lbl(temp1);
			free(temp);
		/* The stack holds a copy of temp1; drop the original. */
			free(temp1);
			}
		estmt
	;
estmt
		/* Optional else part.  Either way the label left on the */
		/* label stack by ifstmt is popped and placed on a NOP. */
	:
			{
		/* Handle labels for if without else. */
			temp=pop_lbl();
			PRINT_QUAD(temp,"NOP","","","");
			free(temp);
			}
	|	ELSE stmt
			{
		/* Handle labels for if with else. */
			temp=pop_lbl();
			PRINT_QUAD(temp,"NOP","","","");
			free(temp);
			}
	;
var
		/* A variable reference: a plain identifier or an indexed */
		/* array element.  Yields the name holding the value. */
	:	ID
			{
		/* Parser stack the value of the identifier returned by Lex. */
		/* $1 is used rather than the global yylval: by the time */
		/* this action runs the parser may already have read the */
		/* next token, and $1 is the value saved for the ID itself. */
			$$=(void *)malloc(strlen($1)+1);
			strcpy($$,$1);
			}
	|	ID
			{
		/* Save identifier of an array on the label stack */
		/* (again taken from $1, not from yylval). */
			push_lbl($1);
			}
		LB exp RB
			{
		/* Make a temporary variable equal to the evaluation of the */
		/* array reference.  $4 is the index expression: the */
		/* mid-rule action above occupies position 2. */
			temp=pop_lbl();
			temp1=next_tv();
			PRINT_QUAD("","[]",temp,$4,temp1);
			$$=temp1;
			free(temp);
			}
	;
procstmt
		/* Procedure call statement, with or without arguments. */
	:	ID
			{
		/* No arguments: just emit the call. */
			PRINT_QUAD("","CALL","","",$1);
			}
	|	ID LP explist RP
			{
		/* Open the ring returned by explist ($3 is its last node) */
		/* and emit one PUSH per argument, first to last, freeing */
		/* each node once it has been used.  Then emit the call. */
			list1=((STR_LST *)$3)->next;
			((STR_LST *)$3)->next=NULL;
			for ( ; list1!=NULL; list1=list2)
			  {
			  list2=list1->next;
			  PRINT_QUAD("","PUSH","","",list1->str1);
			  free(list1);
			  }
			PRINT_QUAD("","CALL","","",$1);
			}
	;
explist
		/* Yields a circularly linked STR_LST of argument values. */
		/* The value of the rule addresses the LAST argument, so */
		/* its next field leads to the first one. */
	:	exp
			{
		/* First argument: a ring of one node. */
			list1=(STR_LST *)malloc(sizeof *list1);
			list1->next = list1;
			list1->str1 = $1;
			$$ = list1;
			}
	|	explist CO exp
			{
		/* Further argument: splice it in behind the old tail. */
			STR_LST *tail = (STR_LST *)$1;
			list1=(STR_LST *)malloc(sizeof *list1);
			list1->str1=$3;
			list1->next=tail->next;
			tail->next=list1;
			$$=list1;
			}
	;
exp
		/* An expression, optionally a comparison of two simple */
		/* expressions.  Every comparison follows the same pattern: */
		/* preset the result temporary (temp1), subtract into a */
		/* scratch temporary (temp2), conditionally jump over a */
		/* second MOV that flips the result, and label the landing */
		/* NOP.  The rule's value is the result temporary's name. */
	:	sexp
	|	sexp EQ sexp
			{
		/* Expand the '=' operation with our set of quad operations */
		/* Result is 1 unless the difference is non-zero. */
			temp=next_lbl();
			temp1=next_tv();
			temp2=next_tv();
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD("","SUB",$1,$3,temp2);
			PRINT_QUAD("","JZ",temp2,"",temp);
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	|	sexp NE sexp
			{
		/* Expand the '<>' operation with our set of quad operations */
		/* Result is 0 unless the difference is non-zero. */
			temp1=next_tv();
			temp2=next_tv();
			temp=next_lbl();
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD("","SUB",$1,$3,temp2);
			PRINT_QUAD("","JZ",temp2,"",temp);
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	|	sexp GT sexp
			{
		/* Expand the '>' operation with our set of quad operations */
		/* Result stays 1 when (left - right) takes the JGZ branch. */
			temp1=next_tv();
			temp2=next_tv();
			temp=next_lbl();
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD("","SUB",$1,$3,temp2);
			PRINT_QUAD("","JGZ",temp2,"",temp);
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	|	sexp GE sexp
			{
		/* Expand the '>=' operation with our set of quad operations */
		/* Operands are swapped: result stays 0 when (right - left) */
		/* takes the JGZ branch, otherwise it becomes 1. */
			temp1=next_tv();
			temp2=next_tv();
			temp=next_lbl();
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD("","SUB",$3,$1,temp2);
			PRINT_QUAD("","JGZ",temp2,"",temp);
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	|	sexp LT sexp
			{
		/* Expand the '<' operation with our set of quad operations */
		/* Operands are swapped: result stays 1 when (right - left) */
		/* takes the JGZ branch. */
			temp1=next_tv();
			temp2=next_tv();
			temp=next_lbl();
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD("","SUB",$3,$1,temp2);
			PRINT_QUAD("","JGZ",temp2,"",temp);
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	|	sexp LE sexp
			{
		/* Expand the '<=' operation with our set of quad operations */
		/* Result stays 0 when (left - right) takes the JGZ branch, */
		/* otherwise it becomes 1. */
			temp1=next_tv();
			temp2=next_tv();
			temp=next_lbl();
			PRINT_QUAD("","MOV","0","",temp1);
			PRINT_QUAD("","SUB",$1,$3,temp2);
			PRINT_QUAD("","JGZ",temp2,"",temp);
			PRINT_QUAD("","MOV","1","",temp1);
			PRINT_QUAD(temp,"NOP","","","");
			$$ = temp1;
			free(temp);
			free(temp2);
			}
	;
sexp
		/* Simple expression: terms combined with +, - and or, */
		/* plus the unary sign forms.  The rule's value is the name */
		/* holding the result (a new temporary for computed values). */
	:	term
	|	sexp PL term
			{
			temp=next_tv();
			PRINT_QUAD("","ADD",$1,$3,temp);
			$$ = temp;
			}
	|	sexp MI term
			{
			temp=next_tv();
			PRINT_QUAD("","SUB",$1,$3,temp);
			$$ = temp;
			}
	|	sexp OR term
			{
		/* Expand the 'or' operation with our set of quad operations */
		/* Result starts at 0 and is set to 1 for each operand */
		/* that is non-zero. */
			temp=next_tv();
			temp1=next_lbl();
			temp2=next_lbl();
			PRINT_QUAD("","MOV","0","",temp);
			PRINT_QUAD("","JZ",$1,"",temp1);
			PRINT_QUAD("","MOV","1","",temp);
			PRINT_QUAD(temp1,"JZ",$3,"",temp2);
			PRINT_QUAD("","MOV","1","",temp);
			PRINT_QUAD(temp2,"NOP","","","");
			$$ = temp;
			free(temp1);
			free(temp2);
			}
	|	MI term   %prec UMIN
			{
		/* Expand unary minus with our set of quad operations. */
		/* The result goes into a fresh temporary: the operand may */
		/* be a program variable, which a negation must not alter. */
			temp=next_tv();
			PRINT_QUAD("","SUB","0",$2,temp);
			$$ = temp;
			}
	|	PL term   %prec UMIN
			{
		/* Unary plus: the operand is the result. */
			$$ = $2;
			}
	;
term
		/* Term: factors combined with the multiplying operators */
		/* and 'and'.  Each computed result goes into a new */
		/* temporary, whose name is the rule's value. */
	:	factor
	|	term MU factor
			{
			temp=next_tv();
			PRINT_QUAD("","MUL",$1,$3,temp);
			$$ = temp;
			}
	|	term DI factor
			{
			temp=next_tv();
			PRINT_QUAD("","DI",$1,$3,temp);
			$$ = temp;
			}
	|	term DIV factor
			{
			temp=next_tv();
			PRINT_QUAD("","DIV",$1,$3,temp);
			$$ = temp;
			}
	|	term MOD factor
			{
			temp=next_tv();
			PRINT_QUAD("","MOD",$1,$3,temp);
			$$ = temp;
			}
	|	term AND factor
			{ 
		/* Expand the 'and' operation with our set of quad operations */
		/* Result starts at 0; if either operand is zero, jump */
		/* past the MOV that would set it to 1. */
			temp=next_tv();
			temp1=next_lbl();
			PRINT_QUAD("","MOV","0","",temp);
			PRINT_QUAD("","JZ",$1,"",temp1);
			PRINT_QUAD("","JZ",$3,"",temp1);
			PRINT_QUAD("","MOV","1","",temp);
			PRINT_QUAD(temp1,"NOP","","","");
			$$ = temp;
		/* The label has been printed and is not kept anywhere. */
			free(temp1);
			}
	;
factor
		/* Factor: a variable, a function call, a number, a */
		/* parenthesised expression or a negated factor.  The rule's */
		/* value is the name holding the factor's value. */
	:	var
	|	ID LP explist RP
			{
		/* Convert circular list of expressions to a linear list */
		/* Write "PUSH" commands to send the expressions, the */
		/* CALL command to call the subroutine, and the PULL */
		/* command to return the value */
			list1=((STR_LST *)$3)->next;
			((STR_LST *)$3)->next=NULL;
			while (list1!=NULL)
			  {
			  PRINT_QUAD("","PUSH","","",list1->str1);
			  list2=list1->next;
			  free(list1);
			  list1=list2;
			  }
			PRINT_QUAD("","CALL","","",$1);
			temp1=next_tv();
			PRINT_QUAD("","PULL","","",temp1);
			$$=temp1;
			}
	|	NUM
		{
		/* Stack a private copy of the number's text. */
		$$=(char *)malloc(strlen(yylval)+1);
		strcpy($$,yylval);
		}
	|	LP exp RP
		{
		$$ = $2;
		}
	|	NOT factor
		{
		/* Expand the 'not' operation with our set of quad operations */
		/* Result starts at 1 and is cleared unless the operand is 0. */
		temp=next_lbl();
		temp1=next_tv();
		PRINT_QUAD("","MOV","1","",temp1);
		PRINT_QUAD("","JZ",$2,"",temp);
		PRINT_QUAD("","MOV","0","",temp1);
		PRINT_QUAD(temp,"NOP","","","");
		/* Hand the result temporary to the enclosing rule; without */
		/* this the value would default to that of the NOT token. */
		$$ = temp1;
		free(temp);
		}
	;
%%

#include "lex.yy.c"

/* Return a unique temporary variable name ("T" followed by the current */
/* value of tv_cnt, which is then incremented).  Note that compiler */
/* generated labels start with upper case letters.  The string is */
/* allocated with malloc; the caller owns it.  Exits the program if no */
/* memory is available. */
char *next_tv()
{
	char *s;
	/* Prefix letter + sign + at most 3 decimal digits per byte of */
	/* an int + terminator: large enough for any counter value. */
	s=(char *)malloc(1 + 1 + 3*sizeof(int) + 1);
	if (s==NULL) 
	  {
	  printf("no room: next_tv (tv_cnt=%d)...\n",tv_cnt);
	  exit(1);
	  }
	sprintf(s,"T%d",tv_cnt++);
	return(s);
}

/* Return a unique label ("L" followed by the current value of lbl_cnt, */
/* which is then incremented).  The string is allocated with malloc; */
/* the caller owns it.  Exits the program if no memory is available. */
char *next_lbl()
{
	char *s;
	/* Prefix letter + sign + at most 3 decimal digits per byte of */
	/* an int + terminator: large enough for any counter value. */
	s=(char *)malloc(1 + 1 + 3*sizeof(int) + 1);
	if (s==NULL) 
	  {
	  printf("no room: next_lbl (lbl_cnt=%d)...\n",lbl_cnt);
	  exit(1);
	  }
	sprintf(s,"L%d",lbl_cnt++);
	return(s);
}
	
/* Push a private copy of the string c (typically a label) onto the */
/* stack headed by lbl_stk.  The caller keeps ownership of c itself. */
/* Exits the program if either allocation fails. */
void push_lbl(c)
char *c;
{
	STR_LST *node;
	char *copy;

	node=(STR_LST *)malloc(sizeof *node);
	if (node==NULL)
	  {
	  printf("no room: push_lbl (stack)\n");
	  exit(1);
	  }
	/* The new node sits in front of the current stack top. */
	node->next=lbl_stk;
	copy=(char *)malloc(strlen(c)+1);
	node->str1=copy;
	if (copy==NULL)
	  {
	  printf("no room: push_lbl (string)\n");
	  exit(1);
	  }
	strcpy(copy,c);
	lbl_stk=node;
}

/* retrieve a character value from the "label stack". */
char *pop_lbl()
{
	char *s;
	STR_LST *l;
	s=lbl_stk->str1;
	l=lbl_stk;
	lbl_stk=lbl_stk->next;
	free(l);
	return(s);
}
	
/***********  Symbol table routines **********/

/* Generate a new, blank symbol: no names, type ' ', zero bounds, */
/* offset and parameter count, and all links NULL.  The symbol is */
/* allocated with malloc.  Exits the program if no memory is available, */
/* in the same way as the other allocating helpers in this file. */
SYM *new_sym() 
{
	SYM *s;
	s=(SYM *)malloc(sizeof(SYM));
	if (s==NULL)
	  {
	  printf("no room: new_sym\n");
	  exit(1);
	  }
	s->block=NULL;
	s->name=NULL;
	s->type=' ';
	s->start=0;
	s->end=0;
	s->offset=0;
	s->nparms=0;
	s->parmlist=NULL;
	s->next=NULL;
	s->last=NULL;
	return(s);
}

/* Look for the symbol in the current context.  Return its pointer. */
/* The search starts at the top of the symbol stack and follows the */
/* "last" links towards the bottom, so the most recently stacked symbol */
/* named s is the one returned.  Returns NULL when the stack is empty */
/* or no symbol has that name. */
SYM *find_sym(s)
char *s;
{
	SYM *f;

	for (f=top; f!=NULL; f=f->last)
	  {
	  /* Stop ON the matching symbol; symbols whose name has not */
	  /* been filled in yet cannot match. */
	  if (f->name!=NULL && strcmp(s,f->name)==0) break;
	  }
	return(f);
}

/* Add a new symbol to the top of the symbol stack */
void add_sym(str,t)
char *str; /* name of new symbol */
char *t; /* type of symbol */
{
	SYM *s, *ss;

	ss=find_sym(str);
	if (ss!=NULL)
	  if (0==strcmp(ss->block,top->block))
	    {
	    printf("ERROR: duplicate declaration of name %s in block %s\n",
		ss->name, ss->block);
	    return;
	    }
	s=new_sym();
	s->block=strsave(top->block);
	top->next=s;
	s->last=top;
	top=s;
	top->name=str;
	if (t[0]=='i' || t[0]=='r') top->type=t[0];
	else
	  {
	  sscanf(t,"ary %d %d %c", &(top->start), &(top->end), &(top->type));
	  top->type -=32;  /* switch to upper case */
	  }
	top->offset=offset;
	offset = 4 * (top->end - top->start + 1) + offset;
	return;
}

/* Print the type description of symbol v (nothing for an unrecognised */
/* type code) followed by its offset and a newline. */
static void print_type_and_offset(v)
SYM *v;
{
	switch (v->type)
	  {
	  case 'i': printf("integer "); break;
	  case 'r': printf("real "); break;
	  case 'I': printf("integer [%d..%d] ",v->start,v->end); break;
	  case 'R': printf("real [%d..%d] ",v->start,v->end); break;
	  case 'u': printf("unknown "); break;
	  default:  break;
	  }
	printf("%d\n",v->offset);
}

/* Print the current symbol table.  A "*B" line names the block of sub; */
/* then every symbol from the top of the stack down (following the */
/* "last" links) is listed.  Symbols with a NULL parmlist are printed */
/* as variables ("*V"); the others get a "*R", "*P" or "*F" line with */
/* their parameter count, followed by one "*P<n>" line per parameter. */
void print_sym(sub)
SYM *sub;
{
	SYM *cur, *parm;
	int n;

	printf("*B %s\n",sub->block);
	for (cur=top; cur!=NULL; cur=cur->last)
	  {
	  if (cur->parmlist==NULL)
	    {
	    printf("*V %s %s ",cur->block,cur->name);
	    print_type_and_offset(cur);
	    continue;
	    }

	  /* Typed => "*F"; untyped with a predecessor => "*R"; */
	  /* untyped at the bottom of the stack => "*P". */
	  if (cur->type!=' ') printf("*F ");
	  else if (cur->last!=NULL) printf("*R ");
	  else printf("*P ");
	  printf("%s %s ",cur->block,cur->name);
	  if (cur->type == 'i') printf("integer ");
	  else if (cur->type == 'r') printf("real ");
	  printf("%d\n",cur->nparms);

	  parm=cur->parmlist;
	  if (parm->next==cur) continue;
	  for (n=1; n<=cur->nparms; n++)
	    {
	    printf("*P%d ",n);
	    print_type_and_offset(parm);
	    parm=parm->next;
	    }
	  }
}

/* Free every symbol from s onwards, following the "next" links to the */
/* top of the stack.  Each symbol's block and name strings are freed */
/* along with the symbol itself.  A NULL s is accepted. */
void free_sym_chain(s)
SYM *s;
{
	SYM *following;

	for ( ; s!=NULL; s=following)
	  {
	  /* Remember the successor before this symbol goes away. */
	  following=s->next;
	  free(s->block);
	  free(s->name);
	  free(s);
	  }
}

/* Parse error routine */
yyerror(s)
char *s;
{ printf("%s error at line no. %d \n",s,lineno);
}

/* Program entry point: run the parser and hand its result back as the */
/* exit status instead of discarding it. */
int main()
{
	return yyparse();
}
