RTC INSTITUTE OF TECHNOLOGY
(Approved by AICTE, New Delhi & Gov. of Jharkhand and Affiliated to Jharkhand University of Technology, Ranchi)
Anandi, Ormanjhi, Ranchi – 835219, Jharkhand
Compiler Design Lab Manual
(CS502P, Credit-1)
List of Experiments
1. To Design a lexical analyzer for given language to recognize a few patterns in C (Ex.
identifiers, constants, comments, operators etc.) and the lexical analyzer should
ignore redundant spaces, tabs, andnew lines.
2. To test whether a given identifier is valid or not.
3. To find out the FIRSTPOS and FOLLOWPOS for a given expression.
4. To implement LL (1) parser.
5. To implement Recursive Descent parser.
6. To implement a Symbol Table.
7. To identify that, for a given set of grammar, whether the string belongs to that grammar or
not.
COMPILER DESIGN LAB
AIM:
1. To Design a lexical analyzer for given language to recognize a few patterns in C (Ex.
identifiers, constants, comments, operators etc.) and the lexical analyzer should ignore
redundant spaces, tabs, and new lines.
PROGRAM :
#include<string.h>
#include<ctype.h>
#include<stdio.h>
void keyword(char str[10])
{
if(strcmp("for",str)==0||strcmp("while",str)==0||strcmp("do",str)==0||strcmp("int",str)==0||strcmp(
"float",str)==0||strcmp("char",str)==0||strcmp("double",str)==0||strcmp("static",str)==0||strcmp("s
witch",str)==0||strcmp("case",str)==0)
printf("\n%s is a keyword",str);
else
printf("\n%s is an identifier",str);
}
main()
{
FILE *f1,*f2,*f3;
char c, str[10], st1[10];
int num[100], lineno=0, tokenvalue=0,i=0,j=0,k=0;
printf("\n Enter the c program : "); /*gets(st1);*/
f1=fopen("input","w");
while((c=getchar())!=EOF)
putc(c,f1);
fclose(f1);
f1=fopen("input","r");
f2=fopen("identifier","w");
f3=fopen("specialchar","w");
while((c=getc(f1))!=EOF)
{
if(isdigit(c))
{
tokenvalue=c-'0';
c=getc(f1);
while(isdigit(c))
{
tokenvalue*=10+c-'0';
c=getc(f1);
}
num[i++]=tokenvalue;
ungetc(c,f1);
}
else
if(isalpha(c))
{
putc(c,f2);
c=getc(f1);
while(isdigit(c)||isalpha(c)||c=='_'||c=='$')
{
putc(c,f2);
c=getc(f1);
}
putc(' ',f2);
ungetc(c,f1);
}
else
if(c==' '||c=='\t')
printf(" ");
22 | P a g e
else
if(c=='\n')
lineno++;
else
putc(c,f3);
}
fclose(f2);
fclose(f3);
fclose(f1);
printf("\n The no's in the program are :");
for(j=0; j<i; j++)
printf("%d", num[j]);
printf("\n");
f2=fopen("identifier", "r");
k=0;
printf("The keywords and identifiers are:");
while((c=getc(f2))!=EOF)
{
if(c!=' ')
str[k++]=c;
else
{
str[k]='\0';
keyword(str);
k=0;
}
}
fclose(f2);
f3=fopen("specialchar","r");
printf("\n Special characters are : ");
while((c=getc(f3))!=EOF)
printf("%c",c);
printf("\n");
fclose(f3);
printf("Total no. of lines are:%d", lineno);
}
Output :
Enter the C program: a+b*c
Ctrl-D
The no’s in the program are:
The keywords and identifiers are:
a is an identifier and terminal
b is an identifier and terminal
c is an identifier and terminal
Special characters are:
+*
Total no. of lines are: 1
AIM:
2. To test whether a given identifier is valid or not
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<ctype.h>
void main()
{
char a[10];
int flag, i=1;
clrscr();
printf("\n Enter an identifier:");
gets(a);
if(isalpha(a[0]))
flag=1;
else
printf("\n Not a valid identifier");
while(a[i]!='\0')
{
if(!isdigit(a[i])&&!isalpha(a[i]))
{
flag=0;
break;
}
i++;
}
if(flag==1)
printf("\n Valid identifier");
getch();
}
OUTPUT:
Enter an identifier: first
Valid identifier
Enter an identifier:1aqw
Not a valid identifier
AIM:
3. To find out the FIRSTPOS and FOLLOWPOS for a given expression
PROGRAM:
#include<stdio.h>
#include<ctype.h>
#include<string.h>
// Functions to calculate Follow
void followfirst(char, int, int);
void follow(char c);
// Function to calculate First
void findfirst(char, int, int);
int count, n = 0;
// Stores the final result of the First Sets
char calc_first[10][100];
// Stores the final result of the Follow Sets
char calc_follow[10][100];
int m = 0;
// Stores the production rules
char production[10][10];
char f[10], first[10];
int k;
char ck;
int e;
int main(int argc, char **argv)
{
int jm = 0;
int km = 0;
int i, choice;
char c, ch;
count = 8;
// The Input grammar
strcpy(production[0], "E=TR");
strcpy(production[1], "R=+TR");
strcpy(production[2], "R=#");
strcpy(production[3], "T=FY");
strcpy(production[4], "Y=*FY");
strcpy(production[5], "Y=#");
strcpy(production[6], "F=(E)");
strcpy(production[7], "F=i");
int kay;
char done[count];
int ptr = -1;
// Initializing the calc_first array
for(k = 0; k < count; k++) {
for(kay = 0; kay < 100; kay++) {
calc_first[k][kay] = '!';
}
}
int point1 = 0, point2, xxx;
for(k = 0; k < count; k++)
{
c = production[k][0];
point2 = 0;
xxx = 0;
// Checking if First of c has
// already been calculated
for(kay = 0; kay <= ptr; kay++)
if(c == done[kay])
xxx = 1;
if (xxx == 1)
continue;
// Function call
findfirst(c, 0, 0);
ptr += 1;
// Adding c to the calculated list
done[ptr] = c;
printf("\n First(%c) = { ", c);
calc_first[point1][point2++] = c;
// Printing the First Sets of the grammar
for(i = 0 + jm; i < n; i++) {
int lark = 0, chk = 0;
for(lark = 0; lark < point2; lark++) {
if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if(chk == 0)
{
printf("%c, ", first[i]);
calc_first[point1][point2++] = first[i];
}
}
printf("}\n");
jm = n;
point1++;
}
printf("\n");
printf("-----------------------------------------------\n\n");
char donee[count];
ptr = -1;
// Initializing the calc_follow array
for(k = 0; k < count; k++) {
for(kay = 0; kay < 100; kay++) {
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for(e = 0; e < count; e++)
{
ck = production[e][0];
point2 = 0;
xxx = 0;
// Checking if Follow of ck
// has already been calculated
for(kay = 0; kay <= ptr; kay++)
if(ck == donee[kay])
xxx = 1;
if (xxx == 1)
continue;
land += 1;
// Function call
follow(ck);
ptr += 1;
// Adding ck to the calculated list
donee[ptr] = ck;
printf(" Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;
// Printing the Follow Sets of the grammar
for(i = 0 + km; i < m; i++) {
int lark = 0, chk = 0;
for(lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])
{
chk = 1;
break;
}
}
if(chk == 0)
{
printf("%c, ", f[i]);
calc_follow[point1][point2++] = f[i];
}
}
printf(" }\n\n");
km = m;
point1++;
}
}
void follow(char c)
{
int i, j;
// Adding "$" to the follow
// set of the start symbol
if(production[0][0] == c) {
f[m++] = '$';
}
for(i = 0; i < 10; i++)
{
for(j = 2;j < 10; j++)
{
if(production[i][j] == c)
{
if(production[i][j+1] != '\0')
{
// Calculate the first of the next
// Non-Terminal in the production
followfirst(production[i][j+1], i, (j+2));
}
if(production[i][j+1]=='\0' && c!=production[i][0])
{
// Calculate the follow of the Non-Terminal
// in the L.H.S. of the production
follow(production[i][0]);
}
}
}
}
}
void findfirst(char c, int q1, int q2)
{
int j;
// The case where we
// encounter a Terminal
if(!(isupper(c))) {
first[n++] = c;
}
for(j = 0; j < count; j++)
{
if(production[j][0] == c)
{
if(production[j][2] == '#')
{
if(production[q1][q2] == '\0')
first[n++] = '#';
else if(production[q1][q2] != '\0'
&& (q1 != 0 || q2 != 0))
{
// Recursion to calculate First of New
// Non-Terminal we encounter after epsilon
findfirst(production[q1][q2], q1, (q2+1));
}
else
first[n++] = '#';
}
else if(!isupper(production[j][2]))
{
first[n++] = production[j][2];
}
else
{
// Recursion to calculate First of
// New Non-Terminal we encounter
// at the beginning
findfirst(production[j][2], j, 3);
}
}
}
}
void followfirst(char c, int c1, int c2)
{
int k;
// The case where we encounter
// a Terminal
if(!(isupper(c)))
f[m++] = c;
else
{
int i = 0, j = 1;
for(i = 0; i < count; i++)
{
if(calc_first[i][0] == c)
break;
}
//Including the First set of the
// Non-Terminal in the Follow of
// the original query
while(calc_first[i][j] != '!')
{
if(calc_first[i][j] != '#')
{
f[m++] = calc_first[i][j];
}
else
{
if(production[c1][c2] == '\0')
{
// Case where we reach the
// end of a production
follow(production[c1][0]);
}
else
{
// Recursion to the next symbol
// in case we encounter a "#"
followfirst(production[c1][c2], c1, c2+1);
}
}
j++;
}
}
}
OUTPUT:
First(E)= { (, i, }
First(R)= { +, #, }
First(T)= { (, i, }
First(Y)= { *, #, }
First(F)= { (, i, }
-----------------------------------------------
Follow(E) = { $, ), }
Follow(R) = { $, ), }
Follow(T) = { +, $, ), }
Follow(Y) = { +, $, ), }
Follow(F) = { *, +, $, ), }
AIM:
4. To implement LL (1) parser.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
char s[20],stack[20];
void main()
{
char m[5][6][3]={"tb"," "," ","tb"," "," "," ","+tb"," "," ","n","n","fc"," "," ","fc"," "," "," ","n","*fc","
a ","n","n","i"," "," ","(e)"," "," "};
int size[5][6]={2,0,0,2,0,0,0,3,0,0,1,1,2,0,0,2,0,0,0,1,3,0,1,1,1,0,0,3,0,0};
int i,j,k,n,str1,str2;
clrscr();
printf("\n Enter the input string: ");
scanf("%s",s);
strcat(s,"$");
n=strlen(s);
stack[0]='$';
stack[1]='e';
i=1;
j=0;
printf("\nStack Input\n");
printf("__________________\n");
while((stack[i]!='$')&&(s[j]!='$'))
{
if(stack[i]==s[j])
{
i--;
j++;
}
switch(stack[i])
{
case 'e':
str1=0;
break;
case 'b':
str1=1;
break;
case 't':
str1=2;
break;
case 'c':
str1=3;
break;
case 'f':
str1=4;
break;
}
switch(s[j])
{ case 'i':
str2=0;
break;
case '+':
str2=1;
break;
case '*': str2=2;
break;
case '(': str2=3;
break;
case ')': str2=4;
break;
case '$': str2=5;
break;
}
if(m[str1][str2][0]=='\0')
{
printf("\nERROR");
exit(0);
}
else if(m[str1][str2][0]=='n')
i--;
else if(m[str1][str2][0]=='i')
stack[i]='i';
else
{
for(k=size[str1][str2]-1;k>=0;k--)
{
stack[i]=m[str1][str2][k];
i++;
}
i--;
}
for(k=0;k<=i;k++)
printf(" %c",stack[k]);
printf(" ");
for(k=j;k<=n;k++)
printf("%c",s[k]);
printf(" \n ");
}
printf("\n SUCCESS");
getch();
INPUT & OUTPUT:
Enter the input string: i*i+i
Stack INPUT
$bt i*i+i$
$bcf i*i+i$
$bci i*i+i$
$bc *i+i$
$bcf* *i+i$
$bcf i+i$
$bci i+i$
$bc +i$
$b +i$
$bt+ +i$
$bt i$
$bcf i$
$ bci i$
$bc $
$b $
$ $
success
AIM:
5. To implement Recursive Descent parser.
Grammar
E->TE'
E'->+TE/@ "@ represents null character"
T->FT'
T`->*FT'/@
F->(E)/ID
PROGRAM LOGIC:
Read the input string.
Write procedures for the non terminals
Verify the next token equals to non terminals if it satisfies match the non terminal.
If the input string does not match print error.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
char input[100];
int i,l;
void main()
{
clrscr();
printf("\nRecursive descent parsing for the following grammar\n");
printf("\nE->TE'\nE'->+TE'/@\nT->FT'\nT'->*FT'/@\nF->(E)/ID\n");
printf("\nEnter the string to be checked:");
gets(input);
if(E())
{
if(input[i+1]=='\0')
printf("\nString is accepted");
else
printf("\nString is not accepted");
}
else
printf("\nString not accepted");
21
getch();
}
E()
{
if(T())
{
if(EP())
return(1);
else
return(0);
}
else
return(0);
}
EP()
{
if(input[i]=='+')
{
i++;
if(T())
{
if(EP())
return(1);
else
return(0);
}
else
return(0);
}
else
return(1);
}
T()
{
if(F())
{
if(TP())
return(1);
else
return(0);
}
else
22
return(0);
}
TP()
{
if(input[i]=='*')
{
i++;
if(F())
{
if(TP())
return(1);
else
return(0);
}
else
return(0);
}
else
return(1);
}
F()
{
if(input[i]=='(')
{
i++;
if(E())
{
if(input[i]==')')
{
i++;
return(1);
}
else
return(0);
}
else
return(0);
}
else if(input[i]>='a'&&input[i]<='z'||input[i]>='A'&&input[i]<='Z')
{
i++;
return(1);
23
}
else
return(0);
}
INPUT & OUTPUT:
Recursive descent parsing for the following grammar
E->TE'
E'->+TE'/@
T->FT'
T'->*FT'/@
F->(E)/ID
Enter the string to be checked: (a+b)*c
String is accepted
Recursive descent parsing for the following grammar
E->TE'
E'->+TE'/@
T->FT'
T'->*FT'/@
F->(E)/ID
Enter the string to be checked: a/c+d
String is not accepted
AIM:
6. To implement a Symbol Table.
PROGRAM:
#include<stdio.h>
#include<math.h>
#include<string.h>
#include<ctype.h>
#include<stdlib.h>
void main()
{
int x=0, n, i=0,j=0;
void *mypointer,*T4Tutorials_address[5];
char ch,T4Tutorials_Search,T4Tutorials_Array2[15],T4Tutorials_Array3[15],c;
printf("Input the expression ending with $ sign:");
while((c=getchar())!='$')
{
T4Tutorials_Array2[i]=c;
i++;
}
n=i-1;
printf("Given Expression:");
i=0;
while(i<=n)
{
printf("%c",T4Tutorials_Array2[i]);
i++;
}
printf("\n Symbol Table display\n");
printf("Symbol \t addr \t type");
while(j<=n)
{
c=T4Tutorials_Array2[j];
if(isalpha(toascii(c)))
{
mypointer=malloc(c);
T4Tutorials_address[x]=mypointer;
T4Tutorials_Array3[x]=c;
printf("\n%c \t %d \t identifier\n",c,mypointer);
x++;
j++;
}
else
{
ch=c;
if(ch=='+'||ch=='-'||ch=='*'||ch=='=')
{
mypointer=malloc(ch);
T4Tutorials_address[x]=mypointer;
T4Tutorials_Array3[x]=ch;
printf("\n %c \t %d \t operator\n",ch,mypointer);
x++;
j++;
}}}}
OUTPUT:
AIM:
7. To identify that, for a given set of grammar, whether the string belongs to that grammar or
not.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<string.h>
void main() {
char string[50];
int flag,count=o;
clrscr();
printf("The grammar is: S->aS, S->Sb, S->ab\n");
printf("Enter the string to be checked:\n");
gets(string);
if(string[0]=='a') {
flag=0;
for (count=1;string[count-1]!='\0';count++) {
if(string[count]=='b') {
flag=1;
continue;
} else if((flag==1)&&(string[count]=='a')) {
printf("The string does not belong to the specified grammar");
break;
} else if(string[count]=='a')
continue; else if(flag==1)&&(string[count]='\0')) {
printf("String accepted…..!!!!");
break;
} else {
printf("String not accepted");
}
}
}
getch();
}
OUTPUT:
The grammer is:
S->aS
S->Sb
S->ab
Enter the string to be checked:
aab
String accepted....!!!!