INDEX
S.No Experiments
1. Setting up LEX and YACC and studying these tools.
Write a lex program to implement the separation of tokens from a source program.
2.
Write a program to implement the lexical analysis phase of compiler.
3.
Develop simple language processors like desk calculator and assembler.
4.
Develop a lexical analyzer and a syntax analyzer for the same using the LEX and YACC
5. tools. Also implement the bookkeeper module.
Develop a simple calculator using LEX and YACC tools.
6.
Represent ‘C’ language using Context Free Grammar.
7.
Implement a program for symbol table using hashing .
8.
Implement a two-pass assembler.
9.
Add assignment statement, If then else statement and while loop to the calculator and
10. generate the three address code for the same.
Practical 1:
AIM:
Setting up LEX and YACC and studying these tools.
THEORY:
● LEX
Lex is a program that generates lexical analyzer. It is used with YACC parser generator.
The lexical analyzer is a program that transforms an input stream into a sequence of tokens.
It reads the input stream and produces the source code as output through implementing the lexical
analyzer in the C program.
Its function is as follows:
1) Firstly lexical analyzer creates a program lex.1 in the Lex language. Then Lex compiler runs the
lex.1 program and produces a C program lex.yy.c.
2) Finally C compiler runs the lex.yy.c program and produces an object program a.out.
3) a.out is lexical analyzer that transforms an input stream into a sequence of tokens.
● YACC
YACC stands for Yet Another Compiler Compiler. It provides a tool to produce a parser for a given
grammar. YACC is a program designed to compile a LALR (1) grammar. It is used to produce the source
code of the syntactic analyzer of the language produced by LALR (1) grammar. The input of YACC is the
rule or grammar and the output is a C program.
Downloading LEX and YACC:
● Download Flex 2.5.4a: http://gnuwin32.sourceforge.net/packages/flex.htm
● Download Bison 2.4.1: http://gnuwin32.sourceforge.net/packages/bison.htm
● Download DevC++: https://www.bloodshed.net/
Installing the Software and setting up the environment variable (path):
● Install Flex at “C:\GnuWin32”
● Install Bison at “C:\GnuWin32”
● Install DevC++ at “C:\Dev-Cpp”
● Open Environment Variables and set the path variables.
● Add “C:\GnuWin32\bin; C:\Dev-Cpp\TDM-GCC-64\bin” to path.
RESULT:
LEX and YACC tools have been installed and studied successfully.
Practical 2:
Write a lex program to implement separation of tokens from a source program.
Procedure:
1.)Open edit plus and make a lex file.
2.)Write your program in it.
3.)Now run your program in cmd with following commands:-
a.)flex filename.l
b.)gcc lex.yy.c
c.)a.exe
4.)Give the input and get desired output.
Code:
%{
#include <stdio.h>
int word = 0;
int id = 0;
int key = 0;
%}
%%
[0-9]+ { printf("<Numerical,%s>", yytext); word++; }
(int|for|while|double|char|main|if|do) { printf("<Keyword,%s>", yytext); word++; key++; }
[\+\=\-\*\%\/] { printf("<Operator,%s>", yytext); }
[a-zA-Z][a-zA-Z0-9_]* { printf("<identifier,%s>", yytext); word++; id++; }
[\,\.\:\"\'\;\(\)\{\}] { printf("<punctuation,%s>", yytext); }
[ \t\n] /* Ignore whitespace */
. { printf("<Unrecognized,%s>", yytext); }
%%
int main()
{ yylex();
printf("<No of words:%d>\n", word);
printf("<No of identifiers:%d>\n", id);
printf("<No of keywords:%d>\n", key);
return 0;}
int yywrap()
{ return 1; }
Output:
Practical 3:
Write a program to implement lexical analysis phase of compiler.
THEORY:
Token: It is basically a sequence of characters treated as a unit as it cannot be further broken down. Tokens are
keywords (int, char, float, const, goto, continue, etc.) identifiers (user-defined names), operators (+, -, *, /),
delimiters/punctuators like comma (,), semicolon(;), braces ({ }), special symbols, constants, strings.
LEX program Code:
%{
int n = 0 ;
int chk=0;
%}
%%
"while" | "if" | "else" | "main" | "return" | "printf" | "int" | "float" | "bool" {keyword();}
[a-zA-Z_]*[a-zA-Z0-9_]* {iden();}
"<=" | "==" | "=" | "+" | "-" | "*" | "++" | "<" | ">" | ">=" {oper();}
['"] {quotes();}
[(){}|,;] {separator();}
[0-9]*"."*[0-9]+ {f();}
[0-9]+ {i();}
.;
%%
void keyword(){
if(chk==0){
n++;
printf("keywords : %s\n", yytext);
}
}
void iden(){
if(chk==0){
n++;
printf("identifier : %s \n", yytext);
}
}
void oper(){
if(chk==0){
n++;
printf("operator : %s\n", yytext);
}
}
void separator(){
if(chk==0){
n++;
printf("separator : %s\n", yytext);
}
}
void f(){
if(chk==0){
n++;
printf("float : %s\n", yytext);
}
}
void i(){
if(chk==0){
n++;
printf("integer : %s\n", yytext);
}
}
void quotes(){
if(chk==1){
n++;
printf("quotes\n");
chk=0;
}
else{
chk=1;
}
}
int yywrap(){
return 1;
}
int main()
{
yylex();
printf("\n total no. of token = %d\n", n);
}
Output:
Practical-4:
Develop simple language processors like desk calculator and assembler.
Procedure:
1.) Open edit plus and make a lex file.
2.) Write your program in it.
3.) Now run your program in cmd with following commands:-
a.) flex filename.l
b.) gcc lex.yy.c
c.) a.exe
4.) Give the input and get desired output.
Code:
%{
#undef yywrap
#define yywrap() 1
int f1=0,f2=0;
char oper;
float op1=0,op2=0,ans=0;
void eval();
%}
DIGIT [0-9]
NUM {DIGIT}+(\.{DIGIT}+)?
OP [*/+-]
%%
{NUM} {
if(f1==0)
{
op1=atof(yytext);
f1=1;
}
else if(f2==-1)
{
op2=atof(yytext);
f2=1;
}
if((f1==1) && (f2==1))
{
eval();
f1=0;
f2=0;}
}
{OP} {
oper=(char) *yytext;
f2=-1;
}[\n] {
if(f1==1 && f2==1)
{
eval;
f1=0;
f2=0;
}}
%%
main()
{
yylex();
}
void eval()
{switch(oper)
{
case '+':
ans=op1+op2;
break;
case '-':
ans=op1-op2;
break;
case '*':
ans=op1*op2;
break;
case '/':
if(op2==0)
{
printf("ERROR");
return;
}
else
{
ans=op1/op2;
}
break;
default:
printf("operation not available");
break;
}
printf("The answer is = %lf",ans);
}
Output:
Practical 5 :
Develop a lexical analyzer and a syntax analyzer for the same using the LEX and YACC tools.
Calc.l file
%{
#include<stdio.h>
#include "y.tab.h"
extern int yylval;
%}
%%
[0-9]+ {
yylval=atoi(yytext);
return NUMBER;
}
[\t] ;
[\n] return 0;
. return yytext[0];
%%
int yywrap()
{
return 1;
}
Calc.y file
%{
#include<stdio.h>
int flag=0;
%}
%token NUMBER
%left '+' '-' '*' '/' '%' '(' ')'
%%
ArithmeticExpression: E{
printf("Result=%d\n", $$);
return 0;
};
E:E'+'E {$$=$1+$3;}
|E'-'E {$$=$1-$3;}
|E''E {$$=$1$3;}
|E'/'E {$$=$1/$3;}
|E'%'E {$$=$1%$3;}
|'('E')' {$$=$2;}
| NUMBER {$$=$1;}
;
%%
void main()
{
printf("\nEnter Any Arithmetic Expression :\n");
yyparse();
if(flag==0)
printf("\nEntered arithmetic expression is Valid\n\n");
}
void yyerror()
{
printf("\nEntered arithmetic expression is Invalid\n\n");
flag=1;
}
Output:
Practical 6 :
Develop a simple calculator using LEX and YACC tools.
Lex file
%{
#include<stdio.h> #include "y.tab.h"
extern int yylval;
%}
%%
[0-9]+ { yylval=atoi(yytext);
return NUMBER;
}
[\t] ;
[\n] return 0;
. return yytext[0];
%%
int yywrap()
{
return 1;
}
Yacc file
%{
#include<stdio.h> int flag=0;
%}
%token NUMBER
%left '+' '-' '*' '/' '%' '(' ')'
%%
ArithmeticExpression: E{ printf("Result=%d\n", $$);
return 0;
};
E:E'+'E {$$=$1+$3;}
|E'-'E {$$=$1-$3;}
|E'*'E {$$=$1*$3;}
|E'/'E {$$=$1/$3;}
|E'%'E {$$=$1%$3;}
|'('E')' {$$=$2;}
| NUMBER {$$=$1;}
;
%%
void main()
{
printf("\nEnter Any Arithmetic Expression :\n"); yyparse();
if(flag==0) printf("\nEntered arithmetic expression is Valid\n\n");
}
void yyerror()
{
printf("\nEntered arithmetic expression is Invalid\n\n"); flag=1;
}
Output:
Practical 7:
AIM:Represent ‘C’ language using Context Free Grammar .
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
using namespace std;
void ReadGrammar(string file, vector<string>& pro)
{
string line;
ifstream myfile;
myfile.open(file);
while (getline(myfile, line))
{
pro.push_back(line);
}
}
bool RecognizeString(vector<string>& pro, string str)
{
int n, v, s, p, i, l;
n = str.length();
v = pro.size();
vector< vector< vector<bool> > > dp(n + 1, vector< vector <bool> >(n + 1, vector<bool>(26, false)));
for (s = 0; s < n; ++s)
{
for (i = 0; i < v; ++i)
{
if (pro[i].length() == 4 && pro[i][3] == str[s])
{
dp[1][s][pro[i][0] - 'A'] = true;
}
}
}
for (l = 2; l <= n; ++l)
{
for (s = 0; s < n - l + 1; ++s)
{
for (p = 1; p <= l - 1; ++p)
{
for (i = 0; i < v; ++i)
{
if (pro[i].length() == 5 && dp[p][s][pro[i][3] - 'A'] && dp[l - p][s + p][pro[i][4] - 'A'])
{
dp[l][s][pro[i][0] - 'A'] = true;
}
}
}
}
}
return dp[n][0]['S' - 'A'];
int main()
{
vector<string> pro;
string str;
ReadGrammar("text.txt", pro);
cout << "Context-free Grammar" << endl;
for (int i = 0; i < pro.size(); ++i)
{
cout << pro[i] << endl;
}
cout << "Enter a String: ";
cin >> str;
if (RecognizeString(pro, str))
cout << "The input string can be derived using given CFL" << endl;
else
cout << "The input string cannot be derived using given CFL" << endl;
Text.txt file:
Output:
Practical 8:
AIM:
Implement a program for symbol table using hashing .
Code:
// C++ program to implement Symbol Table
#include <iostream>
using namespace std;
const int MAX = 100;
class Node {
string identifier, scope, type;
int lineNo;
Node* next;
public:
Node()
{
next = NULL;
}
Node(string key, string value, string type, int lineNo)
{
this->identifier = key;
this->scope = value;
this->type = type;
this->lineNo = lineNo;
next = NULL;
}
void print()
{
cout << "Identifier's Name:" << identifier
<< "\nType:" << type
<< "\nScope: " << scope
<< "\nLine Number: " << lineNo << endl;
}
friend class SymbolTable;
};
class SymbolTable {
Node* head[MAX];
public:
SymbolTable()
{
for (int i = 0; i < MAX; i++)
head[i] = NULL;
}
int hashf(string id); // hash function
bool insert(string id, string scope,
string Type, int lineno);
string find(string id);
bool deleteRecord(string id);
bool modify(string id, string scope,
string Type, int lineno);
};
// Function to modify an identifier
bool SymbolTable::modify(string id, string s,string t, int l)
{
int index = hashf(id);
Node* start = head[index];
if (start == NULL)
return "-1";
while (start != NULL) {
if (start->identifier == id) {
start->scope = s;
start->type = t;
start->lineNo = l;
return true;
}
start = start->next;
}
return false; // id not found
}
// Function to delete an identifier
bool SymbolTable::deleteRecord(string id)
{
int index = hashf(id);
Node* tmp = head[index];
Node* par = head[index];
// no identifier is present at that index
if (tmp == NULL) {
return false;
}
// only one identifier is present
if (tmp->identifier == id && tmp->next == NULL) {
tmp->next = NULL;
delete tmp;
return true;
}
while (tmp->identifier != id && tmp->next != NULL) {
par = tmp;
tmp = tmp->next;
}
if (tmp->identifier == id && tmp->next != NULL) {
par->next = tmp->next;
tmp->next = NULL;
delete tmp;
return true;
}
// delete at the end
else {
par->next = NULL;
tmp->next = NULL;
delete tmp;
return true;
}
return false;
}
// Function to find an identifier
string SymbolTable::find(string id)
{
int index = hashf(id);
Node* start = head[index];
if (start == NULL)
return "-1";
while (start != NULL) {
if (start->identifier == id) {
start->print();
return start->scope;
}
start = start->next;
}
return "-1"; // not found
}
// Function to insert an identifier
bool SymbolTable::insert(string id, string scope,string Type, int lineno)
{
int index = hashf(id);
Node* p = new Node(id, scope, Type, lineno);
if (head[index] == NULL) {
head[index] = p;
cout << "\n"
<< id << " inserted";
return true;
}
else {
Node* start = head[index];
while (start->next != NULL)
start = start->next;
start->next = p;
cout << "\n"
<< id << " inserted";
return true;
}
return false;
}
int SymbolTable::hashf(string id)
{
int asciiSum = 0;
for (int i = 0; i < id.length(); i++) {
asciiSum = asciiSum + id[i];
}
return (asciiSum % 100);
}
// Driver code
int main()
{
SymbolTable st;
string check;
cout << "**** SYMBOL_TABLE ****\n";
// insert 'if'
if (st.insert("if", "local", "keyword", 4))
cout << " -successfully";
else
cout << "\nFailed to insert.\n";
// insert 'number'
if (st.insert("number", "global", "variable", 2))
cout << " -successfully\n\n";
else
cout << "\nFailed to insert\n";
// find 'if'
check = st.find("if");
if (check != "-1")
cout << "Identifier Is present\n";
else
cout << "\nIdentifier Not Present\n";
// delete 'if'
if (st.deleteRecord("if"))
cout << "if Identifier is deleted\n";
else
cout << "\nFailed to delete\n";
// modify 'number'
if (st.modify("number", "global", "variable", 3))
cout << "\nNumber Identifier updated\n";
// find and print 'number'
check = st.find("number");
if (check != "-1")
cout << "Identifier Is present\n";
else
cout << "\nIdentifier Not Present";
return 0;
}
Output :
Practical 9:
AIM:Implement a two-pass assembler.
Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <conio.h>
struct opTable
{
char code[10], objcode[10];
} myOpT[3] = {
"LDA", "00",
"STA", "0C",
"LDCH", "50"};
struct symbolTable
{
char symbol[10];
int addr;
} mySymTab[10];
int startAddress, locCounter, symCount = 0, length;
char line[20], label[8], opcode[8], operand[8], programName[10];
void checkLabel()
{
int k, dupSymbol = 0;
for (k = 0; k < symCount; k++)
if (!strcmp(label, mySymTab[k].symbol))
{
mySymTab[k].addr = -1;
dupSymbol = 1;
break;
}
if (!dupSymbol)
{
strcpy(mySymTab[symCount].symbol, label);
mySymTab[symCount++].addr = locCounter;
}
}
void checkOpCode()
{
int k = 0, found = 0;
for (k = 0; k < 3; k++)
if (!strcmp(opcode, myOpT[k].code))
{
locCounter += 3;
found = 1;
break;
}
if (!found)
{
if (!strcmp(opcode, "WORD"))
locCounter += 3;
else if (!strcmp(opcode, "RESW"))
locCounter += (3 * atoi(operand));
else if (!strcmp(opcode, "RESB"))
locCounter += atoi(operand);
}
}
void readLine()
{
char buff[8], word1[8], word2[8], word3[8];
int i, j = 0, count = 0;
label[0] = opcode[0] = operand[0] = word1[0] = word2[0] = word3[0] = '\0';
for (i = 0; line[i] != '\0'; i++)
{
if (line[i] != ' ')
buff[j++] = line[i];
else
{
buff[j] = '\0';
strcpy(word3, word2);
strcpy(word2, word1);
strcpy(word1, buff);
j = 0;
count++;
}
}
buff[j - 1] = '\0';
strcpy(word3, word2);
strcpy(word2, word1);
strcpy(word1, buff);
switch (count)
{
case 0:
strcpy(opcode, word1);
break;
case 1:
{
strcpy(opcode, word2);
strcpy(operand, word1);
}
break;
case 2:
{
strcpy(label, word3);
strcpy(opcode, word2);
strcpy(operand, word1);
}
break;
}
}
void PASS1()
{
FILE *input, *inter;
input = fopen("input.txt", "r");
inter = fopen("intermediate.txt", "w");
printf("LOCATION LABEL\tOPERAND\tOPCODE\n");
printf(" ");
fgets(line, 20, input);
readLine();
if (!strcmp(opcode, "START"))
{
startAddress = atoi(operand);
locCounter = startAddress;
strcpy(programName, label);
fprintf(inter, "%s", line);
fgets(line, 20, input);
}
else
{
programName[0] = '\0';
startAddress = 0;
locCounter = 0;
}
printf("\n %d\t %s\t%s\t %s", locCounter, label, opcode, operand);
while (strcmp(line, "END") != 0)
{
readLine();
printf("\n %d\t %s \t%s\t %s", locCounter, label, opcode, operand);
if (label[0] != '\0')
checkLabel();
checkOpCode();
fprintf(inter, "%s %s %s\n", label, opcode, operand);
fgets(line, 20, input);
}
printf("\n %d\t\t%s", locCounter, line);
fprintf(inter, "%s", line);
fclose(inter);
fclose(input);
}
void PASS2()
{
FILE *inter, *output;
char record[30], part[6], value[5];
int currtxtlen = 0, foundopcode, foundoperand, chk, operandaddr, recaddr = 0;
inter = fopen("intermediate.txt", "r");
output = fopen("output.txt", "w");
fgets(line, 20, inter);
readLine();
if (!strcmp(opcode, "START"))
fgets(line, 20, inter);
printf("\n\nObject Code\n");
printf("\nH^ %s ^ %d ^ %d ", programName, startAddress, length);
fprintf(output, "\nH^ %s ^ %d ^ %d ", programName, startAddress, length);
recaddr = startAddress;
record[0] = '\0';
while (strcmp(line, "END") != 0)
{
operandaddr = foundoperand = foundopcode = 0;
value[0] = part[0] = '\0';
readLine();
for (chk = 0; chk < 3; chk++)
{
if (!strcmp(opcode, myOpT[chk].code))
{
foundopcode = 1;
strcpy(part, myOpT[chk].objcode);
if (operand[0] != '\0')
{
for (chk = 0; chk < symCount; chk++)
if (!strcmp(mySymTab[chk].symbol, operand))
{
itoa(mySymTab[chk].addr, value, 10);
strcat(part, value);
foundoperand = 1;
}
if (!foundoperand)
strcat(part, "err");
}
}
}
if (!foundopcode)
{
if (strcmp(opcode, "BYTE") == 0 || strcmp(opcode, "WORD") || strcmp(opcode, "RESB"))
{
strcat(part, operand);
}
}
if ((currtxtlen + strlen(part)) <= 8)
{
strcat(record, "^");
strcat(record, part);
currtxtlen += strlen(part);
}
else
{
printf("\nT^ %d ^%d %s", recaddr, currtxtlen, record);
fprintf(output, "\nT^ %d ^%d %s", recaddr, currtxtlen, record);
recaddr += currtxtlen;
currtxtlen = strlen(part);
strcpy(record, part);
}
fgets(line, 20, inter);
}
printf("\nT^ %d ^%d %s", recaddr, currtxtlen, record);
fprintf(output, "\nT^ %d ^%d %s", recaddr, currtxtlen, record);
printf("\nE^ %d\n", startAddress);
fprintf(output, "\nE^ %d\n", startAddress);
fclose(inter);
fclose(output);
}
int main()
{
PASS1();
length = locCounter - startAddress;
PASS2();
getch();
}
Output:
Practical 10:
AIM:Add assignment statement, If then else statement and while loop to the calculator and generate the three
address code for the same.
Code:
#include <stdio.h>
#include <string.h>
int i, choice, j, l, address = 100;
char userInput[10], expr[10], expr1[10], expr2[10], id1[5], op[5], id2[5];
int main()
{
printf("Enter the Expression : ");
scanf("%s", userInput);
strcpy(expr, userInput);
l = strlen(expr);
expr1[0] = '\0';
for (i = 0; i < 2; i++)
{
if (expr[i] == '+' || expr[i] == '-')
{
if (expr[i + 2] == '/' || expr[i + 2] == '*')
{
strrev(expr);
j = l - i - 1;
strncat(expr1, expr, j);
strrev(expr1);
printf("Three Address Code\nT = %s\nT1 = %c%cT\n", expr1,
expr[j + 1], expr[j]);
}
else
{
strncat(expr1, expr, i + 2);
printf("Three Address Code\nT = %s\nT1 = T%c%c\n", expr1, expr[i + 2], expr[i +3]);
}
}
else if (expr[i] == '/' || expr[i] == '*')
{
strncat(expr1, expr, i + 2);
printf("Three Address Code\nT = %s\nT1 = T%c%c\n", expr1, expr[i + 2],
expr[i + 3]);
}
}
return 0;
}
Output: