IIMT ENGINEERING COLLEGEMEERUT (U.P.
RecognizedbyAICTEandApprovedbyDr.APJAKTU, Lucknow
DEPARTMENTOFCOMPUTERSCIENCEAND ENGINNERING
LAB MANUAL FILE
SESSION(2024-2025)
NAME:-KULDEEP KUMAR
INDEX
COMPILER DESGIN
Program
No. ProgramName Date Signature
To install Flex, Bison and gcc on a Windows machine.
1
Design and implement a lexical analyzer for given language
using C and the lexical analyzer should ignore redundant
2 spaces, tabs and new lines.
Implementation of Lexical Analyzer using Lex Tool
3
Generate YACC specification for a few syntactic categories.
Program to recognize a valid arithmetic expression that uses
operator +, – , * and /.
4 .
Implementation of Calculator using LEX and YACC
5
Write program to convert NFA with ε transition to NFA without
6 ε transition.
Write program to convert NFA to DFA
7
Write program to find Simulate First and Follow of any given
8 grammar
Write a program to perform constant propagation.
9
Implement Intermediate code generation for simple expressions.
10
PRACTICALNo:-01
PRACTICALNo:-02
Design and implement a lexical analyzer for given language using C and the lexical analyzer should
ignore redundant spaces, tabs and new lines.
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Define token types
typedef enum {
KEYWORD,
IDENTIFIER,
NUMBER,
OPERATOR,
UNKNOWN,
END_OF_FILE
} TokenType;
// Keywords list for simple language (example)
const char *keywords[] = {"int", "float", "if", "else", "while", "return"};
#define NUM_KEYWORDS 6
// Function to check if a string is a keyword
int is_keyword(const char *word) {
for (int i = 0; i < NUM_KEYWORDS; i++) {
if (strcmp(word, keywords[i]) == 0) {
return 1;
}
}
return 0;
}
// Function to check if a character is an operator
int is_operator(char c) {
return (c == '+' || c == '-' || c == '*' || c == '/' || c == '=' || c == '<' || c == '>');
}
// Function to handle identifier or keyword
TokenType handle_identifier_or_keyword(const char *word) {
if (is_keyword(word)) {
return KEYWORD;
} else {
return IDENTIFIER;
}
}
// Function to handle number
TokenType handle_number(const char *word) {
return NUMBER;
}
// Function to print the token type
void print_token(TokenType type, const char *lexeme) {
switch (type) {
case KEYWORD:
printf("Keyword: %s\n", lexeme);
break;
case IDENTIFIER:
printf("Identifier: %s\n", lexeme);
break;
case NUMBER:
printf("Number: %s\n", lexeme);
break;
case OPERATOR:
printf("Operator: %s\n", lexeme);
break;
case UNKNOWN:
printf("Unknown: %s\n", lexeme);
break;
case END_OF_FILE:
printf("End of File\n");
break;
}
}
// Lexical analyzer function
void lexical_analyzer(FILE *source_code) {
char c;
char lexeme[100];
int lexeme_index = 0;
TokenType current_type = UNKNOWN;
while ((c = fgetc(source_code)) != EOF) {
// Skip whitespace, tabs, and newlines
if (isspace(c)) {
if (lexeme_index > 0) {
lexeme[lexeme_index] = '\0';
if (isdigit(lexeme[0])) {
current_type = handle_number(lexeme);
} else if (isalpha(lexeme[0]) || lexeme[0] == '_') {
current_type = handle_identifier_or_keyword(lexeme);
}
print_token(current_type, lexeme);
lexeme_index = 0; // Reset lexeme
}
continue;
}
// Collect identifier or keyword characters
if (isalpha(c) || c == '_') {
lexeme[lexeme_index++] = c;
current_type = IDENTIFIER;
}
// Collect numbers
else if (isdigit(c)) {
lexeme[lexeme_index++] = c;
current_type = NUMBER
}
// Operators
else if (is_operator(c)) {
lexeme[0] = c;
lexeme[1] = '\0';
print_token(OPERATOR, lexeme);
}
// Handle unknown characters
else {
lexeme[0] = c;
lexeme[1] = '\0';
print_token(UNKNOWN, lexeme);
}
}
// Final check for any remaining lexeme
if (lexeme_index > 0) {
lexeme[lexeme_index] = '\0';
if (isdigit(lexeme[0])) {
current_type = handle_number(lexeme);
} else if (isalpha(lexeme[0]) || lexeme[0] == '_') {
current_type = handle_identifier_or_keyword(lexeme);
}
print_token(current_type, lexeme);
}
print_token(END_OF_FILE, "");
}
int main() {
FILE *source_code = fopen("source_code.txt", "r");
if (!source_code) {
printf("Error: Could not open file.\n");
return 1;
}
lexical_analyzer(source_code);
fclose(source_code);
return 0;
}
PRACTICALNo:-03
Implementation of Lexical Analyzer using Lex Tool
%{
#include <stdio.h>
#include <stdlib.h>
%}
%%
[0-9]+ { printf("NUMBER: %s\n", yytext); } // Match numbers
[+\-*/] { printf("OPERATOR: %s\n", yytext); } // Match operators
[ \t\n\r] { /* Skip whitespace */ }
. { printf("INVALID CHARACTER: %s\n", yytext); }
%%
int main() {
yylex(); // Invoke the lexer
return 0;
int yywrap() {
return 1;
PRACTICALNo:-04
Generate YACC specification for a few syntactic categories.
a) Program to recognize a valid arithmetic expression that uses operator +, – , * and /.
b) Program to recognize a valid variable which starts with a letter followed by any number of
letters or digits.
c) Implementation of Calculator using LEX and YACC
(C)Implementation of Calculator using LEX and YACC
YACC Code:-
%{
#include <stdio.h>
#include <stdlib.h>
%}
// Declare the types of tokens
%union {
int val; // To store values for numbers
}
%token <val> NUMBER
%token PLUS MINUS MUL DIV LPAREN RPAREN
%type <val> expr
%left PLUS MINUS
%left MUL DIV
%%
// Grammar rules and actions
input:
/* empty */
| input expr '\n' { printf("Result = %d\n", $2); }
;
expr:
expr PLUS expr { $$ = $1 + $3; }
| expr MINUS expr { $$ = $1 - $3; }
| expr MUL expr { $$ = $1 * $3; }
| expr DIV expr { $$ = $1 / $3; }
| LPAREN expr RPAREN { $$ = $2; }
| NUMBER { $$ = $1; }
;
%%
int main() {
printf("Enter an expression: ");
yyparse(); // Start parsing
return 0;
}
int yyerror(char *s) {
printf("Error: %s\n", s);
return 0;
}
Lex Code:-
%{
#include "y.tab.h" // Include YACC header to use token definitions
%}
digit [0-9]
%%
[ \t\n] ; // Ignore whitespaces and newlines
{digit}+ { yylval = atoi(yytext); return NUMBER; } // Recognize numbers
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MUL; }
"/" { return DIV; }
"(" { return LPAREN; }
")" { return RPAREN; }
%%
int yywrap() {
return 1;
}
PRACTICALNo:-05
Program to recognize a valid arithmetic expression that uses
operator +, – , * and /.c
A Lex file (expr.l) for tokenizing input.
%{
#include "y.tab.h"
%}
%%
[0-9]+ { yylval = atoi(yytext); return NUMBER; }
[ \t\n]+ ; // Ignore whitespace
[\+\-\*/] { return *yytext; }
. { return yytext[0]; }
%%
A Yacc file (expr.y) for parsing and validating syntax.
%{
#include <stdio.h>
#include <stdlib.h>
void yyerror(const char *s);
int yylex(void);
%}
%token NUMBER
%left '+' '-'
%left '*' '/'
%%
expr:
expr '+' expr
| expr '-' expr
| expr '*' expr
| expr '/' expr
| NUMBER
;
%%
void yyerror(const char *s) {
printf("Invalid expression\n");
}
int main() {
printf("Enter an arithmetic expression: ");
if (yyparse() == 0) {
printf("Valid expression\n");
}
return 0;
}
PRACTICALNo:-06
Write program to convert NFA with ε transition to NFA without ε
transition.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_STATES 100
#define MAX_SYMBOLS 10
// Data structures for the NFA
typedef struct {
int transitions[MAX_STATES][MAX_SYMBOLS];
int epsilon_transitions[MAX_STATES][MAX_STATES];
int start_state;
int accept_states[MAX_STATES];
int state_count;
int symbol_count;
} NFAWithEpsilon;
typedef struct {
int transitions[MAX_STATES][MAX_SYMBOLS];
int start_state;
int accept_states[MAX_STATES];
int state_count;
int symbol_count;
} NFAWithoutEpsilon;
// Function to calculate epsilon closure for a state
void epsilon_closure(NFAWithEpsilon *nfa, int state, int *closure) {
int stack[MAX_STATES], top = -1;
closure[state] = 1;
stack[++top] = state;
while (top >= 0) {
int current = stack[top--];
for (int next_state = 0; next_state < nfa->state_count; next_state++) {
if (nfa->epsilon_transitions[current][next_state] && !closure[next_state]) {
closure[next_state] = 1;
stack[++top] = next_state;
}
}
}
}
// Function to convert NFA with epsilon transitions to NFA without epsilon transitions
void convert_to_nfa_without_epsilon(NFAWithEpsilon *nfa, NFAWithoutEpsilon *new_nfa) {
int epsilon_closures[MAX_STATES][MAX_STATES] = {0}; // Stores epsilon closures
// Compute epsilon closures for all states
for (int state = 0; state < nfa->state_count; state++) {
epsilon_closure(nfa, state, epsilon_closures[state]);
}
new_nfa->state_count = nfa->state_count;
new_nfa->symbol_count = nfa->symbol_count;
new_nfa->start_state = nfa->start_state;
memset(new_nfa->accept_states, 0, sizeof(new_nfa->accept_states));
// Recalculate transitions for the new NFA
for (int state = 0; state < nfa->state_count; state++) {
for (int symbol = 0; symbol < nfa->symbol_count; symbol++) {
for (int substate = 0; substate < nfa->state_count; substate++) {
if (epsilon_closures[state][substate] && nfa->transitions[substate][symbol]) {
for (int next_state = 0; next_state < nfa->state_count; next_state++) {
if (nfa->transitions[substate][symbol] & (1 << next_state)) {
new_nfa->transitions[state][symbol] |= (1 << next_state);
}
}
}
}
}
}
// New accept states are any state that has an accept state in its epsilon closure
for (int state = 0; state < nfa->state_count; state++) {
for (int closure_state = 0; closure_state < nfa->state_count; closure_state++) {
if (epsilon_closures[state][closure_state] && nfa->accept_states[closure_state]) {
new_nfa->accept_states[state] = 1;
break;
}
}
}
}
// Function to display the NFA without epsilon transitions
void display_nfa_without_epsilon(NFAWithoutEpsilon *nfa) {
printf("Start State: %d\n", nfa->start_state);
printf("Accept States: ");
for (int i = 0; i < nfa->state_count; i++) {
if (nfa->accept_states[i]) {
printf("%d ", i);
}
}
printf("\n");
printf("Transitions:\n");
for (int state = 0; state < nfa->state_count; state++) {
for (int symbol = 0; symbol < nfa->symbol_count; symbol++) {
if (nfa->transitions[state][symbol]) {
printf("State %d on input %d -> ", state, symbol);
for (int next_state = 0; next_state < nfa->state_count; next_state++) {
if (nfa->transitions[state][symbol] & (1 << next_state)) {
printf("%d ", next_state);
}
}
printf("\n");
}
}
}
}
int main() {
NFAWithEpsilon nfa;
NFAWithoutEpsilon new_nfa;
// Define NFA with epsilon transitions
nfa.state_count = 3;
nfa.symbol_count = 2;
nfa.start_state = 0;
memset(nfa.accept_states, 0, sizeof(nfa.accept_states));
// Define the transitions (Use bitwise representation to store transitions)
nfa.transitions[0][0] = 1 << 1; // q0 -> q1 on 'a'
nfa.transitions[1][1] = 1 << 2; // q1 -> q2 on 'b'
nfa.transitions[2][0] = 1 << 1; // q2 -> q1 on 'a'
// Define epsilon transitions (Use bitwise representation to store epsilon transitions)
nfa.epsilon_transitions[0][2] = 1; // q0 -> q2 on epsilon
nfa.epsilon_transitions[1][2] = 1; // q1 -> q2 on epsilon
// Define accepting states
nfa.accept_states[2] = 1; // q2 is accepting
// Convert to NFA without epsilon transitions
convert_to_nfa_without_epsilon(&nfa, &new_nfa);
// Display the result
display_nfa_without_epsilon(&new_nfa);
return 0;
}
PRACTICALNo:-07
NFA to DFA Conversion Program in C.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_STATES 10
#define MAX_SYMBOLS 2 // Assume 'a' and 'b'
int nfa[MAX_STATES][MAX_SYMBOLS][MAX_STATES]; // nfa[state][symbol][next_states]
int nfa_num_states, dfa_num_states = 0;
int visited[1 << MAX_STATES];
int dfa[1 << MAX_STATES][MAX_SYMBOLS];
void add_transition(int state, int symbol, int next_state) {
nfa[state][symbol][next_state] = 1;
}
int is_state_in_set(int state_set, int state) {
return state_set & (1 << state);
}
int get_next_state_set(int state_set, int symbol) {
int next_state_set = 0;
for (int i = 0; i < nfa_num_states; i++) {
if (is_state_in_set(state_set, i)) {
for (int j = 0; j < nfa_num_states; j++) {
if (nfa[i][symbol][j])
next_state_set |= (1 << j);
}
}
}
return next_state_set;
}
void convert_nfa_to_dfa() {
int queue[1 << MAX_STATES], front = 0, rear = 0;
queue[rear++] = 1 << 0; // Start state is 0
visited[1 << 0] = 1;
while (front < rear) {
int current_set = queue[front++];
for (int symbol = 0; symbol < MAX_SYMBOLS; symbol++) {
int next_set = get_next_state_set(current_set, symbol);
dfa[current_set][symbol] = next_set;
if (!visited[next_set]) {
visited[next_set] = 1;
queue[rear++] = next_set;
}
}
}
dfa_num_states = rear;
}
void print_dfa() {
printf("\nDFA Transitions (state numbers represent NFA state sets in bitmask):\n");
for (int i = 0; i < (1 << nfa_num_states); i++) {
if (visited[i]) {
printf("From state %2d (", i);
for (int k = 0; k < nfa_num_states; k++)
if (i & (1 << k)) printf("%d", k);
printf("): ");
for (int j = 0; j < MAX_SYMBOLS; j++) {
printf(" on '%c' -> %2d (", 'a' + j, dfa[i][j]);
for (int k = 0; k < nfa_num_states; k++)
if (dfa[i][j] & (1 << k)) printf("%d", k);
printf(") ");
}
printf("\n");
}
}
}
int main() {
int num_transitions, from, to;
char symbol;
printf("Enter number of NFA states: ");
scanf("%d", &nfa_num_states);
printf("Enter number of transitions: ");
scanf("%d", &num_transitions);
printf("Enter transitions in the form: from_state symbol to_state\n");
for (int i = 0; i < num_transitions; i++) {
scanf("%d %c %d", &from, &symbol, &to);
add_transition(from, symbol - 'a', to);
}
convert_nfa_to_dfa();
print_dfa();
return 0;
}
PRACTICALNo:-08
Write program to find Simulate First and Follow of any given
grammar
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAX 20
char production[MAX][MAX];
char first[MAX][MAX], follow[MAX][MAX];
int numProductions;
char nonTerminals[MAX];
int numNonTerminals = 0;
int isNonTerminal(char c) {
return (c >= 'A' && c <= 'Z');
}
int isTerminal(char c) {
return (c >= 'a' && c <= 'z');
}
int indexOfNonTerminal(char c) {
for (int i = 0; i < numNonTerminals; i++) {
if (nonTerminals[i] == c)
return i;
}
return -1;
}
void addToSet(char *set, char c) {
if (strchr(set, c) == NULL) {
int len = strlen(set);
set[len] = c;
set[len + 1] = '\0';
}
}
void computeFirst(char symbol, char *resultSet);
void firstOfProduction(char *prodBody, char *resultSet) {
if (*prodBody == '\0') return;
if (!isNonTerminal(*prodBody)) {
addToSet(resultSet, *prodBody);
} else {
char tempSet[MAX] = "";
computeFirst(*prodBody, tempSet);
for (int i = 0; tempSet[i]; i++) {
if (tempSet[i] != '#')
addToSet(resultSet, tempSet[i]);
}
if (strchr(tempSet, '#')) {
firstOfProduction(prodBody + 1, resultSet);
}
}
}
void computeFirst(char symbol, char *resultSet) {
int idx = indexOfNonTerminal(symbol);
for (int i = 0; i < numProductions; i++) {
if (production[i][0] == symbol) {
if (production[i][3] == '#') {
addToSet(resultSet, '#');
} else {
firstOfProduction(&production[i][3], resultSet);
}
}
}
}
void computeFollow(char symbol, char *resultSet) {
if (symbol == production[0][0]) {
addToSet(resultSet, '$'); // start symbol
}
for (int i = 0; i < numProductions; i++) {
char *body = &production[i][3];
for (int j = 0; body[j]; j++) {
if (body[j] == symbol) {
if (body[j + 1] != '\0') {
char tempSet[MAX] = "";
firstOfProduction(&body[j + 1], tempSet);
for (int k = 0; tempSet[k]; k++) {
if (tempSet[k] != '#') {
addToSet(resultSet, tempSet[k]);
}
}
if (strchr(tempSet, '#')) {
if (production[i][0] != symbol) {
computeFollow(production[i][0], resultSet);
}
}
} else if (production[i][0] != symbol) {
computeFollow(production[i][0], resultSet);
}
}
}
}
}
int main() {
printf("Enter number of productions: ");
scanf("%d", &numProductions);
getchar();
printf("Enter productions (e.g., A->aB or A-># for epsilon):\n");
for (int i = 0; i < numProductions; i++) {
fgets(production[i], MAX, stdin);
production[i][strcspn(production[i], "\n")] = '\0';
char nt = production[i][0];
if (indexOfNonTerminal(nt) == -1) {
nonTerminals[numNonTerminals++] = nt;
}
}
printf("\n--- FIRST Sets ---\n");
for (int i = 0; i < numNonTerminals; i++) {
char tempSet[MAX] = "";
computeFirst(nonTerminals[i], tempSet);
printf("FIRST(%c) = { ", nonTerminals[i]);
for (int j = 0; tempSet[j]; j++) {
printf("%c ", tempSet[j]);
}
printf("}\n");
strcpy(first[i], tempSet);
}
printf("\n--- FOLLOW Sets ---\n");
for (int i = 0; i < numNonTerminals; i++) {
char tempSet[MAX] = "";
computeFollow(nonTerminals[i], tempSet);
printf("FOLLOW(%c) = { ", nonTerminals[i]);
for (int j = 0; tempSet[j]; j++) {
printf("%c ", tempSet[j]);
}
printf("}\n");
strcpy(follow[i], tempSet);
}
return 0;
}
PRACTICALNo:-09
Write a program to perform constant propagation.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX 100
typedef struct {
char var[20];
int value;
int isConstant;
} Symbol;
Symbol table[MAX];
int symbolCount = 0;
int isNumber(char *s) {
for (int i = 0; s[i]; i++)
if (!isdigit(s[i])) return 0;
return 1;
}
int findSymbol(char *name) {
for (int i = 0; i < symbolCount; i++)
if (strcmp(table[i].var, name) == 0) return i;
return -1;
}
void updateSymbol(char *name, int value, int isConst) {
int idx = findSymbol(name);
if (idx == -1) {
strcpy(table[symbolCount].var, name);
table[symbolCount].value = value;
table[symbolCount].isConstant = isConst;
symbolCount++;
} else {
table[idx].value = value;
table[idx].isConstant = isConst;
}
}
int evaluate(char *left, char *op, char *right, int *result) {
int lval, rval;
int lidx = findSymbol(left), ridx = findSymbol(right);
if (isNumber(left)) {
lval = atoi(left);
} else if (lidx != -1 && table[lidx].isConstant) {
lval = table[lidx].value;
} else {
return 0;
}
if (isNumber(right)) {
rval = atoi(right);
} else if (ridx != -1 && table[ridx].isConstant) {
rval = table[ridx].value;
} else {
return 0;
}
if (strcmp(op, "+") == 0) *result = lval + rval;
else if (strcmp(op, "-") == 0) *result = lval - rval;
else if (strcmp(op, "*") == 0) *result = lval * rval;
else if (strcmp(op, "/") == 0) *result = rval ? lval / rval : 0;
else return 0;
return 1;
}
void propagateConstant(char *line) {
char lhs[20], op[5], rhs1[20], rhs2[20];
int value;
if (sscanf(line, "%s = %s %s %s;", lhs, rhs1, op, rhs2) == 4) {
if (evaluate(rhs1, op, rhs2, &value)) {
updateSymbol(lhs, value, 1);
printf("%s = %d;\n", lhs, value);
} else {
updateSymbol(lhs, 0, 0);
printf("%s = %s %s %s;\n", lhs, rhs1, op, rhs2);
}
} else if (sscanf(line, "%s = %s;", lhs, rhs1) == 2) {
if (isNumber(rhs1)) {
updateSymbol(lhs, atoi(rhs1), 1);
printf("%s = %d;\n", lhs, atoi(rhs1));
} else {
int idx = findSymbol(rhs1);
if (idx != -1 && table[idx].isConstant) {
updateSymbol(lhs, table[idx].value, 1);
printf("%s = %d;\n", lhs, table[idx].value);
} else {
updateSymbol(lhs, 0, 0);
printf("%s = %s;\n", lhs, rhs1);
}
}
}
}
int main() {
char line[100];
printf("Enter statements (end with empty line):\n");
while (1) {
fgets(line, sizeof(line), stdin);
if (strcmp(line, "\n") == 0) break;
propagateConstant(line);
}
return 0;
}
Input
a = 5;
b = a + 2;
c = b * 3;
Output
a = 5;
b = 7;
c = 21;
PRACTICALNo:-10
#include <stdio.h>
#include <ctype.h>
#include <string.h>
int tempCount = 1;
// Generate a new temporary variable name
char* newTemp() {
static char temp[5];
sprintf(temp, "t%d", tempCount++);
return temp;
}
// Simple structure to simulate intermediate code generation
void generateIC(char expr[]) {
char lhs, op, op1, op2;
int i = 0;
while (expr[i] == ' ') i++; // skip whitespace
lhs = expr[i++]; // get left-hand variable
while (expr[i] == ' ' || expr[i] == '=') i++; // skip = and whitespace
op1 = expr[i++];
while (expr[i] == ' ') i++;
op = expr[i++];
while (expr[i] == ' ') i++;
op2 = expr[i++];
// Respect operator precedence: * and / before + and -
if (op == '*' || op == '/') {
char* t1 = newTemp();
printf("%s = %c %c %c\n", t1, op1, op, op2);
printf("%c = %c + %s\n", lhs, lhs, t1); // very simplified assumption
} else {
char* t1 = newTemp();
printf("%s = %c %c %c\n", t1, op1, op, op2);
printf("%c = %s\n", lhs, t1);
}
}
int main() {
char expr[50];
printf("Enter an expression (e.g., a = b + c * d):\n");
fgets(expr, sizeof(expr), stdin);
generateIC(expr);
return 0;
}
Input
a=b+c*d
Output
t1 = c * d
t2 = b + t1
a = t2