asm_parser/asm_parser.c

350 lines
11 KiB
C
Raw Normal View History

/***************************************************************************
* file name : asm_parser.c *
* author : *
* description : the functions are declared in asm_parser.h *
* The intention of this library is to parse a .ASM file *
* *
* *
***************************************************************************
*
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "asm_parser.h"
2024-10-30 16:31:10 +08:00
/*
 * Read an assembly source file into `program`, one cleaned line per row.
 *
 * For every line read, the text from the first ';' or '#' onward is dropped
 * (comment), the trailing newline and trailing whitespace are removed, and
 * lines that end up empty are skipped (they do not consume a row).
 *
 * filename : path of the file to open for reading.
 * program  : destination table of ROWS rows, each COLS bytes wide.
 *
 * Returns the number of rows stored, or -1 if the file cannot be opened.
 * Reading stops once ROWS rows have been stored; any remaining input is
 * ignored. A physical line longer than COLS - 1 characters is delivered by
 * fgets in several pieces and therefore occupies several rows.
 */
int read_asm_file(char *filename, char program[ROWS][COLS]) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("Error opening file %s\n", filename);
        return -1;
    }

    int line_num = 0;
    /* The row bound must be tested BEFORE fgets: otherwise a full table
     * would still be written one row past its end. */
    while (line_num < ROWS && fgets(program[line_num], COLS, file) != NULL) {
        char *line = program[line_num];

        /* Cut at the first comment marker or at the newline, whichever
         * comes first. */
        line[strcspn(line, ";#\n")] = '\0';

        /* Trim trailing whitespace; the cast keeps isspace well-defined
         * for bytes with the high bit set. */
        size_t len = strlen(line);
        while (len > 0 && isspace((unsigned char)line[len - 1])) {
            line[--len] = '\0';
        }

        /* An empty line leaves line_num unchanged so the row is reused. */
        if (len == 0) {
            continue;
        }
        line_num++;
    }

    fclose(file);
    return line_num;
}
/*
 * Identify the mnemonic of one assembly line and hand the line to the
 * matching parse_* routine, which fills instr_bin_str.
 *
 * instr         : NUL-terminated instruction text; leading whitespace is
 *                 skipped before anything else is looked at.
 * instr_bin_str : output buffer passed through to the parse_* routine.
 *
 * Returns 0 for a blank line, -1 for a NULL argument, an over-long line,
 * a line with no token or an unknown mnemonic, and otherwise whatever the
 * selected parse_* routine returns.
 */
int parse_instruction(char *instr, char *instr_bin_str) {
    if (instr == NULL || instr_bin_str == NULL) {
        return -1;
    }

    /* Skip leading whitespace; the cast keeps isspace well-defined for
     * bytes with the high bit set. */
    while (isspace((unsigned char)*instr)) {
        instr++;
    }
    if (*instr == '\0') {
        /* Empty line */
        return 0;
    }

    /* strtok modifies its input, so the mnemonic is extracted from a
     * scratch copy. Refuse input that would not fit in the copy. */
    char instr_copy[COLS];
    size_t len = strlen(instr);
    if (len >= sizeof instr_copy) {
        printf("Instruction too long: %s\n", instr);
        return -1;
    }
    memcpy(instr_copy, instr, len + 1);

    char *token = strtok(instr_copy, " ,\t");
    if (token == NULL) {
        /* Only delimiters on the line */
        return -1;
    }
    to_uppercase(token);

    /* The untouched `instr` (not the copy) goes to the handler, which
     * tokenizes it again on its own. */
    if (strcmp(token, "ADD") == 0) {
        return parse_add(instr, instr_bin_str);
    }
    if (strcmp(token, "MUL") == 0) {
        return parse_mul(instr, instr_bin_str);
    }
    if (strcmp(token, "SUB") == 0) {
        return parse_sub(instr, instr_bin_str);
    }
    if (strcmp(token, "DIV") == 0) {
        return parse_div(instr, instr_bin_str);
    }
    if (strcmp(token, "AND") == 0) {
        return parse_and(instr, instr_bin_str);
    }
    if (strcmp(token, "OR") == 0) {
        return parse_or(instr, instr_bin_str);
    }
    if (strcmp(token, "XOR") == 0) {
        return parse_xor(instr, instr_bin_str);
    }
    /* Add other instructions here */

    printf("Unknown instruction: %s\n", token);
    return -1;
}
/*
 * Convert a register name such as "R3" or "r3" into its number.
 *
 * reg_str : NUL-terminated register token. It must be 'R' or 'r' followed
 *           by at least one decimal digit; only whitespace may follow the
 *           digits.
 *
 * Returns the register number 0..7, or -1 (after printing a diagnostic)
 * when the token is NULL, malformed, or names a register above 7.
 */
int parse_reg(char *reg_str) {
    if (reg_str == NULL) {
        printf("Invalid register: %s\n", "(null)");
        return -1;
    }
    if (reg_str[0] != 'R' && reg_str[0] != 'r') {
        printf("Invalid register: %s\n", reg_str);
        return -1;
    }

    /* Require a digit right after the prefix so that "R", "Rx" or "R-1"
     * are rejected instead of being read as register 0 or a signed value. */
    const char *digits = &reg_str[1];
    if (!isdigit((unsigned char)*digits)) {
        printf("Invalid register: %s\n", reg_str);
        return -1;
    }

    char *end = NULL;
    long reg_num = strtol(digits, &end, 10);

    /* Tolerate trailing whitespace, but nothing else ("R1x" is an error). */
    while (isspace((unsigned char)*end)) {
        end++;
    }
    if (*end != '\0') {
        printf("Invalid register: %s\n", reg_str);
        return -1;
    }

    if (reg_num < 0 || reg_num > 7) {
        printf("Invalid register number: %ld\n", reg_num);
        return -1;
    }
    return (int)reg_num;
}
/*
 * Write the low `bits` bits of `num` as a string of '0'/'1' characters,
 * most significant bit first, followed by a NUL terminator.
 *
 * num     : value to convert. It is converted to unsigned int first, so a
 *           negative value yields the low bits of that unsigned value
 *           rather than non-digit characters.
 * bits    : number of digits to emit; positions beyond the width of
 *           unsigned int are filled with '0'. Negative values are ignored.
 * bin_str : output buffer holding at least bits + 1 bytes. NULL is ignored.
 */
void int_to_bin_str(int num, int bits, char *bin_str) {
    if (bin_str == NULL || bits < 0) {
        return;
    }

    /* Work on the unsigned representation: `%` on a negative int yields
     * -1, which would produce '/' instead of a binary digit. */
    unsigned int value = (unsigned int)num;

    bin_str[bits] = '\0';
    for (int i = bits - 1; i >= 0; i--) {
        bin_str[i] = (char)('0' + (value & 1u));
        value >>= 1;
    }
}
/*
 * Convert a NUL-terminated string to upper case in place.
 *
 * str : string to modify; a NULL pointer is ignored.
 */
void to_uppercase(char *str) {
    if (str == NULL) {
        return;
    }
    for (; *str != '\0'; ++str) {
        /* toupper requires a value representable as unsigned char (or
         * EOF); a plain char may be negative, so cast first. */
        *str = (char)toupper((unsigned char)*str);
    }
}
/*
 * Shared encoder for every "MNEMONIC Rd, Rs, Rt" instruction.
 *
 * instr         : instruction text; tokens are separated by spaces, commas
 *                 or tabs. The first token (the mnemonic) is skipped
 *                 without being checked.
 * opcode        : 4-character binary opcode placed in bits 15..12.
 * subop         : 3-character binary sub-opcode placed in bits 5..3.
 * instr_bin_str : output buffer; receives 16 '0'/'1' characters plus NUL
 *                 laid out as opcode | Rd | Rs | subop | Rt.
 *
 * Returns 1 on success, -1 when an argument is NULL, the line does not fit
 * in the COLS-sized scratch buffer, fewer than three operands are present,
 * or any operand is rejected by parse_reg. Tokens after the third operand
 * are ignored.
 */
static int encode_three_reg_instr(const char *instr, const char *opcode,
                                  const char *subop, char *instr_bin_str) {
    if (instr == NULL || instr_bin_str == NULL) {
        return -1;
    }

    /* strtok writes into its input, so work on a scratch copy; refuse
     * anything that would overflow it. */
    char instr_copy[COLS];
    size_t len = strlen(instr);
    if (len >= sizeof instr_copy) {
        return -1;
    }
    memcpy(instr_copy, instr, len + 1);

    /* First token is the mnemonic. If it is missing, the operand lookups
     * below return NULL as well and the function fails there. */
    (void)strtok(instr_copy, " ,\t");

    /* Operands in order: Rd, Rs, Rt. Each one found is parsed right away
     * (so parse_reg reports every bad operand), validity is checked once
     * all three tokens exist. */
    int regs[3];
    for (int i = 0; i < 3; i++) {
        char *token = strtok(NULL, " ,\t");
        if (token == NULL) {
            return -1;
        }
        regs[i] = parse_reg(token);
    }
    if (regs[0] < 0 || regs[1] < 0 || regs[2] < 0) {
        return -1;
    }

    /* Convert rd, rs, rt to 3-bit binary strings */
    char ddd[4], sss[4], ttt[4];
    int_to_bin_str(regs[0], 3, ddd);
    int_to_bin_str(regs[1], 3, sss);
    int_to_bin_str(regs[2], 3, ttt);

    /* Build the binary instruction string */
    sprintf(instr_bin_str, "%s%s%s%s%s", opcode, ddd, sss, subop, ttt);
    return 1;
}

/* ADD Rd, Rs, Rt -> opcode 0001, sub-opcode 000. Returns 1 or -1. */
int parse_add(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0001", "000", instr_bin_str);
}

/* MUL Rd, Rs, Rt -> opcode 0001, sub-opcode 001. Returns 1 or -1. */
int parse_mul(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0001", "001", instr_bin_str);
}

/* SUB Rd, Rs, Rt -> opcode 0001, sub-opcode 010. Returns 1 or -1. */
int parse_sub(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0001", "010", instr_bin_str);
}

/* DIV Rd, Rs, Rt -> opcode 0001, sub-opcode 011. Returns 1 or -1. */
int parse_div(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0001", "011", instr_bin_str);
}

/* AND Rd, Rs, Rt -> opcode 0101, sub-opcode 000. Returns 1 or -1. */
int parse_and(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0101", "000", instr_bin_str);
}

/* OR Rd, Rs, Rt -> opcode 0101, sub-opcode 010. Returns 1 or -1. */
int parse_or(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0101", "010", instr_bin_str);
}

/* XOR Rd, Rs, Rt -> opcode 0101, sub-opcode 011. Returns 1 or -1. */
int parse_xor(char *instr, char *instr_bin_str) {
    return encode_three_reg_instr(instr, "0101", "011", instr_bin_str);
}
/*
 * Convert the first 16 characters of a '0'/'1' string into a 16-bit word,
 * most significant bit first.
 *
 * instr_bin_str : text to convert; characters after the 16th are not read.
 *
 * Returns the assembled word. If any of the first 16 characters is not
 * '0' or '1' (including an early NUL), a diagnostic is printed and 0 is
 * returned.
 */
unsigned short int str_to_bin(char *instr_bin_str) {
    unsigned short int word = 0;
    const char *cursor = instr_bin_str;
    int remaining = 16;

    while (remaining-- > 0) {
        const char digit = *cursor++;

        if (digit != '0' && digit != '1') {
            /* Invalid character */
            printf("Invalid binary string: %s\n", instr_bin_str);
            return 0;
        }
        /* Shift the accumulated bits up and append the new one. */
        word = (unsigned short int)((word << 1) | (digit == '1'));
    }
    return word;
}
/*
 * Write the assembled program to a binary object file.
 *
 * The file consists of a three-word header (0xCADE, start address 0x0000,
 * instruction count) followed by instr_count instruction words.
 *
 * filename    : path of the file to create or overwrite.
 * program_bin : instruction words to write.
 * instr_count : number of words to take from program_bin; must fit in the
 *               16-bit count field of the header.
 *
 * Returns 0 on success, -1 if instr_count is out of range, the file cannot
 * be opened, or any write (including the final flush in fclose) fails.
 *
 * NOTE(review): words are written in host byte order, exactly as before;
 * confirm that this matches what the consumer of the file expects.
 */
int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count) {
    /* A negative count would turn into a huge size_t in fwrite, and a
     * count above 0xFFFF would be silently truncated in the header. */
    if (instr_count < 0 || instr_count > 0xFFFF) {
        printf("Invalid instruction count: %d\n", instr_count);
        return -1;
    }

    FILE *file = fopen(filename, "wb");
    if (!file) {
        printf("Error opening file %s for writing\n", filename);
        return -1;
    }

    /* Code header: xCADE, address (start at 0), n (instr_count) */
    unsigned short int header[3];
    header[0] = 0xCADE;
    header[1] = 0x0000; /* Starting address, assume 0 for now */
    header[2] = (unsigned short int)instr_count;

    size_t count = (size_t)instr_count;
    int ok = fwrite(header, sizeof header[0], 3, file) == 3;

    /* Write the instructions */
    if (ok && count > 0) {
        ok = fwrite(program_bin, sizeof(unsigned short int), count, file) == count;
    }

    /* fclose flushes buffered data, so its result is part of success. */
    if (fclose(file) != 0) {
        ok = 0;
    }

    if (!ok) {
        printf("Error writing file %s\n", filename);
        return -1;
    }
    return 0;
}
/* to do - implement all the functions in asm_parser.h */