asm_parser/asm_parser.c

1043 lines
25 KiB
C
Raw Normal View History

/***************************************************************************
* file name : asm_parser.c *
* author : *
* description : the functions are declared in asm_parser.h *
* The intention of this library is to parse a .ASM file *
* *
* *
***************************************************************************
*
*/
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "asm_parser.h"
2024-10-30 16:31:10 +08:00
int read_asm_file(char *filename, char program[ROWS][COLS]) {
FILE *file = fopen(filename, "r");
if (!file) {
2024-10-30 17:34:54 +08:00
printf("error: read_asm_file failed\n");
return 2;
2024-10-30 16:31:10 +08:00
}
int line_num = 0;
2024-10-31 11:46:14 +08:00
while (fgets(program[line_num], COLS, file) != NULL) {
2024-10-30 16:31:10 +08:00
// Remove trailing newline
size_t len = strlen(program[line_num]);
if (len > 0 && program[line_num][len - 1] == '\n') {
program[line_num][len - 1] = '\0';
len--;
}
2024-10-31 11:46:14 +08:00
trim(program[line_num]);
if (strlen(program[line_num]) == 0) {
continue;
}
2024-10-30 16:31:10 +08:00
// Remove comments starting with ';' or '#'
char *comment = strchr(program[line_num], ';');
if (comment != NULL) {
*comment = '\0';
len = comment - program[line_num];
}
2024-10-31 11:46:14 +08:00
// /* Here, we remove this code, because it may cause immediate value cannot be detected */
// comment = strchr(program[line_num], '#');
// if (comment != NULL) {
// *comment = '\0';
// len = comment - program[line_num];
// }
2024-10-30 16:31:10 +08:00
while (len > 0 && isspace(program[line_num][len - 1])) {
program[line_num][--len] = '\0';
}
// Skip empty lines
if (len == 0) {
continue;
}
line_num++;
2024-10-31 11:46:14 +08:00
if (line_num >= ROWS) {
printf("error: read_asm_file failed - file too large\n");
fclose(file);
return 2;
}
}
if (!feof(file)) {
printf("error: read_asm_file failed - could not read the file completely\n");
fclose(file);
return 2;
2024-10-30 16:31:10 +08:00
}
fclose(file);
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
int parse_instruction(char *instr, char *instr_bin_str) {
// Remove leading whitespace
while (isspace(*instr)) instr++;
if (*instr == '\0') {
// Empty line
return 0;
}
2024-10-31 11:46:14 +08:00
trim(instr);
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces
while (isspace(*p)) p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
if (len > 0) {
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
// Skip commas
int comma_count = 0;
while (*p != '\0' && (isspace(*p) || *p == ',')) {
if (*p == ',') {
comma_count++;
}
p++;
}
if (comma_count > 1) {
printf("error: parse_instruction failed - too many commas\n");
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 3;
}
}
if (token_count == 0) {
2024-10-30 17:34:54 +08:00
printf("error: parse_instruction failed\n");
return 3;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
// Convert opcode to uppercase
to_uppercase(tokens[0]);
// Now tokens[0] is opcode, tokens[1..] are operands
if (strcmp(tokens[0], "ADD") == 0) {
int ret = parse_add(instr, instr_bin_str);
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else if (strcmp(tokens[0], "MUL") == 0) {
int ret = parse_mul(instr, instr_bin_str);
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
}
// Handle other opcodes similarly
else if (strcmp(tokens[0], "SUB") == 0) {
int ret = parse_sub(instr, instr_bin_str);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else if (strcmp(tokens[0], "DIV") == 0) {
int ret = parse_div(instr, instr_bin_str);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else if (strcmp(tokens[0], "AND") == 0) {
int ret = parse_and(instr, instr_bin_str);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else if (strcmp(tokens[0], "OR") == 0) {
int ret = parse_or(instr, instr_bin_str);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else if (strcmp(tokens[0], "XOR") == 0) {
int ret = parse_xor(instr, instr_bin_str);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return ret;
} else {
2024-10-30 17:34:54 +08:00
printf("error: parse_instruction failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 3;
2024-10-30 16:31:10 +08:00
}
}
2024-10-31 11:46:14 +08:00
/*
 * Strip leading and trailing whitespace from str in place.
 *
 * The surviving characters are shifted to the start of the buffer and the
 * string is re-terminated. An empty or all-whitespace string becomes "".
 */
void trim(char *str) {
    // Find the first non-whitespace character. The cast keeps isspace()
    // defined for bytes that are negative as plain char.
    char *start = str;
    while (isspace((unsigned char)*start)) {
        start++;
    }

    // Shrink the length past trailing whitespace; working with a length
    // (not an end pointer) never forms a pointer before the buffer.
    size_t len = strlen(start);
    while (len > 0 && isspace((unsigned char)start[len - 1])) {
        len--;
    }

    // Source and destination may overlap, hence memmove.
    memmove(str, start, len);
    str[len] = '\0';
}
2024-10-30 17:34:54 +08:00
/*
 * Append the 3-bit binary encoding of a register number to instr_bin_str.
 *
 * reg_num is the register digit as a character ('0'..'7'). On success the
 * three characters ("000".."111", most significant bit first) are appended
 * to the NUL-terminated string in instr_bin_str, which must have room for
 * them, and 0 is returned. Any other character prints an error, leaves
 * instr_bin_str untouched and returns 5.
 */
int parse_reg(char reg_num, char *instr_bin_str) {
    if (reg_num < '0' || reg_num > '7') {
        printf("error: parse_reg failed\n");
        return 5;
    }

    const int reg = reg_num - '0';
    const char bits[4] = {
        (char)('0' + ((reg >> 2) & 1)),
        (char)('0' + ((reg >> 1) & 1)),
        (char)('0' + (reg & 1)),
        '\0',
    };
    strcat(instr_bin_str, bits);
    return 0;
}
/*
 * Write the low `bits` bits of num into bin_str as '0'/'1' characters, most
 * significant bit first, followed by a terminating NUL.
 *
 * bin_str must have room for bits + 1 characters. A negative `bits` is
 * treated as 0 and yields an empty string.
 */
void int_to_bin_str(int num, int bits, char *bin_str) {
    if (bits < 0) {
        bits = 0;
    }

    // Shift an unsigned copy so the result is well defined for negative num.
    const unsigned int value = (unsigned int)num;
    for (int i = 0; i < bits; i++) {
        const int shift = bits - 1 - i;
        bin_str[i] = (char)('0' + ((value >> shift) & 1u));
    }
    bin_str[bits] = '\0';
}
/*
 * Convert every character of the NUL-terminated string str to upper case,
 * in place.
 */
void to_uppercase(char *str) {
    for (char *p = str; *p != '\0'; p++) {
        // toupper() requires an unsigned char value (or EOF); plain char may
        // be negative, so convert before the call.
        *p = (char)toupper((unsigned char)*p);
    }
}
int parse_add(char *instr, char *instr_bin_str) {
// Opcode: 0001
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0001"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
printf("error: parse_add() failed, token_count = %d\n", token_count);
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_add() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_add() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_add() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_add() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rt or Immediate
if (tokens[3][0] == 'R') {
// Register
strcat(instr_bin_str, "000"); // Bit[5] = 0
char rt_num = tokens[3][1];
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_add() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
} else {
// Immediate
int imm_value;
ret = parse_imm5(tokens[3], &imm_value);
if (ret != 0) {
printf("error: parse_add() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
strcat(instr_bin_str, "1"); // Bit[5] = 1
// Append imm5 bits
char imm_bits[6];
int_to_bin_str(imm_value & 0x1F, 5, imm_bits);
strcat(instr_bin_str, imm_bits);
2024-10-30 17:34:54 +08:00
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
2024-10-30 17:34:54 +08:00
}
2024-10-31 11:46:14 +08:00
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
int parse_mul(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Similar to parse_add, but sub-opcode is '001'
2024-10-30 16:31:10 +08:00
// Opcode: 0001
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0001"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
printf("error: parse_mul() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Sub-opcode for MUL
strcat(instr_bin_str, "001");
// Parse Rt
if (tokens[3][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rt_num = tokens[3][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_mul() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
/*
 * Parse a signed 5-bit immediate operand.
 *
 * Accepted spellings: "#<decimal>", "x<hex>" / "X<hex>", "0x<hex>" /
 * "0X<hex>". The digits must make up the whole rest of the string and the
 * value must lie in [-16, 15].
 *
 * On success stores the value in *imm_value and returns 0. On failure prints
 * an error, leaves *imm_value untouched and returns 4.
 */
int parse_imm5(char *imm_str, int *imm_value) {
    const char *digits;
    int base;

    if (imm_str[0] == '#') {
        // Decimal immediate
        digits = imm_str + 1;
        base = 10;
    } else if (imm_str[0] == 'x' || imm_str[0] == 'X') {
        // Hex immediate
        digits = imm_str + 1;
        base = 16;
    } else if (imm_str[0] == '0' && (imm_str[1] == 'x' || imm_str[1] == 'X')) {
        // Hex immediate
        digits = imm_str + 2;
        base = 16;
    } else {
        printf("error: invalid immediate value\n");
        return 4;
    }

    // strtol reports where it stopped, so "no digits" and "trailing junk"
    // are detected instead of being read as 0 or silently truncated.
    char *end;
    long value = strtol(digits, &end, base);
    if (end == digits || *end != '\0') {
        printf("error: invalid immediate value\n");
        return 4;
    }

    // Check if value fits in signed 5-bit (also catches strtol saturation)
    if (value < -16 || value > 15) {
        printf("error: immediate value out of range\n");
        return 4;
    }

    *imm_value = (int)value;
    return 0;
}
// Function to parse SUB instruction
2024-10-30 16:31:10 +08:00
int parse_sub(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Opcode: 0001
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0001"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
printf("error: parse_sub() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Sub-opcode for SUB
strcat(instr_bin_str, "010");
// Parse Rt
if (tokens[3][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rt_num = tokens[3][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_sub() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
// Function to parse DIV instruction
2024-10-30 16:31:10 +08:00
int parse_div(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Opcode: 0001
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0001"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
printf("error: parse_div() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Sub-opcode for DIV
strcat(instr_bin_str, "011");
// Parse Rt
if (tokens[3][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rt_num = tokens[3][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_div() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
// Function to parse AND instruction
2024-10-30 16:31:10 +08:00
int parse_and(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Opcode: 0101
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0101"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
2024-10-30 17:34:54 +08:00
}
2024-10-31 11:46:14 +08:00
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_and() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_and() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rd_num = tokens[1][1];
int ret = parse_reg(rd_num, instr_bin_str); // Rd
2024-10-30 17:34:54 +08:00
if (ret != 0) {
printf("error: parse_and() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_and() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
ret = parse_reg(rs_num, instr_bin_str); // Rs
2024-10-30 17:34:54 +08:00
if (ret != 0) {
printf("error: parse_and() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rt or Immediate
if (tokens[3][0] == 'R') {
// Register
strcat(instr_bin_str, "000"); // Bit[5] = 0
char rt_num = tokens[3][1];
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_and() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
} else {
// Immediate
int imm_value;
ret = parse_imm5(tokens[3], &imm_value);
if (ret != 0) {
printf("error: parse_and() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
strcat(instr_bin_str, "1"); // Bit[5] = 1
// Append imm5 bits
char imm_bits[6];
int_to_bin_str(imm_value & 0x1F, 5, imm_bits);
strcat(instr_bin_str, imm_bits);
}
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
// Function to parse OR instruction
2024-10-30 16:31:10 +08:00
int parse_or(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Opcode: 0101
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0101"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
printf("error: parse_or() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Sub-opcode for OR
strcat(instr_bin_str, "010");
// Parse Rt
if (tokens[3][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rt_num = tokens[3][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_or() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
// Function to parse XOR instruction
2024-10-30 16:31:10 +08:00
int parse_xor(char *instr, char *instr_bin_str) {
2024-10-31 11:46:14 +08:00
// Opcode: 0101
2024-10-30 17:34:54 +08:00
strcpy(instr_bin_str, "0101"); // Opcode
2024-10-31 11:46:14 +08:00
// Tokenize the instruction line
2024-10-30 16:31:10 +08:00
char instr_copy[COLS];
strcpy(instr_copy, instr);
2024-10-31 11:46:14 +08:00
char *tokens[10];
int token_count = 0;
char *p = instr_copy;
while (*p != '\0') {
// Skip spaces and commas
while (isspace(*p) || *p == ',') p++;
if (*p == '\0') break;
// Collect token
char *start = p;
while (*p != '\0' && !isspace(*p) && *p != ',') p++;
size_t len = p - start;
tokens[token_count] = (char *)malloc(len + 1);
strncpy(tokens[token_count], start, len);
tokens[token_count][len] = '\0';
token_count++;
}
if (token_count != 4) {
2024-10-30 17:34:54 +08:00
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Enforce uppercase for operands
to_uppercase(tokens[1]);
to_uppercase(tokens[2]);
to_uppercase(tokens[3]);
// Parse Rd
if (tokens[1][0] != 'R') {
printf("error: parse_xor() failed\n");
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
return 4;
}
char rd_num = tokens[1][1];
2024-10-30 17:34:54 +08:00
int ret = parse_reg(rd_num, instr_bin_str); // Rd
if (ret != 0) {
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Parse Rs
if (tokens[2][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rs_num = tokens[2][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rs_num, instr_bin_str); // Rs
if (ret != 0) {
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Sub-opcode for XOR
strcat(instr_bin_str, "011");
// Parse Rt
if (tokens[3][0] != 'R') {
2024-10-30 17:34:54 +08:00
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
char rt_num = tokens[3][1];
2024-10-30 17:34:54 +08:00
ret = parse_reg(rt_num, instr_bin_str); // Rt
if (ret != 0) {
printf("error: parse_xor() failed\n");
2024-10-31 11:46:14 +08:00
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 4;
}
2024-10-31 11:46:14 +08:00
// Free allocated memory
for (int i = 0; i < token_count; i++) {
free(tokens[i]);
}
2024-10-30 17:34:54 +08:00
return 0;
2024-10-30 16:31:10 +08:00
}
2024-10-31 11:46:14 +08:00
/*
 * Write a 16-bit value to file as two bytes, most significant byte first.
 * The result of fwrite() is not inspected.
 */
void write_uint16_big_endian(FILE *file, uint16_t value) {
    const uint8_t high_low[2] = {
        (uint8_t)((value >> 8) & 0xFF), // High byte
        (uint8_t)(value & 0xFF),        // Low byte
    };
    fwrite(high_low, sizeof high_low[0], 2, file);
}
2024-10-30 16:31:10 +08:00
/*
 * Convert a 16-character string of '0'/'1' (most significant bit first) into
 * its 16-bit value.
 *
 * On a NULL pointer, a length other than 16, or any other character, an
 * error is printed and 6 is returned. NOTE: 6 is also the value of the valid
 * input "0000000000000110", so the return value alone cannot distinguish
 * that input from a failure.
 */
unsigned short int str_to_bin(char *instr_bin_str) {
    if (instr_bin_str == NULL) {
        printf("error: str_to_bin failed\n");
        return 6;
    }
    if (strlen(instr_bin_str) != 16) {
        printf("error: str_to_bin failed, invalid length: %s\n", instr_bin_str);
        return 6;
    }

    unsigned short int result = 0;
    for (const char *bit = instr_bin_str; *bit != '\0'; bit++) {
        if (*bit != '0' && *bit != '1') {
            // Invalid character
            printf("error: str_to_bin failed\n");
            return 6;
        }
        // Shift the accumulated bits up and bring the new one in at the bottom.
        result = (unsigned short int)((result << 1) | (*bit == '1'));
    }
    return result;
}
/*
 * Write the assembled program to filename as a big-endian object file.
 *
 * File layout, every field a 16-bit big-endian word: 0xCADE, the start
 * address (always 0x0000 here), the instruction count, then the first
 * instr_count words of program_bin.
 *
 * Returns 0 on success. Returns 7 (after printing an error) when instr_count
 * does not fit the 16-bit count field, the file cannot be opened, or a write
 * or the final close fails.
 */
int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count) {
    // Validate before opening so a bad count does not truncate an existing file.
    if (instr_count < 0 || instr_count > 0xFFFF) {
        printf("error: write_obj_file failed\n");
        return 7;
    }

    FILE *file = fopen(filename, "wb");
    if (!file) {
        printf("error: write_obj_file failed\n");
        return 7;
    }

    // Write the code header: xCADE, address (start at 0), n (instr_count)
    write_uint16_big_endian(file, 0xCADE);
    write_uint16_big_endian(file, 0x0000); // Starting address, assume 0 for now
    write_uint16_big_endian(file, (uint16_t)instr_count);

    // Write the instructions in big-endian order
    for (int i = 0; i < instr_count; i++) {
        write_uint16_big_endian(file, program_bin[i]);
    }

    // The word writer cannot report failures itself, so consult the stream's
    // error flag, and treat a failed close (which flushes) as a failure too.
    int failed = ferror(file);
    if (fclose(file) != 0) {
        failed = 1;
    }
    if (failed) {
        printf("error: write_obj_file failed\n");
        return 7;
    }
    return 0;
}