From ebc3c88a36d96997250e1abd689ae1b7120bc29b Mon Sep 17 00:00:00 2001 From: feng-arch Date: Thu, 31 Oct 2024 11:46:14 +0800 Subject: [PATCH] fix some bug about double comma --- Makefile | 3 +- asm_parser.c | 852 +++++++++++++++++++++++++++++++++++++++++++-------- asm_parser.h | 5 +- assembler.c | 9 +- test1.asm | 8 +- 5 files changed, 733 insertions(+), 144 deletions(-) diff --git a/Makefile b/Makefile index 4a3a8a8..ce9d6ac 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = gcc -CFLAGS = -Wall -Wextra -I. +CFLAGS = -Wall -Wextra -I. -g SRC_ASSEMBLER = assembler.c SRC_ASM_PARSER = asm_parser.c OBJ_ASM_PARSER = asm_parser.o @@ -15,6 +15,7 @@ asm_parser.o: $(SRC_ASM_PARSER) clean: rm -f $(OBJ_ASM_PARSER) + clobber: clean rm -f $(TARGET) rm -f *~ diff --git a/asm_parser.c b/asm_parser.c index e612e15..91faefc 100644 --- a/asm_parser.c +++ b/asm_parser.c @@ -13,6 +13,8 @@ #include #include #include "asm_parser.h" +#include + int read_asm_file(char *filename, char program[ROWS][COLS]) { FILE *file = fopen(filename, "r"); @@ -21,25 +23,32 @@ int read_asm_file(char *filename, char program[ROWS][COLS]) { return 2; } int line_num = 0; - while (fgets(program[line_num], COLS, file) != NULL && line_num < ROWS) { + while (fgets(program[line_num], COLS, file) != NULL) { // Remove trailing newline size_t len = strlen(program[line_num]); if (len > 0 && program[line_num][len - 1] == '\n') { program[line_num][len - 1] = '\0'; len--; } + + trim(program[line_num]); + + if (strlen(program[line_num]) == 0) { + continue; + } // Remove comments starting with ';' or '#' char *comment = strchr(program[line_num], ';'); if (comment != NULL) { *comment = '\0'; len = comment - program[line_num]; } - comment = strchr(program[line_num], '#'); - if (comment != NULL) { - *comment = '\0'; - len = comment - program[line_num]; - } - // Trim trailing whitespace + + // /* Here, we remove this code, because it may cause immediate value cannot be detected */ + // comment = strchr(program[line_num], '#'); + // if (comment != NULL) { + // *comment = '\0'; + // len = comment - program[line_num]; + // } while (len > 0 && isspace(program[line_num][len - 1])) { program[line_num][--len] = '\0'; } @@ -48,6 +57,16 @@ int read_asm_file(char *filename, char program[ROWS][COLS]) { continue; } line_num++; + if (line_num >= ROWS) { + printf("error: read_asm_file failed - file too large\n"); + fclose(file); + return 2; + } + } + if (!feof(file)) { + printf("error: read_asm_file failed - could not read the file completely\n"); + fclose(file); + return 2; } fclose(file); return 0; @@ -60,38 +79,124 @@ int parse_instruction(char *instr, char *instr_bin_str) { // Empty line return 0; } - // Tokenize the instruction + trim(instr); + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); - if (token == NULL) { - // Empty or invalid instruction + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces + while (isspace(*p)) p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + if (len > 0) { + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + // Skip commas + int comma_count = 0; + while (*p != '\0' && (isspace(*p) || *p == ',')) { + if (*p == ',') { + comma_count++; + } + p++; + } + if (comma_count > 1) { + printf("error: parse_instruction failed - too many commas\n"); + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 3; + } + } + + if (token_count == 0) { printf("error: parse_instruction failed\n"); return 3; } - to_uppercase(token); - if (strcmp(token, "ADD") == 0) { - return parse_add(instr, instr_bin_str); - } else if (strcmp(token, "MUL") == 0) { - return parse_mul(instr, instr_bin_str); - } else if (strcmp(token, "SUB") == 0) { - return parse_sub(instr, instr_bin_str); - } else if (strcmp(token, "DIV") == 0) { - return parse_div(instr, instr_bin_str); - } else if (strcmp(token, "AND") == 0) { - return parse_and(instr, instr_bin_str); - } else if (strcmp(token, "OR") == 0) { - return parse_or(instr, instr_bin_str); - } else if (strcmp(token, "XOR") == 0) { - return parse_xor(instr, instr_bin_str); + + // Convert opcode to uppercase + to_uppercase(tokens[0]); + + // Now tokens[0] is opcode, tokens[1..] are operands + if (strcmp(tokens[0], "ADD") == 0) { + int ret = parse_add(instr, instr_bin_str); + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else if (strcmp(tokens[0], "MUL") == 0) { + int ret = parse_mul(instr, instr_bin_str); + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; } - // Add other instructions here - else { + // Handle other opcodes similarly + else if (strcmp(tokens[0], "SUB") == 0) { + int ret = parse_sub(instr, instr_bin_str); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else if (strcmp(tokens[0], "DIV") == 0) { + int ret = parse_div(instr, instr_bin_str); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else if (strcmp(tokens[0], "AND") == 0) { + int ret = parse_and(instr, instr_bin_str); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else if (strcmp(tokens[0], "OR") == 0) { + int ret = parse_or(instr, instr_bin_str); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else if (strcmp(tokens[0], "XOR") == 0) { + int ret = parse_xor(instr, instr_bin_str); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return ret; + } else { printf("error: parse_instruction failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 3; } } +void trim(char *str) { + // Remove leading spaces + char *start = str; + while (isspace(*start)) start++; + memmove(str, start, strlen(start) + 1); + + // Remove trailing spaces + char *end = str + strlen(str) - 1; + while (end > str && isspace(*end)) end--; + *(end + 1) = '\0'; +} + int parse_reg(char reg_num, char *instr_bin_str) { if (reg_num < '0' || reg_num > '7') { printf("error: parse_reg failed\n"); @@ -107,8 +212,7 @@ int parse_reg(char reg_num, char *instr_bin_str) { void int_to_bin_str(int num, int bits, char *bin_str) { bin_str[bits] = '\0'; for (int i = bits - 1; i >= 0; i--) { - bin_str[i] = (num % 2) + '0'; - num /= 2; + bin_str[i] = ((num >> (bits - 1 - i)) & 1) + '0'; } } @@ -119,311 +223,786 @@ void to_uppercase(char *str) { } int parse_add(char *instr, char *instr_bin_str) { - // Instruction format: ADD Rd, Rs, Rt // Opcode: 0001 strcpy(instr_bin_str, "0001"); // Opcode - // Tokenize the instruction to get registers + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'ADD' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { - printf("error: parse_add() failed\n"); + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { + printf("error: parse_add() failed, token_count = %d\n", token_count); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; // Assuming 'R' is at token[0] + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "000"); // Sub-opcode for ADD - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { - printf("error: parse_add() failed\n"); - return 4; + + // Parse Rt or Immediate + if (tokens[3][0] == 'R') { + // Register + strcat(instr_bin_str, "000"); // Bit[5] = 0 + char rt_num = tokens[3][1]; + ret = parse_reg(rt_num, instr_bin_str); // Rt + if (ret != 0) { + printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + } else { + // Immediate + int imm_value; + ret = parse_imm5(tokens[3], &imm_value); + if (ret != 0) { + printf("error: parse_add() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + strcat(instr_bin_str, "1"); // Bit[5] = 1 + // Append imm5 bits + char imm_bits[6]; + int_to_bin_str(imm_value & 0x1F, 5, imm_bits); + strcat(instr_bin_str, imm_bits); } - char rt_num = token[1]; - ret = parse_reg(rt_num, instr_bin_str); // Rt - if (ret != 0) { - printf("error: parse_add() failed\n"); - return 4; + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); } + return 0; } - int parse_mul(char *instr, char *instr_bin_str) { - // Instruction format: MUL Rd, Rs, Rt + // Similar to parse_add, but sub-opcode is '001' // Opcode: 0001 strcpy(instr_bin_str, "0001"); // Opcode - // Tokenize the instruction to get registers + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'MUL' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "001"); // Sub-opcode for MUL - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { + + // Sub-opcode for MUL + strcat(instr_bin_str, "001"); + + // Parse Rt + if (tokens[3][0] != 'R') { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rt_num = token[1]; + char rt_num = tokens[3][1]; ret = parse_reg(rt_num, instr_bin_str); // Rt if (ret != 0) { printf("error: parse_mul() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 0; } +int parse_imm5(char *imm_str, int *imm_value) { + int value = 0; + if (imm_str[0] == '#') { + // Decimal immediate + value = atoi(&imm_str[1]); + } else if (imm_str[0] == 'x') { + // Hex immediate + sscanf(&imm_str[1], "%x", &value); + } else if (imm_str[0] == '0' && imm_str[1] == 'x') { + // Hex immediate + sscanf(&imm_str[2], "%x", &value); + } else { + printf("error: invalid immediate value\n"); + return 4; + } + // Check if value fits in signed 5-bit + if (value < -16 || value > 15) { + printf("error: immediate value out of range\n"); + return 4; + } + *imm_value = value; + return 0; +} +// Function to parse SUB instruction int parse_sub(char *instr, char *instr_bin_str) { - // Similar to parse_add, with sub-opcode '010' + // Opcode: 0001 strcpy(instr_bin_str, "0001"); // Opcode + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'SUB' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "010"); // Sub-opcode for SUB - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { + + // Sub-opcode for SUB + strcat(instr_bin_str, "010"); + + // Parse Rt + if (tokens[3][0] != 'R') { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rt_num = token[1]; + char rt_num = tokens[3][1]; ret = parse_reg(rt_num, instr_bin_str); // Rt if (ret != 0) { printf("error: parse_sub() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 0; } +// Function to parse DIV instruction int parse_div(char *instr, char *instr_bin_str) { - // Similar to parse_add, with sub-opcode '011' + // Opcode: 0001 strcpy(instr_bin_str, "0001"); // Opcode + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'DIV' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "011"); // Sub-opcode for DIV - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { + + // Sub-opcode for DIV + strcat(instr_bin_str, "011"); + + // Parse Rt + if (tokens[3][0] != 'R') { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rt_num = token[1]; + char rt_num = tokens[3][1]; ret = parse_reg(rt_num, instr_bin_str); // Rt if (ret != 0) { printf("error: parse_div() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 0; } +// Function to parse AND instruction int parse_and(char *instr, char *instr_bin_str) { - // Opcode: 0101, sub-opcode '000' + // Opcode: 0101 strcpy(instr_bin_str, "0101"); // Opcode + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'AND' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "000"); // Sub-opcode for AND - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { - printf("error: parse_and() failed\n"); - return 4; + + // Parse Rt or Immediate + if (tokens[3][0] == 'R') { + // Register + strcat(instr_bin_str, "000"); // Bit[5] = 0 + char rt_num = tokens[3][1]; + ret = parse_reg(rt_num, instr_bin_str); // Rt + if (ret != 0) { + printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + } else { + // Immediate + int imm_value; + ret = parse_imm5(tokens[3], &imm_value); + if (ret != 0) { + printf("error: parse_and() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + strcat(instr_bin_str, "1"); // Bit[5] = 1 + // Append imm5 bits + char imm_bits[6]; + int_to_bin_str(imm_value & 0x1F, 5, imm_bits); + strcat(instr_bin_str, imm_bits); } - char rt_num = token[1]; - ret = parse_reg(rt_num, instr_bin_str); // Rt - if (ret != 0) { - printf("error: parse_and() failed\n"); - return 4; + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); } + return 0; } +// Function to parse OR instruction int parse_or(char *instr, char *instr_bin_str) { - // Opcode: 0101, sub-opcode '010' + // Opcode: 0101 strcpy(instr_bin_str, "0101"); // Opcode + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'OR' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "010"); // Sub-opcode for OR - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { + + // Sub-opcode for OR + strcat(instr_bin_str, "010"); + + // Parse Rt + if (tokens[3][0] != 'R') { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rt_num = token[1]; + char rt_num = tokens[3][1]; ret = parse_reg(rt_num, instr_bin_str); // Rt if (ret != 0) { printf("error: parse_or() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 0; } +// Function to parse XOR instruction int parse_xor(char *instr, char *instr_bin_str) { - // Opcode: 0101, sub-opcode '011' + // Opcode: 0101 strcpy(instr_bin_str, "0101"); // Opcode + + // Tokenize the instruction line char instr_copy[COLS]; strcpy(instr_copy, instr); - char *token = strtok(instr_copy, " ,\t"); // Skip 'XOR' - token = strtok(NULL, " ,\t"); // Rd - if (token == NULL) { + char *tokens[10]; + int token_count = 0; + char *p = instr_copy; + while (*p != '\0') { + // Skip spaces and commas + while (isspace(*p) || *p == ',') p++; + if (*p == '\0') break; + + // Collect token + char *start = p; + while (*p != '\0' && !isspace(*p) && *p != ',') p++; + size_t len = p - start; + tokens[token_count] = (char *)malloc(len + 1); + strncpy(tokens[token_count], start, len); + tokens[token_count][len] = '\0'; + token_count++; + } + + if (token_count != 4) { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rd_num = token[1]; + + // Enforce uppercase for operands + to_uppercase(tokens[1]); + to_uppercase(tokens[2]); + to_uppercase(tokens[3]); + + // Parse Rd + if (tokens[1][0] != 'R') { + printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 4; + } + char rd_num = tokens[1][1]; int ret = parse_reg(rd_num, instr_bin_str); // Rd if (ret != 0) { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - token = strtok(NULL, " ,\t"); // Rs - if (token == NULL) { + + // Parse Rs + if (tokens[2][0] != 'R') { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rs_num = token[1]; + char rs_num = tokens[2][1]; ret = parse_reg(rs_num, instr_bin_str); // Rs if (ret != 0) { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - strcat(instr_bin_str, "011"); // Sub-opcode for XOR - token = strtok(NULL, " ,\t"); // Rt - if (token == NULL) { + + // Sub-opcode for XOR + strcat(instr_bin_str, "011"); + + // Parse Rt + if (tokens[3][0] != 'R') { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } - char rt_num = token[1]; + char rt_num = tokens[3][1]; ret = parse_reg(rt_num, instr_bin_str); // Rt if (ret != 0) { printf("error: parse_xor() failed\n"); + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } return 4; } + + // Free allocated memory + for (int i = 0; i < token_count; i++) { + free(tokens[i]); + } + return 0; } +void write_uint16_big_endian(FILE *file, uint16_t value) { + uint8_t bytes[2]; + bytes[0] = (value >> 8) & 0xFF; // High byte + bytes[1] = value & 0xFF; // Low byte + fwrite(bytes, sizeof(uint8_t), 2, file); +} + unsigned short int str_to_bin(char *instr_bin_str) { + if (strlen(instr_bin_str) != 16) { + printf("error: str_to_bin failed, invalid length: %s\n", instr_bin_str); + return 6; + } unsigned short int result = 0; for (int i = 0; i < 16; i++) { result <<= 1; @@ -449,9 +1028,16 @@ int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int ins header[0] = 0xCADE; header[1] = 0x0000; // Starting address, assume 0 for now header[2] = instr_count; - fwrite(header, sizeof(unsigned short int), 3, file); - // Write the instructions - fwrite(program_bin, sizeof(unsigned short int), instr_count, file); + + // Write header in big-endian order + write_uint16_big_endian(file, header[0]); + write_uint16_big_endian(file, header[1]); + write_uint16_big_endian(file, header[2]); + + // Write the instructions in big-endian order + for (int i = 0; i < instr_count; i++) { + write_uint16_big_endian(file, program_bin[i]); + } fclose(file); return 0; } \ No newline at end of file diff --git a/asm_parser.h b/asm_parser.h index 593caf3..9dc7397 100644 --- a/asm_parser.h +++ b/asm_parser.h @@ -13,6 +13,7 @@ #define ASM_PARSER_H #include +#include #define ROWS 100 #define COLS 255 @@ -27,10 +28,12 @@ int parse_div(char *instr, char *instr_bin_str); int parse_and(char *instr, char *instr_bin_str); int parse_or(char *instr, char *instr_bin_str); int parse_xor(char *instr, char *instr_bin_str); -/* add additional helper functions to support other instructions */ +int parse_imm5(char *imm_str, int *imm_value); /* add additional helper functions to support other instructions */ unsigned short int str_to_bin(char *instr_bin_str); int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count); void int_to_bin_str(int num, int bits, char *bin_str); void to_uppercase(char *str); +void trim(char *str); +void write_uint16_big_endian(FILE *file, uint16_t value); #endif diff --git a/assembler.c b/assembler.c index 9c5243c..95052a7 100644 --- a/assembler.c +++ b/assembler.c @@ -37,13 +37,18 @@ int main(int argc, char **argv) { unsigned short int bin = str_to_bin(instr_bin_str); if (bin == 6) { // Error code from str_to_bin printf("Error on line %d: %s\n", i + 1, line); - return i + 1; + return 6; + } + // if program_bin is full, return error + if (instr_count >= ROWS) { + printf("Error: Program too large\n"); + return 0; } program_bin[instr_count] = bin; instr_count++; } else { printf("Error on line %d: %s\n", i + 1, line); - return i + 1; + return ret; } // ret == 0 means successful parsing } diff --git a/test1.asm b/test1.asm index ae0fc24..ce5def0 100644 --- a/test1.asm +++ b/test1.asm @@ -1,7 +1 @@ -ADD R1, R0, R1 -MUL R2, R1, R1 -SUB R3, R2, R1 -DIV R1, R3, R2 -AND R1, R2, R3 -OR R1, R3, R2 -XOR R1, R3, R2 +ADD R1, R0 #5