diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..af22c9d --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +.idea/ +.vscode/ + +*.class + +.guides/secure/grading-config/*.cfg +.guides/secure/test-libs/* + +.guides/secure/test-cases/**/*.java +.guides/secure/user-libs/*.jar +.guides/secure/user-submissions/*.java +*.txt + +libs/*.jar +submit/*.java +tests/* +!tests/.gitkeep + +TEST-junit-*.xml + +*.o +assembler +*.obj diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8e37ca2 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +CC = gcc +CFLAGS = -Wall -Wextra -I. +SRC_ASSEMBLER = assembler.c +SRC_ASM_PARSER = asm_parser.c +OBJ_ASM_PARSER = asm_parser.o +TARGET = assembler +all: $(TARGET) + +assembler: $(OBJ_ASM_PARSER) $(SRC_ASSEMBLER) + $(CC) $(CFLAGS) -o $(TARGET) $(OBJ_ASM_PARSER) $(SRC_ASSEMBLER) + +asm_parser.o: $(SRC_ASM_PARSER) + $(CC) $(CFLAGS) -c $(SRC_ASM_PARSER) + +clean: + rm -f $(OBJ_ASM_PARSER) + +clobber: clean + rm -f $(TARGET) + rm -f *~ + rm -f *.bak + rm -f *.tmp + rm -f *.o + rm -f core + +# Phony targets +.PHONY: all clean clobber \ No newline at end of file diff --git a/asm_parser.c b/asm_parser.c index 8c7d174..e7de8ec 100644 --- a/asm_parser.c +++ b/asm_parser.c @@ -14,4 +14,337 @@ #include #include "asm_parser.h" +int read_asm_file(char *filename, char program[ROWS][COLS]) { + FILE *file = fopen(filename, "r"); + if (!file) { + printf("Error opening file %s\n", filename); + return -1; + } + int line_num = 0; + while (fgets(program[line_num], COLS, file) != NULL && line_num < ROWS) { + // Remove trailing newline + size_t len = strlen(program[line_num]); + if (len > 0 && program[line_num][len - 1] == '\n') { + program[line_num][len - 1] = '\0'; + len--; + } + // Remove comments starting with ';' or '#' + char *comment = strchr(program[line_num], ';'); + if (comment != NULL) { + *comment = '\0'; + len = comment - program[line_num]; + } + comment = strchr(program[line_num], '#'); + if (comment != NULL) { + *comment = '\0'; + len = comment - program[line_num]; + } + // Trim trailing whitespace + while (len > 0 && isspace(program[line_num][len - 1])) { + program[line_num][--len] = '\0'; + } + // Skip empty lines + if (len == 0) { + continue; + } + line_num++; + } + fclose(file); + return line_num; +} + +int parse_instruction(char *instr, char *instr_bin_str) { + // Remove leading whitespace + while (isspace(*instr)) instr++; + if (*instr == '\0') { + // Empty line + return 0; + } + // Tokenize the instruction + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); + if (token == NULL) { + // Empty or invalid instruction + return -1; + } + to_uppercase(token); + if (strcmp(token, "ADD") == 0) { + return parse_add(instr, instr_bin_str); + } else if (strcmp(token, "MUL") == 0) { + return parse_mul(instr, instr_bin_str); + } else if (strcmp(token, "SUB") == 0) { + return parse_sub(instr, instr_bin_str); + } else if (strcmp(token, "DIV") == 0) { + return parse_div(instr, instr_bin_str); + } else if (strcmp(token, "AND") == 0) { + return parse_and(instr, instr_bin_str); + } else if (strcmp(token, "OR") == 0) { + return parse_or(instr, instr_bin_str); + } else if (strcmp(token, "XOR") == 0) { + return parse_xor(instr, instr_bin_str); + } + // Add other instructions here + else { + printf("Unknown instruction: %s\n", token); + return -1; + } +} + +int parse_reg(char *reg_str) { + if (reg_str[0] != 'R' && reg_str[0] != 'r') { + printf("Invalid register: %s\n", reg_str); + return -1; + } + int reg_num = atoi(®_str[1]); + if (reg_num < 0 || reg_num > 7) { + printf("Invalid register number: %d\n", reg_num); + return -1; + } + return reg_num; +} + +void int_to_bin_str(int num, int bits, char *bin_str) { + bin_str[bits] = '\0'; + for (int i = bits - 1; i >= 0; i--) { + bin_str[i] = (num % 2) + '0'; + num /= 2; + } +} + +void to_uppercase(char *str) { + for (; *str; ++str) { + *str = toupper(*str); + } +} + +int parse_add(char *instr, char *instr_bin_str) { + // Instruction format: ADD Rd, Rs, Rt + // Opcode: 0001 + char opcode[] = "0001"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'ADD' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s000%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_mul(char *instr, char *instr_bin_str) { + // Instruction format: MUL Rd, Rs, Rt + // Opcode: 0001 + char opcode[] = "0001"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'MUL' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s001%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_sub(char *instr, char *instr_bin_str) { + // Instruction format: SUB Rd, Rs, Rt + // Opcode: 0001 + char opcode[] = "0001"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'SUB' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s010%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_div(char *instr, char *instr_bin_str) { + // Instruction format: DIV Rd, Rs, Rt + // Opcode: 0001 + char opcode[] = "0001"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'DIV' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s011%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_and(char *instr, char *instr_bin_str) { + // Instruction format: AND Rd, Rs, Rt + // Opcode: 0101 + char opcode[] = "0101"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'AND' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s000%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_or(char *instr, char *instr_bin_str) { + // Instruction format: OR Rd, Rs, Rt + // Opcode: 0101 + char opcode[] = "0101"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'OR' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s010%s", opcode, ddd, sss, ttt); + return 1; +} + +int parse_xor(char *instr, char *instr_bin_str) { + // Instruction format: XOR Rd, Rs, Rt + // Opcode: 0101 + char opcode[] = "0101"; + char ddd[4], sss[4], ttt[4]; + // Tokenize the instruction to get registers + char instr_copy[COLS]; + strcpy(instr_copy, instr); + char *token = strtok(instr_copy, " ,\t"); // Skip 'XOR' + token = strtok(NULL, " ,\t"); // Rd + if (token == NULL) return -1; + int rd = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rs + if (token == NULL) return -1; + int rs = parse_reg(token); + token = strtok(NULL, " ,\t"); // Rt + if (token == NULL) return -1; + int rt = parse_reg(token); + if (rd < 0 || rs < 0 || rt < 0) return -1; + // Convert rd, rs, rt to 3-bit binary strings + int_to_bin_str(rd, 3, ddd); + int_to_bin_str(rs, 3, sss); + int_to_bin_str(rt, 3, ttt); + // Build the binary instruction string + sprintf(instr_bin_str, "%s%s%s011%s", opcode, ddd, sss, ttt); + return 1; +} + +unsigned short int str_to_bin(char *instr_bin_str) { + unsigned short int result = 0; + for (int i = 0; i < 16; i++) { + result <<= 1; + if (instr_bin_str[i] == '1') { + result |= 1; + } else if (instr_bin_str[i] != '0') { + // Invalid character + printf("Invalid binary string: %s\n", instr_bin_str); + return 0; + } + } + return result; +} + +int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count) { + FILE *file = fopen(filename, "wb"); + if (!file) { + printf("Error opening file %s for writing\n", filename); + return -1; + } + // Write the code header: xCADE, address (start at 0), n (instr_count) + unsigned short int header[3]; + header[0] = 0xCADE; + header[1] = 0x0000; // Starting address, assume 0 for now + header[2] = instr_count; + fwrite(header, sizeof(unsigned short int), 3, file); + // Write the instructions + fwrite(program_bin, sizeof(unsigned short int), instr_count, file); + fclose(file); + return 0; +} + /* to do - implement all the functions in asm_parser.h */ \ No newline at end of file diff --git a/asm_parser.h b/asm_parser.h index e1eba35..800da29 100644 --- a/asm_parser.h +++ b/asm_parser.h @@ -9,14 +9,28 @@ * */ +#ifndef ASM_PARSER_H +#define ASM_PARSER_H + +#include + #define ROWS 100 #define COLS 255 -int read_asm_file (char* filename, char program [ROWS][COLS] ) ; -int parse_instruction (char* instr, char* instr_bin_str) ; -int parse_reg (char reg_num, char* instr_bin_str) ; -int parse_add (char* instr, char* instr_bin_str ) ; -int parse_mul (char* instr, char* instr_bin_str ) ; +int read_asm_file(char *filename, char program[ROWS][COLS]); +int parse_instruction(char *instr, char *instr_bin_str); +int parse_reg(char *reg_str); +int parse_add(char *instr, char *instr_bin_str); +int parse_mul(char *instr, char *instr_bin_str); +int parse_sub(char *instr, char *instr_bin_str); +int parse_div(char *instr, char *instr_bin_str); +int parse_and(char *instr, char *instr_bin_str); +int parse_or(char *instr, char *instr_bin_str); +int parse_xor(char *instr, char *instr_bin_str); /* add additional helper functions to support other instructions */ -unsigned short int str_to_bin (char* instr_bin_str) ; -int write_obj_file (char* filename, unsigned short int program_bin[ROWS] ) ; +/* add additional helper functions to support other instructions */ +unsigned short int str_to_bin(char *instr_bin_str); +int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count); +void int_to_bin_str(int num, int bits, char *bin_str); +void to_uppercase(char *str); +#endif diff --git a/assembler.c b/assembler.c index efe906f..6da4af8 100644 --- a/assembler.c +++ b/assembler.c @@ -14,11 +14,50 @@ #include #include "asm_parser.h" -int main(int argc, char** argv) { - - char* filename = NULL ; // name of ASM file - char program [ROWS][COLS] ; // ASM file line-by-line - char program_bin_str [ROWS][17] ; // instructions converted to a binary string - unsigned short int program_bin [ROWS] ; // instructions in binary (HEX) - +int main(int argc, char **argv) { + if (argc < 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + char *filename = argv[1]; // name of ASM file + char program[ROWS][COLS]; // ASM file line-by-line + char program_bin_str[ROWS][17]; // instructions converted to a binary string + unsigned short int program_bin[ROWS]; // instructions in binary (HEX) + int num_lines = read_asm_file(filename, program); + if (num_lines < 0) { + return 1; + } + int instr_count = 0; + for (int i = 0; i < num_lines; i++) { + char *line = program[i]; + char instr_bin_str[17]; + int ret = parse_instruction(line, instr_bin_str); + if (ret == 1) { + strcpy(program_bin_str[instr_count], instr_bin_str); + program_bin[instr_count] = str_to_bin(instr_bin_str); + instr_count++; + } else if (ret == 0) { + // Skip empty or comment line + continue; + } else { + printf("Error parsing line %d: %s\n", i + 1, line); + return 1; + } + } + // Write the object file + char obj_filename[256]; + strcpy(obj_filename, filename); + char *dot = strrchr(obj_filename, '.'); + if (dot != NULL) { + strcpy(dot, ".obj"); + } else { + strcat(obj_filename, ".obj"); + } + int ret = write_obj_file(obj_filename, program_bin, instr_count); + if (ret < 0) { + printf("Error writing object file\n"); + return 1; + } + printf("Successfully assembled %s to %s\n", filename, obj_filename); + return 0; }