first complete version

This commit is contained in:
feng-arch 2024-10-30 16:31:10 +08:00
parent 48fc633f43
commit 1ca6f8e40c
5 changed files with 450 additions and 14 deletions

23
.gitignore vendored Normal file
View File

@ -0,0 +1,23 @@
.idea/
.vscode/
*.class
.guides/secure/grading-config/*.cfg
.guides/secure/test-libs/*
.guides/secure/test-cases/**/*.java
.guides/secure/user-libs/*.jar
.guides/secure/user-submissions/*.java
*.txt
libs/*.jar
submit/*.java
tests/*
!tests/.gitkeep
TEST-junit-*.xml
*.o
assembler
*.obj

27
Makefile Normal file
View File

@ -0,0 +1,27 @@
# Builds the `assembler` executable from assembler.c and asm_parser.c.
CC = gcc
CFLAGS = -Wall -Wextra -I.
SRC_ASSEMBLER = assembler.c
SRC_ASM_PARSER = asm_parser.c
OBJ_ASM_PARSER = asm_parser.o
HDR_ASM_PARSER = asm_parser.h
TARGET = assembler

all: $(TARGET)

# Both translation units include asm_parser.h, so it is listed as a
# prerequisite: editing the header (e.g. ROWS/COLS) must trigger a rebuild.
assembler: $(OBJ_ASM_PARSER) $(SRC_ASSEMBLER) $(HDR_ASM_PARSER)
	$(CC) $(CFLAGS) -o $(TARGET) $(OBJ_ASM_PARSER) $(SRC_ASSEMBLER)

asm_parser.o: $(SRC_ASM_PARSER) $(HDR_ASM_PARSER)
	$(CC) $(CFLAGS) -c $(SRC_ASM_PARSER)

# Remove the intermediate object file only.
clean:
	rm -f $(OBJ_ASM_PARSER)

# Remove everything that can be regenerated, plus editor/backup leftovers.
clobber: clean
	rm -f $(TARGET)
	rm -f *~
	rm -f *.bak
	rm -f *.tmp
	rm -f *.o
	rm -f core

# Phony targets
.PHONY: all clean clobber

View File

@ -14,4 +14,337 @@
#include <stdlib.h>
#include "asm_parser.h"
/*
 * Loads an assembly source file into `program`, one cleaned-up line per row.
 *
 * For every line read, the text from the first ';' or '#' onward (a comment)
 * and any trailing whitespace are removed. Lines that are empty after this
 * clean-up are not stored.
 *
 * At most ROWS lines are stored; reading stops once the table is full.
 * Input lines longer than COLS - 1 characters are split by fgets() and end
 * up in more than one row.
 *
 * Returns the number of rows stored, or -1 if the file cannot be opened
 * (or an argument is NULL).
 */
int read_asm_file(char *filename, char program[ROWS][COLS]) {
    if (filename == NULL || program == NULL) {
        return -1;
    }

    FILE *file = fopen(filename, "r");
    if (!file) {
        printf("Error opening file %s\n", filename);
        return -1;
    }

    int line_num = 0;
    /* The bounds test must come first: fgets() writes into program[line_num],
     * which would be one row past the end of the table when line_num == ROWS. */
    while (line_num < ROWS && fgets(program[line_num], COLS, file) != NULL) {
        char *line = program[line_num];

        /* Cut the line at the newline or at the start of a comment,
         * whichever comes first. */
        size_t len = strcspn(line, "\n;#");
        line[len] = '\0';

        /* Trim trailing whitespace (this also removes a '\r' from CRLF files).
         * <ctype.h> functions require a value representable as unsigned char. */
        while (len > 0 && isspace((unsigned char)line[len - 1])) {
            line[--len] = '\0';
        }

        /* Blank line: do not advance, so the row is reused by the next read. */
        if (len == 0) {
            continue;
        }
        line_num++;
    }

    fclose(file);
    return line_num;
}
/*
 * Identifies the mnemonic at the start of `instr` (case-insensitive) and
 * hands the whole line to the matching parse_xxx() routine, which writes the
 * encoded instruction into `instr_bin_str`.
 *
 * Returns:
 *    1  the instruction was encoded (value returned by parse_xxx()),
 *    0  the line contains only whitespace,
 *   -1  the mnemonic is unknown, the line is malformed or too long, or an
 *       argument is NULL.
 */
int parse_instruction(char *instr, char *instr_bin_str) {
    if (instr == NULL || instr_bin_str == NULL) {
        return -1;
    }

    /* Skip leading whitespace. <ctype.h> functions require a value
     * representable as unsigned char, hence the cast. */
    while (isspace((unsigned char)*instr)) {
        instr++;
    }
    if (*instr == '\0') {
        /* Empty line */
        return 0;
    }

    /* strtok() modifies its input, so the mnemonic is extracted from a
     * private copy. Refuse input that would not fit instead of overflowing. */
    char instr_copy[COLS];
    size_t instr_len = strlen(instr);
    if (instr_len >= sizeof instr_copy) {
        return -1;
    }
    memcpy(instr_copy, instr, instr_len + 1);

    char *mnemonic = strtok(instr_copy, " ,\t");
    if (mnemonic == NULL) {
        /* Nothing but delimiters */
        return -1;
    }
    to_uppercase(mnemonic);

    if (strcmp(mnemonic, "ADD") == 0) {
        return parse_add(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "MUL") == 0) {
        return parse_mul(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "SUB") == 0) {
        return parse_sub(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "DIV") == 0) {
        return parse_div(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "AND") == 0) {
        return parse_and(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "OR") == 0) {
        return parse_or(instr, instr_bin_str);
    }
    if (strcmp(mnemonic, "XOR") == 0) {
        return parse_xor(instr, instr_bin_str);
    }
    /* Add other instructions here */

    printf("Unknown instruction: %s\n", mnemonic);
    return -1;
}
/*
 * Parses a register operand of the form "R<n>" or "r<n>" with n in 0..7.
 *
 * The whole string must be consumed: operands such as "R", "R1x" or "R-1"
 * are rejected rather than being silently read as some register.
 *
 * Returns the register number (0..7), or -1 after printing a diagnostic
 * when the operand is not a valid register.
 */
int parse_reg(char *reg_str) {
    if (reg_str == NULL || (reg_str[0] != 'R' && reg_str[0] != 'r')) {
        printf("Invalid register: %s\n", reg_str != NULL ? reg_str : "(null)");
        return -1;
    }

    /* Require a digit right after the prefix so that strtol() cannot accept
     * a sign or leading whitespace, then require that it consumed the rest
     * of the string. */
    char *end = &reg_str[1];
    long reg_num = -1;
    if (isdigit((unsigned char)reg_str[1])) {
        reg_num = strtol(&reg_str[1], &end, 10);
    }
    if (end == &reg_str[1] || *end != '\0') {
        printf("Invalid register: %s\n", reg_str);
        return -1;
    }

    /* strtol() saturates on overflow, which also lands outside 0..7. */
    if (reg_num < 0 || reg_num > 7) {
        printf("Invalid register number: %ld\n", reg_num);
        return -1;
    }
    return (int)reg_num;
}
/*
 * Writes `num` as a NUL-terminated string of exactly `bits` characters
 * ('0'/'1'), most significant bit first. Only the low `bits` bits are kept.
 * Negative values are written in two's complement (sign-extended), so
 * int_to_bin_str(-1, 4, s) yields "1111".
 *
 * `bin_str` must have room for bits + 1 characters. Nothing is written when
 * `bin_str` is NULL or `bits` is negative.
 */
void int_to_bin_str(int num, int bits, char *bin_str) {
    if (bin_str == NULL || bits < 0) {
        return;
    }

    /* Work on the unsigned bit pattern: `num % 2` is -1 for negative odd
     * values, which would produce a character that is not '0' or '1'. */
    unsigned int value = (unsigned int)num;
    /* Top bit of unsigned int; OR-ing it back in after each shift emulates
     * an arithmetic (sign-extending) right shift for negative inputs. */
    const unsigned int sign_fill = (num < 0) ? ~(~0u >> 1) : 0u;

    bin_str[bits] = '\0';
    for (int i = bits - 1; i >= 0; i--) {
        bin_str[i] = (char)('0' + (value & 1u));
        value = (value >> 1) | sign_fill;
    }
}
/*
 * Converts the NUL-terminated string `str` to upper case in place.
 * Does nothing when `str` is NULL.
 */
void to_uppercase(char *str) {
    if (str == NULL) {
        return;
    }
    for (; *str != '\0'; ++str) {
        /* toupper() requires a value representable as unsigned char;
         * passing a negative plain char is undefined behavior. */
        *str = (char)toupper((unsigned char)*str);
    }
}
/*
 * Shared encoder for the three-register instructions handled in this file
 * (ADD, MUL, SUB, DIV, AND, OR, XOR). They all have the textual form
 * "<MNEMONIC> Rd, Rs, Rt" and differ only in two bit fields.
 *
 * On success `instr_bin_str` receives the 16-character string
 *     opcode(4) ddd(3) sss(3) sub_opcode(3) ttt(3)
 * followed by a NUL, so it must have room for 17 characters.
 *
 * Returns 1 on success, -1 if an operand is missing or is not a valid
 * register, if the line does not fit in COLS characters, or if an argument
 * is NULL. Tokens after the third operand are ignored.
 */
static int encode_three_reg(const char *instr, const char *opcode,
                            const char *sub_opcode, char *instr_bin_str) {
    static const char delims[] = " ,\t";

    if (instr == NULL || instr_bin_str == NULL) {
        return -1;
    }

    /* strtok() modifies its input, so tokenize a private copy. Refuse input
     * that would not fit instead of overflowing the buffer. */
    char instr_copy[COLS];
    size_t instr_len = strlen(instr);
    if (instr_len >= sizeof instr_copy) {
        return -1;
    }
    memcpy(instr_copy, instr, instr_len + 1);

    /* The first token is the mnemonic, which the caller has already matched. */
    if (strtok(instr_copy, delims) == NULL) {
        return -1;
    }

    /* Rd, Rs, Rt in that order. Each operand is parsed as soon as it is read
     * so that diagnostics from parse_reg() appear in operand order. */
    int regs[3];
    for (int i = 0; i < 3; i++) {
        char *operand = strtok(NULL, delims);
        if (operand == NULL) {
            return -1;
        }
        regs[i] = parse_reg(operand);
    }
    if (regs[0] < 0 || regs[1] < 0 || regs[2] < 0) {
        return -1;
    }

    /* Convert rd, rs, rt to 3-bit binary strings */
    char ddd[4], sss[4], ttt[4];
    int_to_bin_str(regs[0], 3, ddd);
    int_to_bin_str(regs[1], 3, sss);
    int_to_bin_str(regs[2], 3, ttt);

    /* Build the binary instruction string */
    sprintf(instr_bin_str, "%s%s%s%s%s", opcode, ddd, sss, sub_opcode, ttt);
    return 1;
}

/* ADD Rd, Rs, Rt  ->  0001 ddd sss 000 ttt. Returns 1 on success, -1 on error. */
int parse_add(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0001", "000", instr_bin_str);
}

/* MUL Rd, Rs, Rt  ->  0001 ddd sss 001 ttt. Returns 1 on success, -1 on error. */
int parse_mul(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0001", "001", instr_bin_str);
}

/* SUB Rd, Rs, Rt  ->  0001 ddd sss 010 ttt. Returns 1 on success, -1 on error. */
int parse_sub(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0001", "010", instr_bin_str);
}

/* DIV Rd, Rs, Rt  ->  0001 ddd sss 011 ttt. Returns 1 on success, -1 on error. */
int parse_div(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0001", "011", instr_bin_str);
}

/* AND Rd, Rs, Rt  ->  0101 ddd sss 000 ttt. Returns 1 on success, -1 on error. */
int parse_and(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0101", "000", instr_bin_str);
}

/* OR Rd, Rs, Rt   ->  0101 ddd sss 010 ttt. Returns 1 on success, -1 on error. */
int parse_or(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0101", "010", instr_bin_str);
}

/* XOR Rd, Rs, Rt  ->  0101 ddd sss 011 ttt. Returns 1 on success, -1 on error. */
int parse_xor(char *instr, char *instr_bin_str) {
    return encode_three_reg(instr, "0101", "011", instr_bin_str);
}
/*
 * Converts the first 16 characters of `instr_bin_str` (each '0' or '1',
 * most significant bit first) into the corresponding 16-bit value.
 *
 * If any of those characters is something else (including an early NUL),
 * a diagnostic is printed and 0 is returned.
 */
unsigned short int str_to_bin(char *instr_bin_str) {
    unsigned short int value = 0;
    int pos = 0;

    while (pos < 16) {
        const char digit = instr_bin_str[pos++];

        if (digit != '0' && digit != '1') {
            /* Invalid character */
            printf("Invalid binary string: %s\n", instr_bin_str);
            return 0;
        }
        /* Shift the accumulated bits up and append the new one. */
        value = (unsigned short int)((value << 1) | (digit == '1'));
    }
    return value;
}
int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count) {
FILE *file = fopen(filename, "wb");
if (!file) {
printf("Error opening file %s for writing\n", filename);
return -1;
}
// Write the code header: xCADE, address (start at 0), n (instr_count)
unsigned short int header[3];
header[0] = 0xCADE;
header[1] = 0x0000; // Starting address, assume 0 for now
header[2] = instr_count;
fwrite(header, sizeof(unsigned short int), 3, file);
// Write the instructions
fwrite(program_bin, sizeof(unsigned short int), instr_count, file);
fclose(file);
return 0;
}
/* to do - implement all the functions in asm_parser.h */

View File

@ -9,14 +9,28 @@
*
*/
#ifndef ASM_PARSER_H
#define ASM_PARSER_H
#include <ctype.h>
#define ROWS 100
#define COLS 255
int read_asm_file (char* filename, char program [ROWS][COLS] ) ;
int parse_instruction (char* instr, char* instr_bin_str) ;
int parse_reg (char reg_num, char* instr_bin_str) ;
int parse_add (char* instr, char* instr_bin_str ) ;
int parse_mul (char* instr, char* instr_bin_str ) ;
/* Reads `filename` into `program`, one line per row, with ';' / '#' comments,
 * trailing whitespace and blank lines removed. Returns the number of rows
 * stored, or -1 if the file cannot be opened. */
int read_asm_file(char *filename, char program[ROWS][COLS]);
/* Dispatches on the mnemonic at the start of `instr` and writes the encoded
 * instruction to `instr_bin_str`. Returns 1 on success, 0 for a line that
 * contains only whitespace, -1 on error. */
int parse_instruction(char *instr, char *instr_bin_str);
/* Parses a register operand ("R0".."R7", either case of 'R'). Returns the
 * register number, or -1 if the operand is not a valid register. */
int parse_reg(char *reg_str);
/* Per-instruction encoders for "<MNEMONIC> Rd, Rs, Rt". Each writes a
 * 16-character string of '0'/'1' plus a terminating NUL to `instr_bin_str`
 * (17 bytes needed) and returns 1 on success or -1 on error. */
int parse_add(char *instr, char *instr_bin_str);
int parse_mul(char *instr, char *instr_bin_str);
int parse_sub(char *instr, char *instr_bin_str);
int parse_div(char *instr, char *instr_bin_str);
int parse_and(char *instr, char *instr_bin_str);
int parse_or(char *instr, char *instr_bin_str);
int parse_xor(char *instr, char *instr_bin_str);
/* add additional helper functions to support other instructions */
unsigned short int str_to_bin (char* instr_bin_str) ;
int write_obj_file (char* filename, unsigned short int program_bin[ROWS] ) ;
/* add additional helper functions to support other instructions */
/* Converts the first 16 characters ('0'/'1', most significant bit first) of
 * `instr_bin_str` to a 16-bit value. Prints a diagnostic and returns 0 if
 * any of them is another character. */
unsigned short int str_to_bin(char *instr_bin_str);
/* Writes a three-word header (0xCADE, 0x0000, instr_count) followed by
 * `instr_count` words of `program_bin` to `filename`. Returns 0 on success,
 * -1 on failure. */
int write_obj_file(char *filename, unsigned short int program_bin[ROWS], int instr_count);
/* Writes `num` to `bin_str` as a NUL-terminated string of `bits` binary
 * digits, most significant bit first; `bin_str` needs bits + 1 bytes. */
void int_to_bin_str(int num, int bits, char *bin_str);
/* Converts `str` to upper case in place. */
void to_uppercase(char *str);
#endif

View File

@ -14,11 +14,50 @@
#include <stdlib.h>
#include "asm_parser.h"
int main(int argc, char** argv) {
char* filename = NULL ; // name of ASM file
char program [ROWS][COLS] ; // ASM file line-by-line
char program_bin_str [ROWS][17] ; // instructions converted to a binary string
unsigned short int program_bin [ROWS] ; // instructions in binary (HEX)
/*
 * Assembler entry point.
 *
 * Usage: assembler <assembly_file.asm>
 *
 * Reads the assembly file, encodes every line with parse_instruction(), and
 * writes the result to an object file named after the input with its
 * extension replaced by ".obj" (or with ".obj" appended when the file name
 * has no extension).
 *
 * Returns 0 on success and 1 on any error (bad usage, unreadable input,
 * unparsable line, output name too long, or write failure).
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: %s <assembly_file.asm>\n", argc > 0 ? argv[0] : "assembler");
        return 1;
    }

    char *filename = argv[1];              /* name of ASM file */
    char program[ROWS][COLS];              /* ASM file line-by-line */
    char program_bin_str[ROWS][17];        /* instructions converted to a binary string */
    unsigned short int program_bin[ROWS];  /* instructions in binary (HEX) */

    int num_lines = read_asm_file(filename, program);
    if (num_lines < 0) {
        return 1;
    }

    int instr_count = 0;
    for (int i = 0; i < num_lines; i++) {
        char *line = program[i];
        char instr_bin_str[17];
        int ret = parse_instruction(line, instr_bin_str);
        if (ret == 1) {
            strcpy(program_bin_str[instr_count], instr_bin_str);
            program_bin[instr_count] = str_to_bin(instr_bin_str);
            instr_count++;
        } else if (ret == 0) {
            /* Skip empty or comment line */
            continue;
        } else {
            /* `i + 1` counts the rows kept by read_asm_file(), which drops
             * blank and comment-only lines, so it can differ from the line
             * number in the source file. */
            printf("Error parsing line %d: %s\n", i + 1, line);
            return 1;
        }
    }

    /* Derive the object file name. The extension is searched for only in the
     * final path component, so a '.' in a directory name (e.g. "../prog" or
     * "./build.d/prog") is never mistaken for the start of an extension. */
    static const char obj_ext[] = ".obj";
    char obj_filename[256];
    const char *last_slash = strrchr(filename, '/');
    const char *base = (last_slash != NULL) ? last_slash + 1 : filename;
    const char *dot = strrchr(base, '.');
    size_t stem_len = (dot != NULL) ? (size_t)(dot - filename) : strlen(filename);

    /* sizeof obj_ext includes the terminating NUL. */
    if (stem_len + sizeof obj_ext > sizeof obj_filename) {
        printf("Error: file name too long: %s\n", filename);
        return 1;
    }
    memcpy(obj_filename, filename, stem_len);
    memcpy(obj_filename + stem_len, obj_ext, sizeof obj_ext);

    /* Write the object file */
    int ret = write_obj_file(obj_filename, program_bin, instr_count);
    if (ret < 0) {
        printf("Error writing object file\n");
        return 1;
    }

    printf("Successfully assembled %s to %s\n", filename, obj_filename);
    return 0;
}