From 887875959aa84af92291db334898aaa20956e632 Mon Sep 17 00:00:00 2001 From: allexanderbergmans Date: Fri, 3 Jul 2026 12:17:10 +0200 Subject: init --- gen/docs/parser.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 gen/docs/parser.c (limited to 'gen/docs/parser.c') diff --git a/gen/docs/parser.c b/gen/docs/parser.c new file mode 100644 index 0000000..2852070 --- /dev/null +++ b/gen/docs/parser.c @@ -0,0 +1,256 @@ +#include "parser.h" +#include +#include +#include +#include +#include + +static void str_trim(char *s) { + char *e; + while (isspace((unsigned char)*s)) s++; + if (*s == 0) return; + e = s + strlen(s) - 1; + while (e > s && isspace((unsigned char)*e)) e--; + *(e + 1) = '\0'; +} + +static int str_split(const char *line, char **key, char **val) { + static char buf[MAX_LINE]; + strncpy(buf, line, sizeof(buf) - 1); + buf[sizeof(buf) - 1] = '\0'; + char *eq = strchr(buf, ' '); + if (!eq) { *key = buf; *val = ""; return 0; } + *eq = '\0'; + *key = buf; + *val = eq + 1; + str_trim(*key); + str_trim(*val); + return 1; +} + +static int str_to_int(const char *s) { + if (strncmp(s, "0x", 2) == 0 || strncmp(s, "0X", 2) == 0) + return (int)strtol(s, NULL, 16); + return atoi(s); +} + +static int str_to_bool(const char *s) { + return strcmp(s, "true") == 0 || strcmp(s, "yes") == 0 || strcmp(s, "1") == 0; +} + +static int parse_block(FILE *f, const char *block_type, const char *block_name, IsaDb *db) { + char line[MAX_LINE]; + + if (strcasecmp(block_type, "FORMAT") == 0) { + if (db->num_formats >= MAX_FMTS) { + fprintf(stderr, "Too many formats (max %d)\n", MAX_FMTS); + return -1; + } + IsaFormat *fmt = &db->formats[db->num_formats]; + strncpy(fmt->name, block_name, MAX_NAME - 1); + fmt->width = 32; + fmt->num_fields = 0; + + while (fgets(line, sizeof(line), f)) { + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n') continue; + if (strncmp(trimmed, "END", 3) == 0) break; + + char *key, *val; + str_split(trimmed, &key, &val); + + if (strcasecmp(key, "WIDTH") == 0) { + fmt->width = str_to_int(val); + } else if (strcasecmp(key, "FIELD") == 0) { + if (fmt->num_fields >= MAX_FIELDS) { + fprintf(stderr, "Too many fields in format %s\n", fmt->name); + continue; + } + char fname[64]; + int high, low; + if (sscanf(val, "%63s %d:%d", fname, &high, &low) >= 3) { + strncpy(fmt->fields[fmt->num_fields].name, fname, MAX_LABEL - 1); + fmt->fields[fmt->num_fields].high = high; + fmt->fields[fmt->num_fields].low = low; + fmt->num_fields++; + } + } + } + db->num_formats++; + } else if (strcasecmp(block_type, "REGISTER") == 0) { + if (db->num_registers >= MAX_REGS) { + fprintf(stderr, "Too many registers (max %d)\n", MAX_REGS); + return -1; + } + IsaRegister *reg = &db->registers[db->num_registers]; + memset(reg, 0, sizeof(*reg)); + strncpy(reg->name, block_name, MAX_NAME - 1); + reg->preserve = -1; + + while (fgets(line, sizeof(line), f)) { + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n') continue; + if (strncmp(trimmed, "END", 3) == 0) break; + + char *key, *val; + str_split(trimmed, &key, &val); + + if (strcasecmp(key, "ABBR") == 0) + strncpy(reg->abbr, val, MAX_LABEL - 1); + else if (strcasecmp(key, "DESC") == 0) + strncpy(reg->desc, val, MAX_DESC - 1); + else if (strcasecmp(key, "PRESERVE") == 0) + reg->preserve = str_to_bool(val); + else if (strcasecmp(key, "CALLER") == 0) + reg->caller_saved = str_to_bool(val); + else if (strcasecmp(key, "ARG") == 0) + reg->arg_reg = str_to_bool(val); + else if (strcasecmp(key, "INDEX") == 0) + reg->index = str_to_int(val); + } + if (reg->preserve == -1) reg->preserve = 0; + db->num_registers++; + } else if (strcasecmp(block_type, "INSTRUCTION") == 0) { + if (db->num_instructions >= MAX_INSTS) { + fprintf(stderr, "Too many instructions (max %d)\n", MAX_INSTS); + return -1; + } + IsaInstruction *inst = &db->instructions[db->num_instructions]; + memset(inst, 0, sizeof(*inst)); + strncpy(inst->name, block_name, MAX_NAME - 1); + inst->funct3_valid = 0; + inst->funct7_valid = 0; + + while (fgets(line, sizeof(line), f)) { + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n') continue; + if (strncmp(trimmed, "END", 3) == 0) break; + + char *key, *val; + str_split(trimmed, &key, &val); + + if (strcasecmp(key, "FORMAT") == 0) + strncpy(inst->fmt_name, val, MAX_NAME - 1); + else if (strcasecmp(key, "OPCODE") == 0) + inst->opcode = (uint32_t)str_to_int(val); + else if (strcasecmp(key, "FUNCT3") == 0) { + inst->funct3 = (uint32_t)str_to_int(val); + inst->funct3_valid = 1; + } else if (strcasecmp(key, "FUNCT7") == 0) { + inst->funct7 = (uint32_t)str_to_int(val); + inst->funct7_valid = 1; + } else if (strcasecmp(key, "OPERANDS") == 0) + strncpy(inst->operands, val, MAX_OPERANDS - 1); + else if (strcasecmp(key, "DESC") == 0) + strncpy(inst->desc, val, MAX_DESC - 1); + else if (strcasecmp(key, "NOTE") == 0) + strncpy(inst->note, val, MAX_NOTE - 1); + else if (strcasecmp(key, "CATEGORY") == 0) + strncpy(inst->category, val, MAX_LABEL - 1); + else if (strcasecmp(key, "IMM") == 0) + inst->has_imm = str_to_bool(val); + } + db->num_instructions++; + } else if (strcasecmp(block_type, "CSR") == 0) { + if (db->num_csrs >= MAX_CSRS) { + fprintf(stderr, "Too many CSRs (max %d)\n", MAX_CSRS); + return -1; + } + IsaCsr *csr = &db->csrs[db->num_csrs]; + memset(csr, 0, sizeof(*csr)); + strncpy(csr->name, block_name, MAX_NAME - 1); + + while (fgets(line, sizeof(line), f)) { + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n') continue; + if (strncmp(trimmed, "END", 3) == 0) break; + + char *key, *val; + str_split(trimmed, &key, &val); + + if (strcasecmp(key, "NUMBER") == 0) + csr->number = str_to_int(val); + else if (strcasecmp(key, "DESC") == 0) + strncpy(csr->desc, val, MAX_DESC - 1); + } + db->num_csrs++; + } else if (strcasecmp(block_type, "ARCH") == 0) { + while (fgets(line, sizeof(line), f)) { + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n') continue; + if (strncmp(trimmed, "END", 3) == 0) break; + + char *key, *val; + str_split(trimmed, &key, &val); + + if (strcasecmp(key, "NAME") == 0) + strncpy(db->arch_name, val, MAX_NAME - 1); + else if (strcasecmp(key, "VERSION") == 0) + strncpy(db->arch_version, val, MAX_LABEL - 1); + else if (strcasecmp(key, "DATE") == 0) + strncpy(db->arch_date, val, MAX_LABEL - 1); + else if (strcasecmp(key, "STATUS") == 0) + strncpy(db->arch_status, val, MAX_LABEL - 1); + } + } + + return 0; +} + +int isa_parse_file(const char *path, IsaDb *db) { + FILE *f = fopen(path, "r"); + if (!f) { + fprintf(stderr, "Error: cannot open %s\n", path); + return -1; + } + + char line[MAX_LINE]; + int line_num = 0; + + while (fgets(line, sizeof(line), f)) { + line_num++; + char *trimmed = line; + while (isspace((unsigned char)*trimmed)) trimmed++; + if (*trimmed == '#' || *trimmed == '\n' || *trimmed == '\r') continue; + + char type[MAX_LABEL], name[MAX_NAME]; + if (sscanf(trimmed, "%31s %63s", type, name) >= 2) { + if (parse_block(f, type, name, db) != 0) { + fprintf(stderr, "Error parsing %s in %s line %d\n", type, path, line_num); + fclose(f); + return -1; + } + } + } + + fclose(f); + return 0; +} + +int isa_parse_dir(const char *dir, IsaDb *db) { + DIR *d = opendir(dir); + if (!d) { + fprintf(stderr, "Error: cannot open directory %s\n", dir); + return -1; + } + + struct dirent *entry; + while ((entry = readdir(d)) != NULL) { + if (entry->d_type != DT_REG) continue; + const char *ext = strrchr(entry->d_name, '.'); + if (!ext || strcasecmp(ext, ".isa") != 0) continue; + + char path[MAX_PATH]; + snprintf(path, sizeof(path), "%s/%s", dir, entry->d_name); + printf(" Parsing: %s\n", path); + isa_parse_file(path, db); + } + + closedir(d); + return 0; +} -- cgit v1.3