这是我在C中实现的brainfuck编译器。构建/运行项目的指令(GitHub repo)可以找到这里。
我首先是converting,x86程序集的brainfuck源代码,然后使用ld和as生成可执行文件(目前只为linux生成32位可执行文件)。我本来打算没有依赖关系,但是ELF格式一开始太吓人了,所以我现在使用的是as ( GNU汇编程序)和ld ( GNU链接器)。
脑操数组的大小目前设置为30,000,并在堆上分配,每个单元格分配一个字节。阴性细胞指数现在应该是UB。
我在编写编译器方面有过的zero经验,而且我非常肯定这不是编译器的方法。我甚至还没有实现过词条/解析器。而且,我认为在开关的情况下实现它会更容易,因为语言是如此简单。
话虽如此,我认为我在这个编译器方面做得很好。编译后的程序运行x2的速度和这一样快,当我使用它生成mandelbrot时。
有一些关于头文件中的函数的信息。希望能提高可读性和理解性。
如有任何改进/建议,我将不胜感激。
我目前不关心优化编译器。我有几个主意。但是,如果我遗漏了一些非常明显的(很容易实现),我希望您能提到它们。
#pragma once
/*
Some general techniques used.
1) edi always contains the address in memory of the current cell
2) edx is always 1 (for read and write syscalls).
*/
/** The assembly instructions for allocating 30,000 bytes of memory.
Uses the brk() system call without any library wrappers.
NOTE: brk() zeros out memory during first call. No need to initialize to 0.*/
const char ALLOCATE_MEMORY[] = ".intel_syntax noprefix\n"
".global _start\n"
"_start:\n"
"mov eax,0x2d\n"
"xor ebx,ebx\n"
"int 0x80\n"
"add eax,30000\n"
"mov ebx,eax\n"
"mov eax,0x2d\n"
"int 0x80\n"
"mov edi,eax\n"
"sub edi,30000\n"
//Sets edx=1 for future read and write calls. This is never modified.
"xor edx,edx\n"
"inc edx\n";
/** The assembly instructions for increasing the current cell pointer. */
const char INCREMENT_POINTER[] = "add edi,0x%02x\n";
/** The assembly instructions for decreasing the current cell pointer. */
const char DECREMENT_POINTER[] = "sub edi,0x%02x\n";
/** The assembly instruction to increase the value at current cell */
const char INCREMENT[] = "add byte ptr [edi],0x%02x\n";
/** The assembly instruction to decrease the value at current cell */
const char DECREMENT[] = "sub byte ptr [edi],0x%02x\n";
/** The assembly instructions for starting a loop in bf. LABEL references are not present and are calculated at compile time. */
const char LOOP_START[] =
//"_LABEL_XXXX:\n"
"cmp byte ptr [edi],0x00\n"
//"je _LABEL_XXXX\n";
"je ";
/** The assembly instructions for ending a loop in bf. LABEL references are not present and are calculated at compile time. */
const char LOOP_END[] =
//"_LABELXXXX:\n"
"cmp byte ptr [edi],0x00\n"
//"jne _LABEL_XXXX\n";
"jne ";
/** The assembly instructions for the write syscall. Executes the raw syscall. The value is printed to stdout. */
const char WRITE_CHAR[] = "mov eax,0x04\n"
"xor ebx,ebx\n"
"inc ebx\n"
"mov ecx,edi\n"
"int 0x80\n";
/** The assembly instructions for the read syscall. Executes the raw syscall. The value is stored in the current cell. */
const char READ_CHAR[] = "mov eax,0x03\n"
"xor ebx,ebx\n"
"mov ecx,edi\n"
"int 0x80\n";
/** The assembly instructions for the exit syscall. Executes the raw syscall.*/
const char SYS_EXIT_CALL[] = "mov eax,0x01\n"
"xor ebx,ebx\n"
"int 0x80\n";#pragma once
#include
/** The structure which represents the parsed global args.*/
struct global_args_t
{
/** The path to the source brainfuck code.*/
char *input_file;
/** The path to the output executable file.*/
char *output_file;
} global_args; ///< The variable which is populated by the parse_args() function.
/**
* @brief Print the help text
*/
void print_help();
/**
* @brief Parse the args provided from the command line
*
* The global_args struct is populated from this function.
*
* @param argc Number of arguments.
* @param argv The list of arguments.
*/
void parse_args(int argc, char **argv);
/**
* @brief Verifies if the brainfuck source file is valid
*
* The file seek is set to 0 after the verfication of the source is complete.
*
* @param input_fp The brainfuck input source file.
*/
void verify_syntax(FILE *input_fp);
/**
* @brief Creates the .s assembly source file that contains the list of assembly instructions for the brainfuck program.
*
* Calls assemble() after checking if the output file could be created.
*
* @param input_fp The brainfuck input source file.
* @param output_file The .s assembly file.
*/
void create_assembly_source(FILE *input_fp, const char *output_file);
/**
* @brief Creates the .s assembly source file that contains the list of assembly instructions for the brainfuck program.
*
* Writes the assembly instructions by parsing the source and refering to the bf_asm.h constants.
*
* @param input_fp The brainfuck input source file.
* @param output_fp The .s assembly file.
*/
void assemble(FILE *input_fp, FILE *output_fp);
/**
* @brief Creates the .o elf file from the .s assembly file.
*
* Calls the GNU assembler 'as'.
*
* @param input_file The file path to the .s assembly file.
*/
void execute_assembler(char *input_file);
/**
* @brief Creates the executable file from the .o elf file.
*
* Calls the GNU linker 'ld'.
*
* @param input_file The file path to the .o elf file.
*/
void execute_linker(const char *input_file);
/**
* @brief Creates the executable file from the .o elf file.
*
* Compiles the source.
* This function calls create_assembly_source(),execute_assembler() and execute_linker() with proper arguments.
*
* @param input_fp The brainfuck source file.
*/
void compile(FILE *input_fp);
/**
* @brief The main start point of the compiler.
*
* @param argc The number of arguments.
* @param argv Pointer to the argument list.
*/
int main(int argc, char **argv);#pragma once
#include
#include
/** A Node on the stack */
struct StackNode
{
/** The address of loop start */
unsigned int jmp_address;
/** The address of loop end */
unsigned int second_jmp_address;
/** The address of the next node in stack */
struct StackNode *next;
};
/**
* @brief Creates a new node
*
* Creates a new node in the stack.
* Memory is allocated on the heap.
* The next stackNode is automatically set to NULL.
*
* @param jmp_addr The jump label index for the start of loop.
* @param second_jmp_addr The jump label index for the end of loop.
* @return The new node created.
*/
struct StackNode *newNode(int jmp_addr, int second_jmp_addr);
/**
* @brief Checks if the stack is empty.
*
* @param root The current stack top.
* @return If stack is empty
*/
bool isEmpty(struct StackNode *root);
/**
* @brief Push a new node to top of stack
*
* @param root The current stack top.
* @param jmp_addr The jump label index for the start of loop.
* @param second_jmp_addr The jump label index for the end of loop.
*/
void push(struct StackNode **root, int jmp_addr, int second_jmp_addr);
/**
* @brief Pop the top of stack and free memory allocated on the heap.
* The next node becomes the top of the stack.
*
* @param root The current stack top.
*/
bool pop(struct StackNode **root);#include
#include
#include
#include "bf_asm.h"
#include "stack.h"
#include "compiler.h"
void verify_syntax(FILE *inputfp)
{
char c;
unsigned int stack_count = 0;
unsigned int line = 0;
while ((c = fgetc(inputfp)) != EOF)
{
if (c == ']')
stack_count--;
if (c == '[')
stack_count++;
if (c == '\n')
line++;
if (stack_count < 0)
{
fprintf(stderr, "SYNTAX ERROR: Unexpected instruction '%c' in line %u. A corresponding bracket couldn't be found.\n", c, line);
exit(EXIT_FAILURE);
}
}
if (stack_count > 0)
{
fprintf(stderr, "SYNTAX ERROR: Brackets are not balanced. Found %u stray brackets.\n", stack_count);
exit(EXIT_FAILURE);
}
fseek(inputfp, 0, SEEK_SET);
}
void assemble(FILE *inputfp, FILE *outputfp)
{
char c;
fputs(ALLOCATE_MEMORY, outputfp);
unsigned int jmp_label = 0;
struct StackNode *stack = NULL;
while ((c = fgetc(inputfp)) != EOF)
{
switch (c)
{
case '+':
case '-':
{
int counter = (c == '+') ? 1 : -1;
while ((c = fgetc(inputfp)) != EOF) //Counts the total value to be put into the cell
{
if (c == '+')
counter++;
if (c == '-')
counter--;
if (c == '<' || c == '>' || c == '.' || c == ',' || c == ']' || c == '[')
{
fseek(inputfp, -1, SEEK_CUR);
break;
}
}
if (counter > 0)
fprintf(outputfp, INCREMENT, counter);
else if (counter < 0)
fprintf(outputfp, DECREMENT, (-counter));
break;
}
case '>':
case '<':
{
int counter = (c == '>') ? 1 : -1;
while ((c = fgetc(inputfp)) != EOF) //Counts the total value be increased to the pointer of cell.
{
if (c == '>')
counter++;
if (c == '<')
counter--;
if (c == '+' || c == '-' || c == '.' || c == ',' || c == ']' || c == '[')
{
fseek(inputfp, -1, SEEK_CUR);
break;
}
}
if (counter > 0)
fprintf(outputfp, INCREMENT_POINTER, (unsigned int)(counter * sizeof(char) * 4));
else if (counter < 0)
fprintf(outputfp, DECREMENT_POINTER, (unsigned int)((-counter) * sizeof(char) * 4));
break;
}
case '.':
fputs(WRITE_CHAR, outputfp);
break;
case ',':
fputs(READ_CHAR, outputfp);
break;
case '[':
{
push(&stack, jmp_label, jmp_label + 1);
fprintf(outputfp, "_LABEL_%u:\n", jmp_label);
fputs(LOOP_START, outputfp);
fprintf(outputfp, "_LABEL_%u\n", jmp_label + 1);
jmp_label += 2;
break;
}
case ']':
{
fprintf(outputfp, "_LABEL_%u:\n", stack->second_jmp_address);
fputs(LOOP_END, outputfp);
fprintf(outputfp, "_LABEL_%u\n", stack->jmp_address);
pop(&stack);
}
default:;
}
}
fputs(SYS_EXIT_CALL, outputfp);
}
void print_help()
{
puts("Usage: bf [OPTION] -f [INPUT FILE]\n"
"Compiles brainfuck code to 32-bit executable file.\n"
"\nThe cell size is set to 30,000 and one byte is allocated to each cell.\n"
"\nThe default output file is 'a.out'.\n"
"This compiler also uses the GNU linker 'ld' and the GNU assembler 'as'.\n"
"Make sure both of these are installed and in your path before running this program.\n"
"\nOptions:\n"
" -f \t\tSpecifies the input file.\n"
" -o \t\tSpecifies the output file.\n"
" -h \t\tDisplay the help message.\n");
}
void parse_args(int argc, char **argv)
{
global_args.output_file = "a.out";
global_args.input_file = NULL;
int opt;
while ((opt = getopt(argc, argv, "ho:f:")) != -1)
{
if (opt == 'h')
{
print_help();
exit(EXIT_SUCCESS);
}
if (opt == 'o')
global_args.output_file = optarg;
if (opt == 'f')
global_args.input_file = optarg;
}
if (global_args.input_file == NULL)
{
fputs("ERROR: No input file was specified.\n\n", stderr);
print_help();
exit(EXIT_FAILURE);
}
}
void create_assembly_source(FILE *inputfp, const char *assembler_output_file)
{
FILE *assembler_output_fp = fopen(assembler_output_file, "w");
if (assembler_output_fp == NULL)
{
fprintf(stderr, "The file %s couldn't be opened while assembling code.", assembler_output_file);
exit(EXIT_FAILURE);
}
assemble(inputfp, assembler_output_fp);
fclose(assembler_output_fp);
}
void execute_assembler(char *assembler_input_file)
{
const char *start_command = "as --32 ";
char *command = (char *)malloc(sizeof(char) * (strlen(start_command) + strlen(assembler_input_file) * 2 + 5));
if (command == NULL)
{
fputs("FATAL: Memory allocation failed.", stderr);
exit(EXIT_FAILURE);
}
strcpy(command, start_command);
strcat(command, assembler_input_file);
strcat(command, " -o ");
assembler_input_file[strlen(assembler_input_file) - 1] = 'o';
strcat(command, assembler_input_file);
assembler_input_file[strlen(assembler_input_file) - 1] = 's';
system(command);
remove(assembler_input_file);
free(command);
}
void execute_linker(const char *linker_input_file)
{
const char *start_command = "ld -m elf_i386 -s ";
char *command = (char *)malloc(sizeof(char) * (strlen(start_command) + strlen(linker_input_file) * 2 + 4));
if (command == NULL)
{
fputs("FATAL: Memory allocation failed.", stderr);
exit(EXIT_FAILURE);
}
strcpy(command, start_command);
strcat(command, linker_input_file);
strcat(command, " -o ");
strcat(command, global_args.output_file);
system(command);
remove(linker_input_file);
free(command);
}
void compile(FILE *inputfp)
{
int str_size = strlen(global_args.output_file) + 3;
char *input_file = (char *)malloc(str_size * sizeof(char));
if (input_file == NULL)
{
fputs("FATAL: Memory allocation failed.", stderr);
exit(EXIT_FAILURE);
}
strcpy(input_file, global_args.output_file);
strcat(input_file, ".s");
create_assembly_source(inputfp, input_file);
execute_assembler(input_file);
input_file[strlen(input_file) - 1] = 'o'; //Converting input_file to .o which was created by the assembler.
execute_linker(input_file);
free(input_file);
}
int main(int argc, char **argv)
{
parse_args(argc, argv);
FILE *inputfp = fopen(global_args.input_file, "r");
if (inputfp == NULL)
{
fputs("ERROR: The input file couldn't be opened.", stderr);
exit(EXIT_FAILURE);
}
verify_syntax(inputfp);
compile(inputfp);
fclose(inputfp);
return EXIT_SUCCESS;
}#include "stack.h"
#include
#include
struct StackNode *newNode(int jmp_addr, int second_jmp_addr)
{
struct StackNode *stack_node =
(struct StackNode *)malloc(sizeof(struct StackNode));
if (stack_node == NULL)
{
fputs("FATAL: Memory allocation failed.", stderr);
exit(EXIT_FAILURE);
}
stack_node->jmp_address = jmp_addr;
stack_node->second_jmp_address = second_jmp_addr;
stack_node->next = NULL;
return stack_node;
}
bool isEmpty(struct StackNode *root)
{
return !root;
}
void push(struct StackNode **root, int jmp_addr, int second_jmp_addr)
{
struct StackNode *stack_node = newNode(jmp_addr, second_jmp_addr);
stack_node->next = *root;
*root = stack_node;
}
bool pop(struct StackNode **root)
{
if (isEmpty(*root))
return false;
struct StackNode *temp = *root;
*root = (*root)->next;
free(temp);
return true;
}发布于 2019-03-20 23:06:11
https://codereview.stackexchange.com/questions/215574
复制相似问题