From 5ec29c60311cf8adfbafe488ebad6ea9c13ad8b5 Mon Sep 17 00:00:00 2001 From: Pierre Pronchery Date: Tue, 28 Jul 2009 11:22:32 +0000 Subject: [PATCH] Added a CppParser class --- Makefile | 2 + src/Makefile | 5 +- src/common.h | 24 +- src/cpp.c | 989 +++----------------------------------------- src/parser.c | 1016 ++++++++++++++++++++++++++++++++++++++++++++++ src/parser.h | 41 ++ src/project.conf | 7 +- src/scanner.c | 24 +- 8 files changed, 1130 insertions(+), 978 deletions(-) create mode 100644 src/parser.c create mode 100644 src/parser.h diff --git a/Makefile b/Makefile index a9bd34d..910f793 100644 --- a/Makefile +++ b/Makefile @@ -25,10 +25,12 @@ dist: $(PACKAGE)-$(VERSION)/include/Makefile \ $(PACKAGE)-$(VERSION)/include/project.conf \ $(PACKAGE)-$(VERSION)/src/cpp.c \ + $(PACKAGE)-$(VERSION)/src/parser.c \ $(PACKAGE)-$(VERSION)/src/scanner.c \ $(PACKAGE)-$(VERSION)/src/main.c \ $(PACKAGE)-$(VERSION)/src/Makefile \ $(PACKAGE)-$(VERSION)/src/common.h \ + $(PACKAGE)-$(VERSION)/src/parser.h \ $(PACKAGE)-$(VERSION)/src/project.conf \ $(PACKAGE)-$(VERSION)/Makefile \ $(PACKAGE)-$(VERSION)/COPYING \ diff --git a/src/Makefile b/src/Makefile index b4b1add..8547911 100644 --- a/src/Makefile +++ b/src/Makefile @@ -19,7 +19,7 @@ INSTALL = install all: $(TARGETS) -libcpp_OBJS = cpp.o scanner.o +libcpp_OBJS = cpp.o parser.o scanner.o libcpp_CFLAGS = $(CPPFLAGSF) $(CPPFLAGS) $(CFLAGSF) $(CFLAGS) -fPIC libcpp_LDFLAGS = $(LDFLAGSF) $(LDFLAGS) -L $(LIBDIR) -Wl,-rpath $(LIBDIR) -l System @@ -40,6 +40,9 @@ cpp: $(cpp_OBJS) libcpp.so cpp.o: cpp.c common.h ../include/cpp.h $(CC) $(libcpp_CFLAGS) -c cpp.c +parser.o: parser.c parser.h ../include/cpp.h + $(CC) $(libcpp_CFLAGS) -c parser.c + scanner.o: scanner.c common.h ../include/cpp.h $(CC) $(libcpp_CFLAGS) -c scanner.c diff --git a/src/common.h b/src/common.h index 3d0898b..c9f4b7a 100644 --- a/src/common.h +++ b/src/common.h @@ -18,6 +18,7 @@ #ifndef _CPP_COMMON_H # define _CPP_COMMON_H +# include "parser.h" # include "cpp.h" @@ 
-35,27 +36,10 @@ typedef enum _CppScope CPP_SCOPE_TAKEN } CppScope; -/* FIXME make a subtype for the actual parser instead of the "toplevel" hack */ struct _Cpp { - int filters; - Parser * parser; - /* for cpp_filter_newlines */ - int newlines_last; - int newlines_last_cnt; - /* for cpp_filter_trigraphs */ - int trigraphs_last; - int trigraphs_last_cnt; - /* to queue a token */ - int queue_ready; - TokenCode queue_code; - String * queue_string; - /* for cpp_callback_directive */ - int directive_newline; - int directive_control; /* for include directives */ - Cpp * toplevel; - Cpp * subparser; + CppParser * parser; char ** paths; size_t paths_cnt; /* for substitutions */ @@ -66,4 +50,8 @@ struct _Cpp size_t scopes_cnt; }; + +/* functions */ +char * cpp_path_lookup(Cpp * cpp, char const * filename); + #endif /* !_CPP_COMMON_H */ diff --git a/src/cpp.c b/src/cpp.c index 062399e..4544a91 100644 --- a/src/cpp.c +++ b/src/cpp.c @@ -13,903 +13,20 @@ * You should have received a copy of the Creative Commons Attribution- * NonCommercial-ShareAlike 3.0 along with cpp; if not, browse to * http://creativecommons.org/licenses/by-nc-sa/3.0/ */ -/* FIXME: - * - fix includes (system vs regular, inclusion order) - * - potential memory leak with tokens' data - * - add a filter for the "%" operator - * - add a way to tokenize input from a string (and handle "#" and "##") */ -#include #include -#include +#include #include #include #include -#include -#include -#include #include +#include "cpp.h" #include "common.h" -#ifdef DEBUG -# define DEBUG_CALLBACK() fprintf(stderr, "DEBUG: %s('%c' 0x%x)\n", __func__, \ - c, c); -#else -# define DEBUG_CALLBACK() -#endif - /* Cpp */ -/* private */ -/* types */ -typedef struct _CppOperator -{ - CppCode code; - char const * string; -} CppOperator; - - -/* variables */ -/* operators */ -static const CppOperator _cpp_operators[] = -{ - { CPP_CODE_OPERATOR_AEQUALS, "&=" }, - { CPP_CODE_OPERATOR_DAMPERSAND, "&&" }, - { CPP_CODE_OPERATOR_AMPERSAND, "&" 
}, - { CPP_CODE_OPERATOR_RBRACKET, ":>" }, - { CPP_CODE_OPERATOR_COLON, ":" }, - { CPP_CODE_OPERATOR_BEQUALS, "|=" }, - { CPP_CODE_OPERATOR_DBAR, "||" }, - { CPP_CODE_OPERATOR_BAR, "|" }, - { CPP_CODE_OPERATOR_DIVEQUALS, "/=" }, - { CPP_CODE_OPERATOR_DIVIDE, "/" }, - { CPP_CODE_OPERATOR_DOTDOTDOT, "..." }, - { CPP_CODE_OPERATOR_DOT, "." }, - { CPP_CODE_OPERATOR_DEQUALS, "==" }, - { CPP_CODE_OPERATOR_EQUALS, "=" }, - { CPP_CODE_OPERATOR_DGEQUALS, ">>=" }, - { CPP_CODE_OPERATOR_GEQUALS, ">=" }, - { CPP_CODE_OPERATOR_DGREATER, ">>" }, - { CPP_CODE_OPERATOR_GREATER, ">" }, - { CPP_CODE_OPERATOR_DHASH, "##" }, - { CPP_CODE_OPERATOR_HASH, "#" }, - { CPP_CODE_OPERATOR_LBRACE, "{" }, - { CPP_CODE_OPERATOR_LBRACKET, "[" }, - { CPP_CODE_OPERATOR_DLEQUALS, "<<=" }, - { CPP_CODE_OPERATOR_DLESS, "<<" }, - { CPP_CODE_OPERATOR_LBRACKET, "<:" }, - { CPP_CODE_OPERATOR_LBRACE, "<%" }, - { CPP_CODE_OPERATOR_LEQUALS, "<=" }, - { CPP_CODE_OPERATOR_LESS, "<" }, - { CPP_CODE_OPERATOR_LPAREN, "(" }, - { CPP_CODE_OPERATOR_MGREATER, "->" }, - { CPP_CODE_OPERATOR_DMINUS, "--" }, - { CPP_CODE_OPERATOR_MEQUALS, "-=" }, - { CPP_CODE_OPERATOR_MINUS, "-" }, - { CPP_CODE_OPERATOR_RBRACE, "%>" }, - { CPP_CODE_OPERATOR_DHASH, "%:%:" }, - { CPP_CODE_OPERATOR_HASH, "%:" }, - { CPP_CODE_OPERATOR_MODEQUALS, "%=" }, - { CPP_CODE_OPERATOR_MODULO, "%" }, - { CPP_CODE_OPERATOR_NEQUALS, "!=" }, - { CPP_CODE_OPERATOR_NOT, "!" }, - { CPP_CODE_OPERATOR_DPLUS, "++" }, - { CPP_CODE_OPERATOR_PEQUALS, "+=" }, - { CPP_CODE_OPERATOR_PLUS, "+" }, - { CPP_CODE_OPERATOR_QUESTION, "?" 
}, - { CPP_CODE_OPERATOR_RBRACE, "}" }, - { CPP_CODE_OPERATOR_RBRACKET, "]" }, - { CPP_CODE_OPERATOR_RPAREN, ")" }, - { CPP_CODE_OPERATOR_SEMICOLON, ";" }, - { CPP_CODE_OPERATOR_TILDE, "~" }, - { CPP_CODE_OPERATOR_TEQUALS, "*=" }, - { CPP_CODE_OPERATOR_TIMES, "*" }, - { CPP_CODE_OPERATOR_XEQUALS, "^=" }, - { CPP_CODE_OPERATOR_XOR, "^" } -}; -static const size_t _cpp_operators_cnt = sizeof(_cpp_operators) - / sizeof(*_cpp_operators); - -/* directives */ -static const char * _cpp_directives[] = -{ - "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", - "include", "line", "pragma", "undef", "warning", NULL -}; - - -/* prototypes */ -/* useful */ -static int _cpp_isword(int c); -static char * _cpp_parse_word(Parser * parser, int c); - -/* filters */ -static int _cpp_filter_newlines(int * c, void * data); -static int _cpp_filter_trigraphs(int * c, void * data); - -/* callbacks */ -static int _cpp_callback_dequeue(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_header(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_control(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_whitespace(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_newline(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_otherspace(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_comment(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_comma(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_operator(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_quote(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_directive(Parser * parser, Token * token, int c, - void * data); -static int _cpp_callback_word(Parser * parser, Token * token, int c, - void * data); -static int 
_cpp_callback_unknown(Parser * parser, Token * token, int c, - void * data); - - -/* Cpp */ -/* private */ -/* cpp_isword */ -static int _cpp_isword(int c) -{ - return isalnum(c) || c == '_' || c == '$'; -} - - -/* cpp_parse_word */ -static char * _cpp_parse_word(Parser * parser, int c) -{ - char * str = NULL; - size_t len = 0; - char * p; - - do - { - if((p = realloc(str, len + 2)) == NULL) - { - error_set_code(1, "%s", strerror(errno)); - free(str); - return NULL; - } - str = p; - str[len++] = c; - } - while(_cpp_isword((c = parser_scan_filter(parser)))); - str[len] = '\0'; - return str; -} - - -/* filters */ -/* cpp_filter_newlines */ -static int _cpp_filter_newlines(int * c, void * data) -{ - Cpp * cpp = data; - - if(cpp->newlines_last_cnt != 0) - { - cpp->newlines_last_cnt--; - *c = cpp->newlines_last; - return 0; - } - if(*c != '\\') - return 0; - if((*c = parser_scan(cpp->parser)) == '\n') - { - *c = parser_scan(cpp->parser); /* skip the newline */ - return 0; - } - cpp->newlines_last = *c; - cpp->newlines_last_cnt = 1; - *c = '\\'; - return 1; -} - - -/* cpp_filter_trigraphs */ -static int _trigraphs_get(int last, int * c); - -static int _cpp_filter_trigraphs(int * c, void * data) -{ - Cpp * cpp = data; - - if(cpp->trigraphs_last_cnt == 2) - { - cpp->trigraphs_last_cnt--; - *c = '?'; - return 0; - } - else if(cpp->trigraphs_last_cnt == 1) - { - cpp->trigraphs_last_cnt--; - *c = cpp->trigraphs_last; - return 0; - } - if(*c != '?') - return 0; - if((cpp->trigraphs_last = parser_scan(cpp->parser)) != '?') - { - cpp->trigraphs_last_cnt = 1; - return 1; - } - cpp->trigraphs_last = parser_scan(cpp->parser); - if(_trigraphs_get(cpp->trigraphs_last, c) != 0) - { -#ifdef DEBUG - fprintf(stderr, "DEBUG: last=%c\n", cpp->trigraphs_last); -#endif - cpp->trigraphs_last_cnt = 2; - return 2; - } -#ifdef DEBUG - fprintf(stderr, "DEBUG: filtered \"??%c\" into \"%c\"\n", - cpp->trigraphs_last, *c); -#endif - return 0; -} - -static int _trigraphs_get(int last, int * c) -{ - 
switch(last) - { - case '=': - *c = '#'; - break; - case '/': - *c = '\\'; - break; - case '\'': - *c = '^'; - break; - case '(': - *c = '['; - break; - case ')': - *c = ']'; - break; - case '!': - *c = '|'; - break; - case '<': - *c = '{'; - break; - case '>': - *c = '}'; - break; - case '-': - *c = '~'; - break; - default: - return 1; - } - return 0; -} - - -/* callbacks */ -/* cpp_callback_whitespace */ -static int _cpp_callback_whitespace(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char * str = NULL; - size_t len = 0; - char * p; - - if(!isspace(c)) - return 1; - DEBUG_CALLBACK(); - do - { - if(c != '\n') - continue; - if((p = realloc(str, len + 2)) == NULL) - { - free(str); - return -1; - } - str = p; - str[len++] = c; - } - while(isspace((c = parser_scan_filter(parser)))); - token_set_code(token, CPP_CODE_WHITESPACE); - if(str != NULL) /* some newlines were encountered */ - { - str[len] = '\0'; - token_set_string(token, str); - cpp->directive_newline = 1; - cpp->queue_ready = 1; - if(_cpp_callback_dequeue(parser, token, c, cpp) == 0) /* XXX */ - { - cpp->queue_ready = 1; - cpp->queue_code = CPP_CODE_WHITESPACE; - cpp->queue_string = str; - } - else - free(str); - return 0; - } - token_set_string(token, " "); - if(cpp->queue_code != CPP_CODE_NULL) - { - if(cpp->queue_string != NULL) - string_append(&cpp->queue_string, " "); - } - return 0; -} - - -/* cpp_callback_newline */ -static int _cpp_callback_newline(Parser * parser, Token * token, int c, - void * data) -{ - int ret = 0; - Cpp * cpp = data; - - if(c != '\n') - return 1; - DEBUG_CALLBACK(); - cpp->directive_newline = 1; - cpp->queue_ready = 1; - parser_scan_filter(parser); - if(_cpp_callback_dequeue(parser, token, c, cpp) == 0) /* XXX */ - { - cpp->queue_ready = 1; - cpp->queue_code = CPP_CODE_NEWLINE; - cpp->queue_string = string_new("\n"); - } - else - { - token_set_code(token, CPP_CODE_NEWLINE); - token_set_string(token, "\n"); - } - return ret; -} - - -/* 
cpp_callback_otherspace */ -static int _cpp_callback_otherspace(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char * str = NULL; - size_t len = 0; - char * p; - - assert(c != '\n'); - if(!isspace(c)) - return 1; - DEBUG_CALLBACK(); - do - { - if((p = realloc(str, len + 2)) == NULL) - { - free(str); - return -1; - } - str = p; - str[len++] = c; - } - while(isspace((c = parser_scan_filter(parser))) && c != '\n'); - token_set_code(token, CPP_CODE_WHITESPACE); - if(str != NULL) - { - str[len] = '\0'; - token_set_string(token, str); - free(str); - } - else - token_set_string(token, " "); - return 0; -} - - -/* cpp_callback_comment */ -static int _cpp_callback_comment(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char * str = NULL; - size_t len = 2; - char * p; - - if(c != '/') - return 1; - DEBUG_CALLBACK(); - if((c = parser_scan_filter(parser)) != '*') - { - if(cpp->queue_code == CPP_CODE_NULL) - token_set_code(token, CPP_CODE_OPERATOR_DIVIDE); - else - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - token_set_string(token, "/"); - return 0; - } - for(c = parser_scan_filter(parser); c != EOF;) - { - if(!(cpp->filters & CPP_FILTER_COMMENT)) - { - if((p = realloc(str, len + 3)) == NULL) - return -error_set_code(1, "%s", strerror( - errno)); - str = p; - str[len++] = c; - } - if(c == '*') - { - if((c = parser_scan_filter(parser)) == '/') - break; - } - else - c = parser_scan_filter(parser); - } - if(c == EOF) - return -error_set_code(1, "%s", "End of file within a comment"); - if(str != NULL) - { - str[0] = '/'; - str[1] = '*'; - str[len++] = '/'; - str[len] = '\0'; - token_set_code(token, CPP_CODE_COMMENT); - token_set_string(token, str); - free(str); - } - else - { - token_set_code(token, CPP_CODE_WHITESPACE); - token_set_string(token, " "); - } - parser_scan_filter(parser); - return 0; -} - - -/* cpp_callback_dequeue */ -static int _dequeue_include(Cpp * cpp, Token * token, char const * str); 
-static char * _include_path(Cpp * cpp, char const * str); -static char * _path_lookup(Cpp * cpp, char const * path, int system); -static char * _lookup_error(char const * path, int system); - -static int _cpp_callback_dequeue(Parser * parser, Token * token, int c, - void * data) -{ - int ret = 0; - Cpp * cpp = data; - - if(cpp->queue_ready == 0) - return 1; - cpp->queue_ready = 0; - if(cpp->queue_code == CPP_CODE_NULL && cpp->queue_string == NULL) - return 1; - DEBUG_CALLBACK(); - token_set_code(token, cpp->queue_code); - switch(cpp->queue_code) - { - case CPP_CODE_META_DEFINE: - case CPP_CODE_META_IFDEF: - case CPP_CODE_META_IFNDEF: - case CPP_CODE_META_UNDEF: - token_set_string(token, ""); - token_set_data(token, cpp->queue_string); - cpp->queue_string = NULL; - break; - case CPP_CODE_META_INCLUDE: - token_set_string(token, ""); - ret = _dequeue_include(cpp, token, cpp->queue_string); - break; - case CPP_CODE_META_ERROR: - case CPP_CODE_META_WARNING: - token_set_string(token, (cpp->queue_string != NULL) - ? 
cpp->queue_string : ""); - break; - case CPP_CODE_NEWLINE: /* XXX these two shouldn't be here */ - case CPP_CODE_WHITESPACE: - token_set_string(token, cpp->queue_string); - cpp->queue_string = NULL; - break; - default: - token_set_string(token, ""); - break; - } - cpp->queue_code = CPP_CODE_NULL; - string_delete(cpp->queue_string); - cpp->queue_string = NULL; - cpp->directive_newline = 1; - cpp->directive_control = 0; - return ret; -} - -static int _dequeue_include(Cpp * cpp, Token * token, char const * str) -{ - char * path = NULL; - - if((path = _include_path(cpp, str)) == NULL - && (path = _include_path(cpp->toplevel, str)) == NULL) - { - token_set_code(token, CPP_CODE_META_ERROR); - token_set_string(token, error_get()); - return 0; - } - if((cpp->subparser = cpp_new(path, cpp->filters)) == NULL) - { - free(path); - return -1; - } - free(path); - cpp->subparser->toplevel = cpp->toplevel; - return 0; -} - -static char * _include_path(Cpp * cpp, char const * str) - /* FIXME use presets for path discovery and then dirname(filename) */ -{ - int d; - size_t len; - char * path = NULL; - char * p; - -#ifdef DEBUG - fprintf(stderr, "DEBUG: %s(%p, \"%s\")\n", __func__, cpp, str); -#endif - if(str[0] == '"') - d = str[0]; - else if(str[0] == '<') - d = '>'; - else - { - error_set("%s", "Invalid include directive"); - return NULL; - } - len = strlen(str); - if(len < 3 || str[len - 1] != d) - { - error_set("%s", "Invalid include directive"); - return NULL; - } - if((path = strdup(&str[1])) == NULL) - { - error_set("%s", strerror(errno)); - return NULL; - } - path[len - 2] = '\0'; - p = _path_lookup(cpp, path, d == '>'); - free(path); - return p; -} - -static char * _path_lookup(Cpp * cpp, char const * path, int system) -{ - size_t i; - char * buf = NULL; - char * p; - struct stat st; - - for(i = 0; i < cpp->paths_cnt; i++) - { - if((p = realloc(buf, strlen(cpp->paths[i]) + strlen(path) + 2)) - == NULL) - { - error_set("%s", strerror(errno)); - free(buf); - return NULL; - } 
- buf = p; - sprintf(buf, "%s/%s", cpp->paths[i], path); -#ifdef DEBUG - fprintf(stderr, "DEBUG: stat(\"%s\", %p)\n", buf, &st); -#endif - if(stat(buf, &st) == 0) - return buf; - if(errno != ENOENT) - break; - } - free(buf); - return _lookup_error(path, system); -} - -static char * _lookup_error(char const * path, int system) -{ - error_set("%s%c%s%c: %s", "Cannot include ", system ? '<' : '"', path, - system ? '>' : '"', strerror(errno)); - return NULL; -} - - -/* cpp_callback_header */ -static int _cpp_callback_header(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char end; - char * str = NULL; - size_t len = 0; - char * p; - - if(cpp->directive_control != 1 - || cpp->queue_code != CPP_CODE_META_INCLUDE - || (c != '<' && c != '"')) - return 1; - DEBUG_CALLBACK(); - end = (c == '<') ? '>' : '"'; - while((p = realloc(str, len + 3)) != NULL) - { - str = p; - str[len++] = c; - if((c = parser_scan_filter(parser)) == EOF || c == '\n') - break; - else if(c == end) - break; - } - if(p == NULL) /* there was an error with realloc() */ - { - error_set_code(1, "%s", strerror(errno)); - free(str); - return -1; - } - else if(c == end) /* the header name is properly closed */ - { - str[len++] = c; - parser_scan_filter(parser); - } - str[len] = '\0'; - token_set_code(token, CPP_CODE_META_LINE); - token_set_string(token, str); - if(cpp->queue_string == NULL) - cpp->queue_string = str; - else - { - free(str); - cpp->queue_code = CPP_CODE_META_ERROR; - free(cpp->queue_string); - /* XXX may be followed by junk */ - cpp->queue_string = strdup("Syntax error"); - } - return 0; -} - - -/* cpp_callback_control */ -static int _cpp_callback_control(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - - if(cpp->directive_newline != 1 || c != '#') - { - cpp->directive_newline = 0; - return 1; - } - DEBUG_CALLBACK(); - parser_scan_filter(parser); - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - token_set_string(token, "#"); - 
cpp->directive_newline = 0; - cpp->directive_control = 1; - cpp->queue_code = CPP_CODE_NULL; - return 0; -} - - -/* cpp_callback_comma */ -static int _cpp_callback_comma(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - - if(c != ',') - return 1; - DEBUG_CALLBACK(); - token_set_code(token, CPP_CODE_COMMA); - token_set_string(token, ","); - if(cpp->queue_code != CPP_CODE_NULL) - { - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - string_append(&cpp->queue_string, ","); - } - parser_scan_filter(parser); - return 0; -} - - -/* cpp_callback_operator */ -static int _cpp_callback_operator(Parser * parser, Token * token, int c, - void * data) - /* FIXME probably fails for ".." and similar cases */ -{ - Cpp * cpp = data; - size_t i; - const size_t j = sizeof(_cpp_operators) / sizeof(*_cpp_operators); - size_t pos; - - for(i = 0; i < _cpp_operators_cnt; i++) - if(_cpp_operators[i].string[0] == c) - break; - if(i == _cpp_operators_cnt) /* nothing found */ - return 1; - DEBUG_CALLBACK(); - for(pos = 0; i < j;) - { - if(_cpp_operators[i].string[pos] == '\0') - break; - if(c == _cpp_operators[i].string[pos]) - { - c = parser_scan_filter(parser); - pos++; - } - else - i++; - } - if(i == j) /* should not happen */ - return -1; - token_set_code(token, _cpp_operators[i].code); - token_set_string(token, _cpp_operators[i].string); - if(cpp->queue_code != CPP_CODE_NULL) - { - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - if(cpp->queue_string == NULL) - cpp->queue_string = string_new( - _cpp_operators[i].string); - else - string_append(&cpp->queue_string, - _cpp_operators[i].string); - } - return 0; -} - - -/* cpp_callback_quote */ -static int _cpp_callback_quote(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - int escape = 0; - char * str = NULL; - size_t len = 0; - char * p; - - if(c == '\'') - token_set_code(token, CPP_CODE_SQUOTE); - else if(c == '"') - token_set_code(token, CPP_CODE_DQUOTE); - else - return 
1; - DEBUG_CALLBACK(); - while((p = realloc(str, len + 3)) != NULL) - { - str = p; - str[len++] = c; - if((c = parser_scan_filter(parser)) == EOF || c == '\n') - break; - if(escape) - escape = 0; - else if(c == str[0]) - break; - else if(c == '\\') - escape = 1; - } - if(p == NULL) /* there was an error with realloc() */ - { - error_set_code(1, "%s", strerror(errno)); - free(str); - return -1; - } - else if(c == str[0]) /* the quoted string is properly closed */ - { - str[len++] = str[0]; - parser_scan_filter(parser); - } /* XXX else we should probably issue a warning */ - str[len] = '\0'; - token_set_string(token, str); - if(cpp->queue_code != CPP_CODE_NULL) - { - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - if(cpp->queue_string == NULL) - cpp->queue_string = string_new(str); - else - string_append(&cpp->queue_string, str); - } - free(str); - return 0; -} - - -/* cpp_callback_directive */ -static int _cpp_callback_directive(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char * str; - size_t i; - - if(cpp->directive_control != 1 || cpp->queue_code != CPP_CODE_NULL - || !_cpp_isword(c)) - return 1; - DEBUG_CALLBACK(); - if((str = _cpp_parse_word(parser, c)) == NULL) - return -1; - for(i = 0; _cpp_directives[i] != NULL; i++) - if(strcmp(str, _cpp_directives[i]) == 0) - break; - if(_cpp_directives[i] != NULL) - { - cpp->queue_code = CPP_CODE_META_FIRST + i; - cpp->queue_string = NULL; - } - else - { - cpp->queue_code = CPP_CODE_META_ERROR; - cpp->queue_string = string_new_append("Invalid directive: #", - str, ":", NULL); /* XXX check for errors */ - } - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - token_set_string(token, str); - free(str); - return 0; -} - - -/* cpp_callback_word */ -static int _cpp_callback_word(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char * str; - - if(!_cpp_isword(c)) - return 1; - DEBUG_CALLBACK(); - if((str = _cpp_parse_word(parser, c)) == NULL) - return 
-1; - token_set_code(token, CPP_CODE_WORD); - token_set_string(token, str); - if(cpp->queue_code != CPP_CODE_NULL) - { - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - if(cpp->queue_string == NULL) - cpp->queue_string = string_new(str); - else - string_append(&cpp->queue_string, str); - } - free(str); - return 0; -} - - -/* cpp_callback_unknown */ -static int _cpp_callback_unknown(Parser * parser, Token * token, int c, - void * data) -{ - Cpp * cpp = data; - char buf[2] = "\0"; - - if(c == EOF) - return 1; - DEBUG_CALLBACK(); - buf[0] = c; - parser_scan(parser); - token_set_code(token, CPP_CODE_UNKNOWN); - token_set_string(token, buf); - if(cpp->queue_code != CPP_CODE_NULL) - { - token_set_code(token, CPP_CODE_META_LINE); /* XXX */ - string_append(&cpp->queue_string, buf); - } - return 0; -} - - /* public */ /* functions */ /* cpp_new */ @@ -922,11 +39,7 @@ Cpp * cpp_new(char const * filename, int filters) if((cpp = object_new(sizeof(*cpp))) == NULL) return NULL; memset(cpp, 0, sizeof(*cpp)); - cpp->filters = filters; - cpp->parser = parser_new(filename); - cpp->directive_newline = 1; - cpp->directive_control = 0; - cpp->toplevel = cpp; + cpp->parser = cppparser_new(cpp, NULL, filename, filters); if((p = strdup(filename)) != NULL) { r = cpp_path_add(cpp, dirname(p)); /* FIXME inclusion order */ @@ -937,26 +50,6 @@ Cpp * cpp_new(char const * filename, int filters) cpp_delete(cpp); return NULL; } - parser_add_filter(cpp->parser, _cpp_filter_newlines, cpp); - if(cpp->filters & CPP_FILTER_TRIGRAPH) - parser_add_filter(cpp->parser, _cpp_filter_trigraphs, cpp); - parser_add_callback(cpp->parser, _cpp_callback_dequeue, cpp); - if(cpp->filters & CPP_FILTER_WHITESPACE) - parser_add_callback(cpp->parser, _cpp_callback_whitespace, cpp); - else - { - parser_add_callback(cpp->parser, _cpp_callback_newline, cpp); - parser_add_callback(cpp->parser, _cpp_callback_otherspace, cpp); - } - parser_add_callback(cpp->parser, _cpp_callback_comment, cpp); - 
parser_add_callback(cpp->parser, _cpp_callback_header, cpp); - parser_add_callback(cpp->parser, _cpp_callback_control, cpp); - parser_add_callback(cpp->parser, _cpp_callback_comma, cpp); - parser_add_callback(cpp->parser, _cpp_callback_operator, cpp); - parser_add_callback(cpp->parser, _cpp_callback_quote, cpp); - parser_add_callback(cpp->parser, _cpp_callback_directive, cpp); - parser_add_callback(cpp->parser, _cpp_callback_word, cpp); - parser_add_callback(cpp->parser, _cpp_callback_unknown, cpp); return cpp; } @@ -966,30 +59,17 @@ void cpp_delete(Cpp * cpp) { size_t i; - if(cpp->toplevel == cpp) + for(i = 0; i < cpp->defines_cnt; i++) { - for(i = 0; i < cpp->defines_cnt; i++) - { - free(cpp->defines[i].name); - free(cpp->defines[i].value); - } - free(cpp->defines); - for(i = 0; i < cpp->paths_cnt; i++) - free(cpp->paths[i]); - free(cpp->paths); + free(cpp->defines[i].name); + free(cpp->defines[i].value); } - else - { - assert(cpp->defines_cnt == 0); - assert(cpp->paths_cnt == 1); - free(cpp->paths[0]); - free(cpp->paths); - assert(cpp->scopes_cnt == 0); - } - if(cpp->subparser != NULL) - cpp_delete(cpp->subparser); + free(cpp->defines); + for(i = 0; i < cpp->paths_cnt; i++) + free(cpp->paths[i]); + free(cpp->paths); if(cpp->parser != NULL) - parser_delete(cpp->parser); + cppparser_delete(cpp->parser); if(cpp->scopes != NULL) free(cpp->scopes); object_delete(cpp); @@ -1000,7 +80,7 @@ void cpp_delete(Cpp * cpp) /* cpp_get_filename */ char const * cpp_get_filename(Cpp * cpp) { - return parser_get_filename(cpp->parser); + return cppparser_get_filename(cpp->parser); } @@ -1009,7 +89,6 @@ int cpp_is_defined(Cpp * cpp, char const * name) { size_t i; - cpp = cpp->toplevel; for(i = 0; i < cpp->defines_cnt; i++) if(strcmp(cpp->defines[i].name, name) == 0) return 1; @@ -1029,7 +108,6 @@ int cpp_define_add(Cpp * cpp, char const * name, char const * value) fprintf(stderr, "DEBUG: %s(cpp, \"%s\", \"%s\")\n", __func__, name, value); #endif - cpp = cpp->toplevel; for(i = 0; i < 
cpp->defines_cnt; i++) if(strcmp(cpp->defines[i].name, name) == 0) break; @@ -1062,7 +140,6 @@ int cpp_define_remove(Cpp * cpp, char const * name) #ifdef DEBUG fprintf(stderr, "DEBUG: %s(cpp, \"%s\")\n", __func__, name); #endif - cpp = cpp->toplevel; for(i = 0; i < cpp->defines_cnt; i++) if(strcmp(cpp->defines[i].name, name) == 0) break; @@ -1085,7 +162,6 @@ int cpp_path_add(Cpp * cpp, char const * path) { char ** p; - cpp = cpp->toplevel; #ifdef DEBUG fprintf(stderr, "DEBUG: %s(cpp, \"%s\")\n", __func__, path); #endif @@ -1097,3 +173,44 @@ int cpp_path_add(Cpp * cpp, char const * path) cpp->paths_cnt++; return 0; } + + +/* cpp_path_lookup */ +static char * _lookup_error(char const * path); + +String * cpp_path_lookup(Cpp * cpp, char const * filename) +{ + size_t len = strlen(filename); + size_t i; + char * buf = NULL; + char * p; + struct stat st; + + for(i = 0; i < cpp->paths_cnt; i++) + { + if((p = realloc(buf, strlen(cpp->paths[i]) + len + 2)) == NULL) + { + error_set("%s", strerror(errno)); + free(buf); + return NULL; + } + buf = p; + sprintf(buf, "%s/%s", cpp->paths[i], filename); +#ifdef DEBUG + fprintf(stderr, "DEBUG: stat(\"%s\", %p)\n", buf, &st); +#endif + if(stat(buf, &st) == 0) + return buf; + if(errno != ENOENT) + break; + } + free(buf); + return _lookup_error(filename); +} + +static char * _lookup_error(char const * filename) +{ + error_set("%s%s%s%s", "Cannot include <", filename, ">: ", + strerror(errno)); + return NULL; +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..0bc26ae --- /dev/null +++ b/src/parser.c @@ -0,0 +1,1016 @@ +/* $Id$ */ +/* Copyright (c) 2009 Pierre Pronchery */ +/* This file is part of DeforaOS Devel cpp */ +/* cpp is not free software; you can redistribute it and/or modify it under the + * terms of the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 + * Unported as published by the Creative Commons organization. 
+ * + * cpp is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the Creative Commons Attribution-NonCommercial- + * ShareAlike 3.0 Unported license for more details. + * + * You should have received a copy of the Creative Commons Attribution- + * NonCommercial-ShareAlike 3.0 along with cpp; if not, browse to + * http://creativecommons.org/licenses/by-nc-sa/3.0/ */ +/* FIXME: + * - potential memory leak with tokens' data + * - add a filter for the "%" operator + * - add a way to tokenize input from a string (and handle "#" and "##") */ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "parser.h" +#include "common.h" + +#ifdef DEBUG +# define DEBUG_CALLBACK() fprintf(stderr, "DEBUG: %s('%c' 0x%x)\n", __func__, \ + c, c); +#else +# define DEBUG_CALLBACK() +#endif + + +/* CppParser */ +/* private */ +/* types */ +typedef struct _CppOperator +{ + CppCode code; + char const * string; +} CppOperator; + +struct _CppParser +{ + Cpp * cpp; + CppParser * parent; + + Parser * parser; + int filters; + + /* for cpp_filter_newlines */ + int newlines_last; + int newlines_last_cnt; + /* for cpp_filter_trigraphs */ + int trigraphs_last; + int trigraphs_last_cnt; + /* for cpp_callback_directive */ + int directive_newline; + int directive_control; + /* to queue a token */ + int queue_ready; + TokenCode queue_code; + String * queue_string; + + CppParser * subparser; +}; + + +/* variables */ +/* operators */ +static const CppOperator _cpp_operators[] = +{ + { CPP_CODE_OPERATOR_AEQUALS, "&=" }, + { CPP_CODE_OPERATOR_DAMPERSAND, "&&" }, + { CPP_CODE_OPERATOR_AMPERSAND, "&" }, + { CPP_CODE_OPERATOR_RBRACKET, ":>" }, + { CPP_CODE_OPERATOR_COLON, ":" }, + { CPP_CODE_OPERATOR_BEQUALS, "|=" }, + { CPP_CODE_OPERATOR_DBAR, "||" }, + { CPP_CODE_OPERATOR_BAR, "|" }, + { CPP_CODE_OPERATOR_DIVEQUALS, "/=" }, + { 
CPP_CODE_OPERATOR_DIVIDE, "/" }, + { CPP_CODE_OPERATOR_DOTDOTDOT, "..." }, + { CPP_CODE_OPERATOR_DOT, "." }, + { CPP_CODE_OPERATOR_DEQUALS, "==" }, + { CPP_CODE_OPERATOR_EQUALS, "=" }, + { CPP_CODE_OPERATOR_DGEQUALS, ">>=" }, + { CPP_CODE_OPERATOR_GEQUALS, ">=" }, + { CPP_CODE_OPERATOR_DGREATER, ">>" }, + { CPP_CODE_OPERATOR_GREATER, ">" }, + { CPP_CODE_OPERATOR_DHASH, "##" }, + { CPP_CODE_OPERATOR_HASH, "#" }, + { CPP_CODE_OPERATOR_LBRACE, "{" }, + { CPP_CODE_OPERATOR_LBRACKET, "[" }, + { CPP_CODE_OPERATOR_DLEQUALS, "<<=" }, + { CPP_CODE_OPERATOR_DLESS, "<<" }, + { CPP_CODE_OPERATOR_LBRACKET, "<:" }, + { CPP_CODE_OPERATOR_LBRACE, "<%" }, + { CPP_CODE_OPERATOR_LEQUALS, "<=" }, + { CPP_CODE_OPERATOR_LESS, "<" }, + { CPP_CODE_OPERATOR_LPAREN, "(" }, + { CPP_CODE_OPERATOR_MGREATER, "->" }, + { CPP_CODE_OPERATOR_DMINUS, "--" }, + { CPP_CODE_OPERATOR_MEQUALS, "-=" }, + { CPP_CODE_OPERATOR_MINUS, "-" }, + { CPP_CODE_OPERATOR_RBRACE, "%>" }, + { CPP_CODE_OPERATOR_DHASH, "%:%:" }, + { CPP_CODE_OPERATOR_HASH, "%:" }, + { CPP_CODE_OPERATOR_MODEQUALS, "%=" }, + { CPP_CODE_OPERATOR_MODULO, "%" }, + { CPP_CODE_OPERATOR_NEQUALS, "!=" }, + { CPP_CODE_OPERATOR_NOT, "!" }, + { CPP_CODE_OPERATOR_DPLUS, "++" }, + { CPP_CODE_OPERATOR_PEQUALS, "+=" }, + { CPP_CODE_OPERATOR_PLUS, "+" }, + { CPP_CODE_OPERATOR_QUESTION, "?" 
}, + { CPP_CODE_OPERATOR_RBRACE, "}" }, + { CPP_CODE_OPERATOR_RBRACKET, "]" }, + { CPP_CODE_OPERATOR_RPAREN, ")" }, + { CPP_CODE_OPERATOR_SEMICOLON, ";" }, + { CPP_CODE_OPERATOR_TILDE, "~" }, + { CPP_CODE_OPERATOR_TEQUALS, "*=" }, + { CPP_CODE_OPERATOR_TIMES, "*" }, + { CPP_CODE_OPERATOR_XEQUALS, "^=" }, + { CPP_CODE_OPERATOR_XOR, "^" } +}; +static const size_t _cpp_operators_cnt = sizeof(_cpp_operators) + / sizeof(*_cpp_operators); + +/* directives */ +static const char * _cpp_directives[] = +{ + "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", + "include", "line", "pragma", "undef", "warning", NULL +}; + + +/* prototypes */ +/* useful */ +static int _cpp_isword(int c); +static char * _cpp_parse_word(Parser * parser, int c); + +/* filters */ +static int _cpp_filter_newlines(int * c, void * data); +static int _cpp_filter_trigraphs(int * c, void * data); + +/* callbacks */ +static int _cpp_callback_dequeue(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_header(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_control(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_whitespace(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_newline(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_otherspace(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_comment(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_comma(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_operator(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_quote(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_directive(Parser * parser, Token * token, int c, + void * data); +static int _cpp_callback_word(Parser * parser, Token * token, int c, + void * data); +static int 
_cpp_callback_unknown(Parser * parser, Token * token, int c, + void * data); + + +/* CppParser */ +/* private */ +/* cpp_isword: returns non-zero if c may be part of a word, that is an alphanumeric character, '_' or '$' */ +static int _cpp_isword(int c) +{ + return isalnum(c) || c == '_' || c == '$'; +} + + +/* cpp_parse_word: stores c, then every following character accepted by _cpp_isword(), into a newly allocated NUL-terminated string; returns NULL after setting the error if realloc() fails; the callers in this file free() the result */ +static char * _cpp_parse_word(Parser * parser, int c) +{ + char * str = NULL; + size_t len = 0; + char * p; + + do + { + if((p = realloc(str, len + 2)) == NULL) /* one more character plus the final NUL */ + { + error_set_code(1, "%s", strerror(errno)); + free(str); + return NULL; + } + str = p; + str[len++] = c; + } + while(_cpp_isword((c = parser_scan_filter(parser)))); + str[len] = '\0'; + return str; +} + + +/* filters */ +/* cpp_filter_newlines: joins continued lines; a backslash directly followed by a newline is replaced with the character after that newline; any other backslash is returned as is while the character read after it is kept in newlines_last and handed out by the next call; returns 1 when a character was kept that way, 0 otherwise */ +static int _cpp_filter_newlines(int * c, void * data) +{ + CppParser * cpp = data; + + if(cpp->newlines_last_cnt != 0) /* hand out the character kept by the previous call */ + { + cpp->newlines_last_cnt--; + *c = cpp->newlines_last; + return 0; + } + if(*c != '\\') + return 0; + if((*c = parser_scan(cpp->parser)) == '\n') + { + *c = parser_scan(cpp->parser); /* skip the newline */ + return 0; + } + cpp->newlines_last = *c; + cpp->newlines_last_cnt = 1; + *c = '\\'; + return 1; +} + + +/* cpp_filter_trigraphs: replaces the nine "??x" trigraph sequences known to _trigraphs_get() with the character they stand for; characters read ahead that turn out not to form a trigraph are kept (trigraphs_last, trigraphs_last_cnt) and handed out by the following calls; returns how many characters were kept, 0 if none */ +static int _trigraphs_get(int last, int * c); + +static int _cpp_filter_trigraphs(int * c, void * data) +{ + CppParser * cpp = data; + + if(cpp->trigraphs_last_cnt == 2) /* second '?' of a "??x" that was not a trigraph */ + { + cpp->trigraphs_last_cnt--; + *c = '?'; + return 0; + } + else if(cpp->trigraphs_last_cnt == 1) /* the character that followed the question mark(s) */ + { + cpp->trigraphs_last_cnt--; + *c = cpp->trigraphs_last; + return 0; + } + if(*c != '?') + return 0; + if((cpp->trigraphs_last = parser_scan(cpp->parser)) != '?') + { + cpp->trigraphs_last_cnt = 1; + return 1; + } + cpp->trigraphs_last = parser_scan(cpp->parser); + if(_trigraphs_get(cpp->trigraphs_last, c) != 0) + { +#ifdef DEBUG + fprintf(stderr, "DEBUG: last=%c\n", cpp->trigraphs_last); +#endif + cpp->trigraphs_last_cnt = 2; + return 2; + } +#ifdef DEBUG + fprintf(stderr, "DEBUG: filtered \"??%c\" into \"%c\"\n", + cpp->trigraphs_last, *c); +#endif + return 0; +} + +static int _trigraphs_get(int 
last, int * c) +{ + switch(last) + { + case '=': + *c = '#'; + break; + case '/': + *c = '\\'; + break; + case '\'': + *c = '^'; + break; + case '(': + *c = '['; + break; + case ')': + *c = ']'; + break; + case '!': + *c = '|'; + break; + case '<': + *c = '{'; + break; + case '>': + *c = '}'; + break; + case '-': + *c = '~'; + break; + default: + return 1; /* last does not complete a trigraph, *c is left alone */ + } + return 0; +} + + +/* callbacks */ +/* cpp_callback_whitespace: registered by cppparser_new() when CPP_FILTER_WHITESPACE is set; consumes a whole run of whitespace but only records its newlines; a run with newlines sets directive_newline and flushes the queue through _cpp_callback_dequeue(), any other run becomes a single " " token (also appended to a non-NULL queue_string while queue_code is not CPP_CODE_NULL) */ +static int _cpp_callback_whitespace(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + char * str = NULL; + size_t len = 0; + char * p; + + if(!isspace(c)) + return 1; + DEBUG_CALLBACK(); + do + { + if(c != '\n') + continue; /* only newlines are recorded */ + if((p = realloc(str, len + 2)) == NULL) + { + free(str); + return -1; + } + str = p; + str[len++] = c; + } + while(isspace((c = parser_scan_filter(parser)))); + token_set_code(token, CPP_CODE_WHITESPACE); + if(str != NULL) /* some newlines were encountered */ + { + str[len] = '\0'; + token_set_string(token, str); + cpp->directive_newline = 1; + cpp->queue_ready = 1; + if(_cpp_callback_dequeue(parser, token, c, cpp) == 0) /* XXX */ + { /* a queued token was emitted instead: queue these newlines for the next call */ + cpp->queue_ready = 1; + cpp->queue_code = CPP_CODE_WHITESPACE; + cpp->queue_string = str; + } + else + free(str); /* nothing was dequeued (or it failed): str is no longer needed */ + return 0; + } + token_set_string(token, " "); + if(cpp->queue_code != CPP_CODE_NULL) + { + if(cpp->queue_string != NULL) + string_append(&cpp->queue_string, " "); + } + return 0; +} + + +/* cpp_callback_newline: registered together with _cpp_callback_otherspace() when CPP_FILTER_WHITESPACE is not set; consumes one newline, sets directive_newline and flushes the queue through _cpp_callback_dequeue(); if a queued token was emitted, the newline itself is queued for the next call, otherwise the token becomes a CPP_CODE_NEWLINE */ +static int _cpp_callback_newline(Parser * parser, Token * token, int c, + void * data) +{ + int ret = 0; + CppParser * cpp = data; + + if(c != '\n') + return 1; + DEBUG_CALLBACK(); + cpp->directive_newline = 1; + cpp->queue_ready = 1; + parser_scan_filter(parser); + if(_cpp_callback_dequeue(parser, token, c, cpp) == 0) /* XXX */ + { + cpp->queue_ready = 1; + cpp->queue_code = CPP_CODE_NEWLINE; + cpp->queue_string = string_new("\n"); + } + else + { + token_set_code(token, CPP_CODE_NEWLINE); + token_set_string(token, "\n"); + } + 
return ret; +} + + +/* cpp_callback_otherspace: registered together with _cpp_callback_newline() when CPP_FILTER_WHITESPACE is not set (see cppparser_new()); turns a run of whitespace that stops before the next newline into one CPP_CODE_WHITESPACE token carrying the exact characters read */ +static int _cpp_callback_otherspace(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cppparser = data; + char * str = NULL; + size_t len = 0; + char * p; + + assert(c != '\n'); /* newlines go to _cpp_callback_newline(), which is registered first */ + if(!isspace(c)) + return 1; + DEBUG_CALLBACK(); + do + { + if((p = realloc(str, len + 2)) == NULL) + { + free(str); + return -1; + } + str = p; + str[len++] = c; + } + while(isspace((c = parser_scan_filter(parser))) && c != '\n'); + token_set_code(token, CPP_CODE_WHITESPACE); + if(str != NULL) + { + str[len] = '\0'; + token_set_string(token, str); + free(str); + } + else + token_set_string(token, " "); + return 0; +} + + +/* cpp_callback_comment: handles a '/'; when it does not open a block comment the token is a lone "/" (CPP_CODE_OPERATOR_DIVIDE, or CPP_CODE_META_LINE while queue_code is set); otherwise the comment is read up to its closing star and slash and becomes a CPP_CODE_COMMENT token with its full text, or a single " " CPP_CODE_WHITESPACE token when CPP_FILTER_COMMENT is set; returns a negative value if memory runs out or the file ends inside the comment */ +static int _cpp_callback_comment(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + char * str = NULL; + size_t len = 2; /* room for the opening slash and star, written once the comment is complete */ + char * p; + int ret; + + if(c != '/') + return 1; + DEBUG_CALLBACK(); + if((c = parser_scan_filter(parser)) != '*') + { + /* NOTE(review): inside a directive the "/" is not appended to queue_string, unlike what the operator, quote and word callbacks do -- confirm whether this is intended */ + if(cpp->queue_code == CPP_CODE_NULL) + token_set_code(token, CPP_CODE_OPERATOR_DIVIDE); + else + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + token_set_string(token, "/"); + return 0; + } + for(c = parser_scan_filter(parser); c != EOF;) + { + if(!(cpp->filters & CPP_FILTER_COMMENT)) + { + if((p = realloc(str, len + 3)) == NULL) + { + /* report first (free() may alter errno), then release the partial text instead of leaking it */ + ret = -error_set_code(1, "%s", strerror( + errno)); + free(str); + return ret; + } + str = p; + str[len++] = c; + } + if(c == '*') + { + if((c = parser_scan_filter(parser)) == '/') + break; + } + else + c = parser_scan_filter(parser); + } + if(c == EOF) + { + free(str); /* do not leak the text of the unterminated comment */ + return -error_set_code(1, "%s", "End of file within a comment"); + } + if(str != NULL) + { + str[0] = '/'; + str[1] = '*'; + str[len++] = '/'; + str[len] = '\0'; + token_set_code(token, CPP_CODE_COMMENT); + token_set_string(token, str); + free(str); + } + else + { + token_set_code(token, CPP_CODE_WHITESPACE); + token_set_string(token, " "); + } + parser_scan_filter(parser); + return 0; +} + + +/* cpp_callback_dequeue */ +static int 
_dequeue_include(CppParser * cpp, Token * token, char const * str); +static char * _include_path(CppParser * cpp, char const * str); +static char * _path_lookup(CppParser * cpp, char const * path, int system); + +/* Emits the token queued by the other callbacks, if any: returns 1 without touching token when queue_ready is not set or nothing is queued; otherwise fills token from queue_code and queue_string, resets the queue and the directive state, and returns 0 (or the negative result of _dequeue_include()) */ +static int _cpp_callback_dequeue(Parser * parser, Token * token, int c, + void * data) +{ + int ret = 0; + CppParser * cpp = data; + + if(cpp->queue_ready == 0) + return 1; + cpp->queue_ready = 0; + if(cpp->queue_code == CPP_CODE_NULL && cpp->queue_string == NULL) + return 1; + DEBUG_CALLBACK(); + token_set_code(token, cpp->queue_code); + switch(cpp->queue_code) + { + case CPP_CODE_META_DEFINE: + case CPP_CODE_META_IFDEF: + case CPP_CODE_META_IFNDEF: + case CPP_CODE_META_UNDEF: + token_set_string(token, ""); + /* NOTE(review): the string is handed over as token data and forgotten here -- presumably the token consumer frees it; confirm */ + token_set_data(token, cpp->queue_string); + cpp->queue_string = NULL; + break; + case CPP_CODE_META_INCLUDE: + token_set_string(token, ""); + ret = _dequeue_include(cpp, token, cpp->queue_string); + break; + case CPP_CODE_META_ERROR: + case CPP_CODE_META_WARNING: + token_set_string(token, (cpp->queue_string != NULL) + ? 
cpp->queue_string : ""); + break; + case CPP_CODE_NEWLINE: /* XXX these two shouldn't be here */ + case CPP_CODE_WHITESPACE: + /* token_set_string() is given literals and buffers that are freed right away throughout this file, so it is taken to copy its argument: queue_string stays set here so that it is released below instead of being leaked */ + token_set_string(token, cpp->queue_string); + break; + default: + token_set_string(token, ""); + break; + } + cpp->queue_code = CPP_CODE_NULL; + string_delete(cpp->queue_string); + cpp->queue_string = NULL; + cpp->directive_newline = 1; + cpp->directive_control = 0; + return ret; +} + +/* Resolves the header named by str and opens it as cp->subparser; a header that cannot be resolved is not fatal: token becomes a CPP_CODE_META_ERROR carrying error_get() and 0 is returned; returns -1 if the sub-parser cannot be created */ +static int _dequeue_include(CppParser * cp, Token * token, char const * str) +{ + char * path = NULL; + + if((path = _include_path(cp, str)) == NULL) + { + token_set_code(token, CPP_CODE_META_ERROR); + token_set_string(token, error_get()); + return 0; + } + if((cp->subparser = cppparser_new(cp->cpp, cp, path, cp->filters)) + == NULL) + { + free(path); + return -1; + } + free(path); + return 0; +} + +/* str is the header name with its delimiters, either "file" or <file>, or NULL when the directive had no argument; returns the path found by _path_lookup(), which the caller frees, or NULL with the error set */ +static char * _include_path(CppParser * cpp, char const * str) +{ + int d; + size_t len; + char * path = NULL; + char * p; + + if(str == NULL) /* "#include" directly followed by the end of the line: nothing was queued */ + { + error_set("%s", "Invalid include directive"); + return NULL; + } +#ifdef DEBUG + fprintf(stderr, "DEBUG: %s(%p, \"%s\")\n", __func__, cpp, str); +#endif + if(str[0] == '"') + d = str[0]; + else if(str[0] == '<') + d = '>'; + else + { + error_set("%s", "Invalid include directive"); + return NULL; + } + len = strlen(str); + if(len < 3 || str[len - 1] != d) /* both delimiters and at least one character in between */ + { + error_set("%s", "Invalid include directive"); + return NULL; + } + if((path = strdup(&str[1])) == NULL) + { + error_set("%s", strerror(errno)); + return NULL; + } + path[len - 2] = '\0'; /* drop the closing delimiter */ + p = _path_lookup(cpp, path, d == '>'); + free(path); + return p; +} + +/* Names given as <file> (system != 0) go straight to cpp_path_lookup(); other names are first looked for in the directory of the current file, then of each including (parent) file, before falling back to cpp_path_lookup() */ +static char * _path_lookup(CppParser * cp, char const * path, int system) +{ + Cpp * cpp = cp->cpp; + char const * filename; + char * p; + char * q; + char * r; + struct stat st; + + if(system != 0) + return cpp_path_lookup(cp->cpp, path); + for(; cp != NULL; cp = cp->parent) + { + filename = parser_get_filename(cp->parser); + if((p = string_new(filename)) == NULL) + return NULL; + q = dirname(p); + if((r = string_new(q)) == NULL || string_append(&r, "/") != 0 + || 
string_append(&r, path) != 0) + { + string_delete(r); + string_delete(p); + return NULL; + } + string_delete(p); +#ifdef DEBUG + fprintf(stderr, "DEBUG: stat(\"%s\", %p)\n", r, &st); +#endif + if(stat(r, &st) == 0) + return r; /* the file exists in this directory: r is handed to the caller */ + error_set("%s: %s", r, strerror(errno)); + string_delete(r); + } + return cpp_path_lookup(cpp, path); /* XXX errors change "" into <> */ +} + + +/* cpp_callback_header: only active right after an include directive was recognized (directive_control == 1 and queue_code == CPP_CODE_META_INCLUDE); reads a <...> or "..." header name up to its closing delimiter, a newline or the end of file, and stores it, delimiters included, in queue_string; a second header name in the same directive turns the queue into a CPP_CODE_META_ERROR */ +static int _cpp_callback_header(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cp = data; + char end; + char * str = NULL; + size_t len = 0; + char * p; + + if(cp->directive_control != 1 || cp->queue_code != CPP_CODE_META_INCLUDE + || (c != '<' && c != '"')) + return 1; + DEBUG_CALLBACK(); + end = (c == '<') ? '>' : '"'; + while((p = realloc(str, len + 3)) != NULL) + { /* room for c, the closing delimiter and the final NUL */ + str = p; + str[len++] = c; + if((c = parser_scan_filter(parser)) == EOF || c == '\n') + break; + else if(c == end) + break; + } + if(p == NULL) /* there was an error with realloc() */ + { + error_set_code(1, "%s", strerror(errno)); + free(str); + return -1; + } + else if(c == end) /* the header name is properly closed */ + { + str[len++] = c; + parser_scan_filter(parser); + } + str[len] = '\0'; + token_set_code(token, CPP_CODE_META_LINE); + token_set_string(token, str); + if(cp->queue_string == NULL) + cp->queue_string = str; /* the queue now owns str */ + else + { + free(str); + cp->queue_code = CPP_CODE_META_ERROR; + free(cp->queue_string); + /* XXX may be followed by junk */ + cp->queue_string = strdup("Syntax error"); + } + return 0; +} + + +/* cpp_callback_control: recognizes the '#' that opens a directive, which is only accepted while directive_newline is set; any other character clears directive_newline even though the callback then declines it by returning 1 */ +static int _cpp_callback_control(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + + if(cpp->directive_newline != 1 || c != '#') + { + cpp->directive_newline = 0; + return 1; + } + DEBUG_CALLBACK(); + parser_scan_filter(parser); + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + token_set_string(token, "#"); + cpp->directive_newline = 0; + cpp->directive_control = 1; + cpp->queue_code = CPP_CODE_NULL; 
+ return 0; +} + + +/* cpp_callback_comma: emits a CPP_CODE_COMMA token for ','; while queue_code is not CPP_CODE_NULL the token is flagged CPP_CODE_META_LINE instead and the comma is added to queue_string */ +static int _cpp_callback_comma(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + + if(c != ',') + return 1; + DEBUG_CALLBACK(); + token_set_code(token, CPP_CODE_COMMA); + token_set_string(token, ","); + if(cpp->queue_code != CPP_CODE_NULL) + { + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + /* queue_string is still NULL right after the directive name: create it, as the operator, quote and word callbacks do */ + if(cpp->queue_string == NULL) + cpp->queue_string = string_new(","); + else + string_append(&cpp->queue_string, ","); + } + parser_scan_filter(parser); + return 0; +} + + +/* cpp_callback_operator: matches the longest entry of _cpp_operators against the input, reading one character ahead for every character matched; returns 1 if c starts no operator, 0 once the token is set (CPP_CODE_META_LINE plus an addition to queue_string while queue_code is not CPP_CODE_NULL) and -1 if the characters consumed do not form any operator */ +static int _cpp_callback_operator(Parser * parser, Token * token, int c, + void * data) + /* FIXME ".." and similar inputs still fail (with -1): the characters read ahead are not given back to the parser here */ +{ + CppParser * cpp = data; + size_t i; + size_t k; + size_t pos; + + for(i = 0; i < _cpp_operators_cnt; i++) + if(_cpp_operators[i].string[0] == c) + break; + if(i == _cpp_operators_cnt) /* nothing found */ + return 1; + DEBUG_CALLBACK(); + for(pos = 0; i < _cpp_operators_cnt;) + { + if(_cpp_operators[i].string[pos] == '\0') + break; + if(c == _cpp_operators[i].string[pos]) + { + c = parser_scan_filter(parser); + pos++; + continue; + } + /* mismatch: move on to the next operator that begins with the pos characters already consumed; merely taking the next entry would turn ">>" into ">=" and read past the end of entries shorter than pos */ + for(k = i + 1; k < _cpp_operators_cnt; k++) + if(strncmp(_cpp_operators[k].string, + _cpp_operators[i].string, pos) == 0) + break; + i = k; + } + if(i == _cpp_operators_cnt) /* no operator begins with what was consumed */ + return -1; + token_set_code(token, _cpp_operators[i].code); + token_set_string(token, _cpp_operators[i].string); + if(cpp->queue_code != CPP_CODE_NULL) + { + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + if(cpp->queue_string == NULL) + cpp->queue_string = string_new( + _cpp_operators[i].string); + else + string_append(&cpp->queue_string, + _cpp_operators[i].string); + } + return 0; +} + + +/* cpp_callback_quote: reads a character or string literal opened by c, honouring backslash escapes, up to the matching quote, a newline or the end of file; the token is CPP_CODE_SQUOTE or CPP_CODE_DQUOTE, or CPP_CODE_META_LINE plus an addition to queue_string while queue_code is not CPP_CODE_NULL */ +static int _cpp_callback_quote(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + int escape = 0; + char * str = NULL; + size_t len = 0; + char * p; + + if(c == '\'') + token_set_code(token, CPP_CODE_SQUOTE); + else if(c == '"') + token_set_code(token, CPP_CODE_DQUOTE); + else + return 1; + DEBUG_CALLBACK(); + while((p = realloc(str, len + 3)) != NULL) + { + 
str = p; + str[len++] = c; + if((c = parser_scan_filter(parser)) == EOF || c == '\n') + break; + if(escape) + escape = 0; /* the escaped character is taken as is, even a quote */ + else if(c == str[0]) + break; + else if(c == '\\') + escape = 1; + } + if(p == NULL) /* there was an error with realloc() */ + { + error_set_code(1, "%s", strerror(errno)); + free(str); + return -1; + } + else if(c == str[0]) /* the quoted string is properly closed */ + { + str[len++] = str[0]; + parser_scan_filter(parser); + } /* XXX else we should probably issue a warning */ + str[len] = '\0'; + token_set_string(token, str); + if(cpp->queue_code != CPP_CODE_NULL) + { + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + if(cpp->queue_string == NULL) + cpp->queue_string = string_new(str); + else + string_append(&cpp->queue_string, str); + } + free(str); + return 0; +} + + +/* cpp_callback_directive: reads the word following the '#' of a directive; it is only active while directive_control is 1 and no directive is queued yet (queue_code == CPP_CODE_NULL); a name found in _cpp_directives queues the code CPP_CODE_META_FIRST + its index with an empty queue_string, any other word queues a CPP_CODE_META_ERROR with a message naming it; the token itself is always a CPP_CODE_META_LINE holding the word */ +static int _cpp_callback_directive(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + char * str; + size_t i; + + if(cpp->directive_control != 1 || cpp->queue_code != CPP_CODE_NULL + || !_cpp_isword(c)) + return 1; + DEBUG_CALLBACK(); + if((str = _cpp_parse_word(parser, c)) == NULL) + return -1; + for(i = 0; _cpp_directives[i] != NULL; i++) + if(strcmp(str, _cpp_directives[i]) == 0) + break; + if(_cpp_directives[i] != NULL) + { + cpp->queue_code = CPP_CODE_META_FIRST + i; + cpp->queue_string = NULL; + } + else + { + cpp->queue_code = CPP_CODE_META_ERROR; + cpp->queue_string = string_new_append("Invalid directive: #", + str, ":", NULL); /* XXX check for errors */ + } + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + token_set_string(token, str); + free(str); + return 0; +} + + +/* cpp_callback_word: reads a word with _cpp_parse_word(); the token is a CPP_CODE_WORD, or a CPP_CODE_META_LINE whose text is also added to queue_string while queue_code is not CPP_CODE_NULL */ +static int _cpp_callback_word(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + char * str; + + if(!_cpp_isword(c)) + return 1; + DEBUG_CALLBACK(); + if((str = _cpp_parse_word(parser, c)) == NULL) + return -1; + token_set_code(token, CPP_CODE_WORD); + 
token_set_string(token, str); + if(cpp->queue_code != CPP_CODE_NULL) + { + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + if(cpp->queue_string == NULL) + cpp->queue_string = string_new(str); + else + string_append(&cpp->queue_string, str); + } + free(str); + return 0; +} + + +/* cpp_callback_unknown: last callback registered by cppparser_new(); turns any character other than EOF into a one-character CPP_CODE_UNKNOWN token, or a CPP_CODE_META_LINE whose text is also added to queue_string while queue_code is not CPP_CODE_NULL */ +static int _cpp_callback_unknown(Parser * parser, Token * token, int c, + void * data) +{ + CppParser * cpp = data; + char buf[2] = "\0"; + + if(c == EOF) + return 1; + DEBUG_CALLBACK(); + buf[0] = c; + /* NOTE(review): every other callback advances with parser_scan_filter(); confirm that bypassing the filters here is intended */ + parser_scan(parser); + token_set_code(token, CPP_CODE_UNKNOWN); + token_set_string(token, buf); + if(cpp->queue_code != CPP_CODE_NULL) + { + token_set_code(token, CPP_CODE_META_LINE); /* XXX */ + /* queue_string is still NULL right after the directive name: create it, as the operator, quote and word callbacks do */ + if(cpp->queue_string == NULL) + cpp->queue_string = string_new(buf); + else + string_append(&cpp->queue_string, buf); + } + return 0; +} + + +/* public */ +/* functions */ +/* cppparser_new: creates the parser for filename; parent is the including parser (NULL-terminated chain walked by _path_lookup()) and filters a mask of CPP_FILTER_* flags selecting the trigraph filter and the whitespace and comment handling; returns NULL if the object or the underlying Parser cannot be created. NOTE(review): the callbacks are presumably tried in the order they are registered below -- confirm against the Parser API */ +CppParser * cppparser_new(Cpp * cpp, CppParser * parent, char const * filename, + int filters) +{ + CppParser * cp; + + if((cp = object_new(sizeof(*cp))) == NULL) + return NULL; + cp->cpp = cpp; + cp->parent = parent; + cp->parser = parser_new(filename); + cp->filters = filters; + cp->newlines_last = 0; + cp->newlines_last_cnt = 0; + cp->trigraphs_last = 0; + cp->trigraphs_last_cnt = 0; + cp->directive_newline = 1; + cp->directive_control = 0; + cp->queue_ready = 0; + cp->queue_code = CPP_CODE_NULL; + cp->queue_string = NULL; + cp->subparser = NULL; + if(cp->parser == NULL) + { + cppparser_delete(cp); + return NULL; + } + parser_add_filter(cp->parser, _cpp_filter_newlines, cp); + if(cp->filters & CPP_FILTER_TRIGRAPH) + parser_add_filter(cp->parser, _cpp_filter_trigraphs, cp); + parser_add_callback(cp->parser, _cpp_callback_dequeue, cp); + if(cp->filters & CPP_FILTER_WHITESPACE) + parser_add_callback(cp->parser, _cpp_callback_whitespace, cp); + else + { + parser_add_callback(cp->parser, _cpp_callback_newline, cp); + parser_add_callback(cp->parser, _cpp_callback_otherspace, cp); + } + parser_add_callback(cp->parser, _cpp_callback_comment, cp); + 
parser_add_callback(cp->parser, _cpp_callback_header, cp); + parser_add_callback(cp->parser, _cpp_callback_control, cp); + parser_add_callback(cp->parser, _cpp_callback_comma, cp); + parser_add_callback(cp->parser, _cpp_callback_operator, cp); + parser_add_callback(cp->parser, _cpp_callback_quote, cp); + parser_add_callback(cp->parser, _cpp_callback_directive, cp); + parser_add_callback(cp->parser, _cpp_callback_word, cp); + parser_add_callback(cp->parser, _cpp_callback_unknown, cp); + return cp; +} + + +/* cppparser_delete: releases the queued string, the underlying Parser, the sub-parser of an unfinished include (recursively) and the object itself */ +void cppparser_delete(CppParser * cp) +{ + string_delete(cp->queue_string); + if(cp->parser != NULL) /* cppparser_new() gets here with a NULL parser when parser_new() failed */ + parser_delete(cp->parser); + if(cp->subparser != NULL) + cppparser_delete(cp->subparser); + object_delete(cp); +} + + +/* accessors */ +/* cppparser_get_filename: returns the filename reported by the underlying Parser */ +char const * cppparser_get_filename(CppParser * cpp) +{ + return parser_get_filename(cpp->parser); +} + + +/* useful */ +/* cppparser_scan: fetches the next token; while an included file is being read (cp->subparser) tokens come from it, and once it yields a NULL token it is deleted and scanning resumes in this file; returns 1 if the sub-parser failed, otherwise the result of parser_get_token() */ +int cppparser_scan(CppParser * cp, Token ** token) +{ + if(cp->subparser != NULL) + { + if(cppparser_scan(cp->subparser, token) != 0) + return 1; + if(*token != NULL) + return 0; + cppparser_delete(cp->subparser); /* end of file */ + cp->subparser = NULL; + } + return parser_get_token(cp->parser, token); +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..2cb3e79 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,41 @@ +/* $Id$ */ +/* Copyright (c) 2009 Pierre Pronchery */ +/* This file is part of DeforaOS Devel cpp */ +/* cpp is not free software; you can redistribute it and/or modify it under the + * terms of the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 + * Unported as published by the Creative Commons organization. + * + * cpp is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the Creative Commons Attribution-NonCommercial- + * ShareAlike 3.0 Unported license for more details. 
+ * + * You should have received a copy of the Creative Commons Attribution- + * NonCommercial-ShareAlike 3.0 along with cpp; if not, browse to + * http://creativecommons.org/licenses/by-nc-sa/3.0/ */ + + +#ifndef _CPP_PARSER_H +# define _CPP_PARSER_H + +# include "cpp.h" + + +/* types */ +typedef struct _CppParser CppParser; + + +/* functions */ +CppParser * cppparser_new(Cpp * cpp, CppParser * parent, char const * filename, + int filters); +void cppparser_delete(CppParser * cppparser); + + +/* accessors */ +char const * cppparser_get_filename(CppParser * cppparser); + + +/* useful */ +int cppparser_scan(CppParser * cppparser, Token ** token); + +#endif /* !_CPP_PARSER_H */ diff --git a/src/project.conf b/src/project.conf index c0b08a2..01e5217 100644 --- a/src/project.conf +++ b/src/project.conf @@ -2,11 +2,11 @@ targets=libcpp,cpp cppflags=-I ../include cflags_force=-W cflags=-Wall -g -O2 -dist=Makefile,common.h +dist=Makefile,common.h,parser.h [libcpp] type=library -sources=cpp.c,scanner.c +sources=cpp.c,parser.c,scanner.c cflags=-fPIC ldflags=-L $(LIBDIR) -Wl,-rpath $(LIBDIR) -l System install=$(LIBDIR) @@ -14,6 +14,9 @@ install=$(LIBDIR) [cpp.c] depends=common.h,../include/cpp.h +[parser.c] +depends=parser.h,../include/cpp.h + [scanner.c] depends=common.h,../include/cpp.h diff --git a/src/scanner.c b/src/scanner.c index 42c099d..9725757 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -52,7 +52,6 @@ static int _cpp_scope_push(Cpp * cpp, CppScope scope) { CppScope * p; - cpp = cpp->toplevel; if(_cpp_scope_get(cpp) != CPP_SCOPE_TAKING) scope = CPP_SCOPE_TAKEN; if((p = realloc(cpp->scopes, sizeof(*p) * (cpp->scopes_cnt + 1))) @@ -67,7 +66,6 @@ static int _cpp_scope_push(Cpp * cpp, CppScope scope) /* cpp_scope_get */ static CppScope _cpp_scope_get(Cpp * cpp) { - cpp = cpp->toplevel; return (cpp->scopes_cnt == 0) ? 
CPP_SCOPE_TAKING : cpp->scopes[cpp->scopes_cnt - 1]; } @@ -76,14 +74,13 @@ static CppScope _cpp_scope_get(Cpp * cpp) /* cpp_scope_get_count */ static size_t _cpp_scope_get_count(Cpp * cpp) { - return cpp->toplevel->scopes_cnt; + return cpp->scopes_cnt; } /* cpp_scope_set */ static void _cpp_scope_set(Cpp * cpp, CppScope scope) { - cpp = cpp->toplevel; assert(cpp->scopes_cnt > 0); cpp->scopes[cpp->scopes_cnt - 1] = scope; } @@ -94,7 +91,6 @@ static int _cpp_scope_pop(Cpp * cpp) { CppScope * p; - cpp = cpp->toplevel; assert(cpp->scopes_cnt > 0); if(cpp->scopes_cnt == 1) { @@ -112,7 +108,6 @@ static int _cpp_scope_pop(Cpp * cpp) /* public */ /* cpp_scan */ -static int _scan_get_next(Cpp * cpp, Token ** token); static int _scan_ifdef(Cpp * cpp, Token ** token); static int _scan_ifndef(Cpp * cpp, Token ** token); static int _scan_if(Cpp * cpp, Token ** token); @@ -127,7 +122,8 @@ int cpp_scan(Cpp * cpp, Token ** token) int ret; TokenCode code; - for(; (ret = _scan_get_next(cpp, token)) == 0; token_delete(*token)) + for(; (ret = cppparser_scan(cpp->parser, token)) == 0; + token_delete(*token)) { if(*token == NULL) /* end of file */ break; @@ -163,20 +159,6 @@ int cpp_scan(Cpp * cpp, Token ** token) return ret; } -static int _scan_get_next(Cpp * cpp, Token ** token) -{ - if(cpp->subparser != NULL) - { - if(_scan_get_next(cpp->subparser, token) != 0) - return 1; - if(*token != NULL) - return 0; - cpp_delete(cpp->subparser); /* end of file */ - cpp->subparser = NULL; - } - return parser_get_token(cpp->parser, token); -} - static int _scan_ifdef(Cpp * cpp, Token ** token) { char * name;