CPP/src/cpp.c

576 lines
12 KiB
C

/* $Id$ */
/* Copyright (c) 2008 Pierre Pronchery <khorben@defora.org> */
/* This file is part of DeforaOS Devel cpp */
/* cpp is not free software; you can redistribute it and/or modify it under the
* terms of the Creative Commons Attribution-NonCommercial-ShareAlike 3.0
* Unported as published by the Creative Commons organization.
*
* cpp is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the Creative Commons Attribution-NonCommercial-
* ShareAlike 3.0 Unported license for more details.
*
* You should have received a copy of the Creative Commons Attribution-
* NonCommercial-ShareAlike 3.0 along with cpp; if not, browse to
* http://creativecommons.org/licenses/by-nc-sa/3.0/ */
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <errno.h>
#include <System.h>
#include "cpp.h"
/* CppParser */
/* private */
/* types */
typedef struct _CppOperator
{
CppCode code;
char const * string;
} CppOperator;
typedef struct _CppParser
{
Parser * parser;
const CppOperator * operators;
size_t operators_cnt;
} CppParser;
/* variables */
static const CppOperator _cpp_operators[] =
{
/* FIXME "&=" */
{ CPP_CODE_OPERATOR_DAMPERSAND, "&&" },
{ CPP_CODE_OPERATOR_AMPERSAND, "&" },
{ CPP_CODE_OPERATOR_COLON, ":" },
{ CPP_CODE_OPERATOR_DBAR, "||" },
{ CPP_CODE_OPERATOR_BAR, "|" },
{ CPP_CODE_OPERATOR_DIVIDE, "/" },
{ CPP_CODE_OPERATOR_DOT, "." },
{ CPP_CODE_OPERATOR_DEQUALS, "==" },
{ CPP_CODE_OPERATOR_EQUALS, "=" },
{ CPP_CODE_OPERATOR_DGREATER, ">>" },
{ CPP_CODE_OPERATOR_GREATER, ">" },
{ CPP_CODE_OPERATOR_INVERSE, "~" },
{ CPP_CODE_OPERATOR_LBRACE, "{" },
{ CPP_CODE_OPERATOR_LBRACKET, "[" },
{ CPP_CODE_OPERATOR_DLESS, "<<" },
{ CPP_CODE_OPERATOR_LESS, "<" },
{ CPP_CODE_OPERATOR_LPAREN, "(" },
/* FIXME "|=" */
/* FIXME "->" */
/* FIXME "-=" */
{ CPP_CODE_OPERATOR_DMINUS, "--" },
{ CPP_CODE_OPERATOR_MINUS, "-" },
{ CPP_CODE_OPERATOR_MODULO, "%" },
{ CPP_CODE_OPERATOR_MULTIPLY, "*" },
{ CPP_CODE_OPERATOR_NOT, "!" },
/* FIXME "+=" */
{ CPP_CODE_OPERATOR_DPLUS, "++" },
{ CPP_CODE_OPERATOR_PLUS, "+" },
{ CPP_CODE_OPERATOR_RBRACE, "}" },
{ CPP_CODE_OPERATOR_RBRACKET, "]" },
{ CPP_CODE_OPERATOR_RPAREN, ")" },
{ CPP_CODE_OPERATOR_SEMICOLON, ";" },
{ CPP_CODE_OPERATOR_XOR, "^" }
};
static const size_t _cpp_operators_cnt = sizeof(_cpp_operators)
/ sizeof(*_cpp_operators);
/* prototypes */
static CppParser * _cppparser_new(char const * filename, int filters,
const CppOperator * operators, size_t operators_cnt);
static void _cppparser_delete(CppParser * cppparser);
/* Cpp */
/* private */
/* types */
struct _Cpp
{
CppParser ** parsers;
size_t parsers_cnt;
int filters;
};
/* prototypes */
/* useful */
static int _cpp_filter_newlines(int * c, void * data);
static int _cpp_filter_trigraphs(int * c, void * data);
static int _cpp_callback_whitespace(Parser * parser, Token * token, int c,
void * data);
/* FIXME handle directives */
static int _cpp_callback_comment(Parser * parser, Token * token, int c,
void * data);
static int _cpp_callback_operator(Parser * parser, Token * token, int c,
void * data);
static int _cpp_callback_quote(Parser * parser, Token * token, int c,
void * data);
static int _cpp_callback_word(Parser * parser, Token * token, int c,
void * data);
static int _cpp_callback_unknown(Parser * parser, Token * token, int c,
void * data);
/* CppParser */
/* private */
/* functions */
static CppParser * _cppparser_new(char const * filename, int filters,
const CppOperator * operators, size_t operators_cnt)
{
CppParser * cppparser;
if((cppparser = object_new(sizeof(*cppparser))) == NULL)
return NULL;
cppparser->operators = operators;
cppparser->operators_cnt = operators_cnt;
if((cppparser->parser = parser_new(filename)) == NULL)
{
_cppparser_delete(cppparser);
return NULL;
}
parser_add_filter(cppparser->parser, _cpp_filter_newlines,
cppparser->parser);
if(filters & CPP_FILTER_TRIGRAPH)
parser_add_filter(cppparser->parser, _cpp_filter_trigraphs,
cppparser->parser);
parser_add_callback(cppparser->parser, _cpp_callback_whitespace, NULL);
parser_add_callback(cppparser->parser, _cpp_callback_comment, NULL);
parser_add_callback(cppparser->parser, _cpp_callback_operator,
cppparser);
parser_add_callback(cppparser->parser, _cpp_callback_quote, NULL);
parser_add_callback(cppparser->parser, _cpp_callback_word, NULL);
parser_add_callback(cppparser->parser, _cpp_callback_unknown, NULL);
return cppparser;
}
/* cppparser_delete */
static void _cppparser_delete(CppParser * cppparser)
{
parser_delete(cppparser->parser);
object_delete(cppparser);
}
/* Cpp */
/* private */
/* cpp_filter_newlines */
static int _cpp_filter_newlines(int * c, void * data)
{
Parser * parser = data;
/* FIXME obtain from data */
static int last;
static int last_cnt = 0;
if(last_cnt)
{
last_cnt--;
*c = last;
return 0;
}
if(*c != '\\')
return 0;
if((*c = parser_scan(parser)) == '\n')
{
*c = parser_scan(parser); /* skip the newline */
return 0;
}
last = *c;
last_cnt = 1;
*c = '\\';
return 1;
}
/* cpp_filter_trigraphs */
static int _trigraph_get(int last, int * c);
static int _cpp_filter_trigraphs(int * c, void * data)
{
Parser * parser = data;
static int last;
static int last_cnt = 0;
if(last_cnt == 2)
{
last_cnt--;
*c = '?';
return 0;
}
else if(last_cnt == 1)
{
last_cnt--;
*c = last;
return 0;
}
if(*c != '?')
return 0;
if((last = parser_scan(parser)) != '?')
{
last_cnt = 1;
return 1;
}
last = parser_scan(parser);
if(_trigraph_get(last, c) != 0)
{
#ifdef DEBUG
fprintf(stderr, "DEBUG: last=%c\n", last);
#endif
last_cnt = 2;
return 2;
}
#ifdef DEBUG
fprintf(stderr, "DEBUG: filtered \"??%c\" into \"%c\"\n", last, *c);
#endif
return 0;
}
static int _trigraph_get(int last, int * c)
{
switch(last)
{
case '=':
*c = '#';
break;
case '/':
*c = '\\';
break;
case '\'':
*c = '^';
break;
case '(':
*c = '[';
break;
case ')':
*c = ']';
break;
case '!':
*c = '|';
break;
case '<':
*c = '{';
break;
case '>':
*c = '}';
break;
case '-':
*c = '~';
break;
default:
return 1;
}
return 0;
}
/* cpp_callback_whitespace */
static int _cpp_callback_whitespace(Parser * parser, Token * token, int c,
void * data)
{
char * str = NULL;
size_t len = 0;
char * p;
if(!isspace(c))
return 1;
#ifdef DEBUG
fprintf(stderr, "%s", "DEBUG: cpp_callback_whitespace()\n");
#endif
do
{
if(c != '\n')
continue;
if((p = realloc(str, len + 1)) == NULL)
{
free(str);
return -1;
}
str = p;
str[len++] = '\n';
}
while(isspace((c = parser_scan_filter(parser))));
token_set_code(token, CPP_CODE_WHITESPACE);
if(str != NULL)
{
str[len] = '\0';
token_set_string(token, str);
free(str);
}
else
token_set_string(token, " ");
return 0;
}
/* cpp_callback_comment */
static int _cpp_callback_comment(Parser * parser, Token * token, int c,
void * data)
/* FIXME implement properly (and fallback on division) */
{
if(c != '/')
return 1;
#ifdef DEBUG
fprintf(stderr, "DEBUG: cpp_callback_comment()\n");
#endif
if((c = parser_scan_filter(parser)) != '*')
{
token_set_code(token, CPP_CODE_OPERATOR_DIVIDE);
token_set_string(token, "/");
return 0;
}
for(c = parser_scan_filter(parser); c != EOF;)
{
if(c == '*')
{
if((c = parser_scan_filter(parser)) == '/')
break;
}
else
c = parser_scan_filter(parser);
}
if(c == EOF)
return -1;
token_set_code(token, CPP_CODE_WHITESPACE);
token_set_string(token, "");
parser_scan_filter(parser);
return 0;
}
/* cpp_callback_operator */
static int _cpp_callback_operator(Parser * parser, Token * token, int c,
void * data)
{
CppParser * cp = data;
size_t i;
const size_t j = sizeof(_cpp_operators) / sizeof(*_cpp_operators);
size_t pos;
for(i = 0; i < cp->operators_cnt; i++)
if(cp->operators[i].string[0] == c)
break;
if(i == cp->operators_cnt) /* nothing found */
return 1;
#ifdef DEBUG
fprintf(stderr, "DEBUG: cpp_callback_operator('%c')\n", c);
#endif
for(pos = 0; i < j;)
{
if(cp->operators[i].string[pos] == '\0')
break;
if(c == cp->operators[i].string[pos])
{
c = parser_scan_filter(parser);
pos++;
}
else
i++;
}
if(i == j) /* should not happen */
return -1;
token_set_code(token, cp->operators[i].code);
token_set_string(token, cp->operators[i].string);
return 0;
}
/* cpp_callback_quote */
static int _cpp_callback_quote(Parser * parser, Token * token, int c,
void * data)
{
int escape = 0;
char * str = NULL;
size_t len = 0;
char * p;
if(c == '\'')
token_set_code(token, CPP_CODE_SQUOTE);
else if(c == '"')
token_set_code(token, CPP_CODE_DQUOTE);
else
return 1;
#ifdef DEBUG
fprintf(stderr, "%s%c%s", "DEBUG: cpp_callback_quote('", c, "')\n");
#endif
while((p = realloc(str, len + 3)) != NULL)
{
str = p;
str[len++] = c;
if((c = parser_scan_filter(parser)) == EOF || c == '\n')
break;
if(escape)
{
escape = 0;
continue;
}
if(c == str[0])
break;
if(c == '\\')
escape = 1;
}
if(str == NULL || c != str[0])
{
free(str);
return -1;
}
str[len++] = str[0];
str[len] = '\0';
token_set_string(token, str);
free(str);
parser_scan_filter(parser);
return 0;
}
/* cpp_callback_word */
static int _isword(int c);
static int _cpp_callback_word(Parser * parser, Token * token, int c,
void * data)
{
char * str = NULL;
size_t len = 0;
char * p;
if(!_isword(c))
return 1;
#ifdef DEBUG
fprintf(stderr, "DEBUG: cpp_callback_word('%c')\n", c);
#endif
do
{
if((p = realloc(str, len + 2)) == NULL)
{
error_set_code(1, "%s", strerror(errno));
free(str);
return 0;
}
str = p;
str[len++] = c;
}
while(_isword((c = parser_scan_filter(parser))));
str[len] = '\0';
token_set_code(token, CPP_CODE_WORD);
token_set_string(token, str);
free(str);
return 0;
}
static int _isword(int c)
{
return isalnum(c) || c == '_';
}
/* cpp_callback_unknown */
static int _cpp_callback_unknown(Parser * parser, Token * token, int c,
void * data)
{
static char buf[2] = { '\0', '\0' };
if(c == EOF)
return 1;
buf[0] = c;
parser_scan(parser);
#ifdef DEBUG
fprintf(stderr, "DEBUG: cpp_callback_unknown('%c' 0x%x)\n", c, c);
#endif
token_set_code(token, CPP_CODE_UNKNOWN);
token_set_string(token, buf);
return 0;
}
/* public */
/* functions */
/* cpp_new */
Cpp * cpp_new(void)
{
Cpp * cpp;
if((cpp = object_new(sizeof(*cpp))) == NULL)
return NULL;
cpp->parsers = NULL;
cpp->parsers_cnt = 0;
cpp->filters = 0;
return cpp;
}
/* cpp_delete */
void cpp_delete(Cpp * cpp)
{
size_t i;
for(i = 0; i < cpp->parsers_cnt; i++)
parser_delete(cpp->parsers[i]->parser);
object_delete(cpp);
}
/* accessors */
/* cpp_get_filename */
char const * cpp_get_filename(Cpp * cpp)
{
if(cpp->parsers_cnt > 0)
return parser_get_filename(
cpp->parsers[cpp->parsers_cnt - 1]->parser);
return NULL;
}
/* useful */
/* cpp_filter_disable */
void cpp_filter_disable(Cpp * cpp, CppFilter filter)
{
cpp->filters -= (cpp->filters & filter);
}
/* cpp_filter_enable */
void cpp_filter_enable(Cpp * cpp, CppFilter filter)
{
cpp->filters |= filter;
}
/* cpp_parse */
int cpp_parse(Cpp * cpp, char const * pathname)
{
if(cpp->parsers_cnt != 0)
return error_set_code(1, "%s", "Already parsing");
if((cpp->parsers = malloc(sizeof(*cpp->parsers))) == NULL)
return error_set_code(1, "%s", strerror(errno));
if((cpp->parsers[0] = _cppparser_new(pathname, cpp->filters,
_cpp_operators, _cpp_operators_cnt))
== NULL)
return 1;
cpp->parsers_cnt = 1;
return 0;
}
/* cpp_scan */
int cpp_scan(Cpp * cpp, Token ** token)
{
if(cpp->parsers_cnt == 0)
return error_set_code(1, "%s", "No file to parse");
return parser_get_token(cpp->parsers[cpp->parsers_cnt - 1]->parser,
token);
}