diff options
author | Jason Linehan <patientulysses@gmail.com> | 2012-07-22 22:40:02 -0400 |
---|---|---|
committer | Jason Linehan <patientulysses@gmail.com> | 2012-07-22 22:40:02 -0400 |
commit | 9b0c080b4a69530b8b6e8e945c4afbe24cac684f (patch) | |
tree | ba40bdd1b424137e91df37755355e2b9e63a3f8a | |
parent | de8cb439d5b75e4923f8da34f2f8a45767ac1038 (diff) | |
download | plex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.tar.gz plex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.tar.bz2 plex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.zip |
More cleaning up.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | dfa.c | 99 | ||||
-rw-r--r-- | dfa.h | 83 | ||||
-rw-r--r-- | gen.c | 194 | ||||
-rw-r--r-- | gen.h | 12 | ||||
-rw-r--r-- | lex.c | 178 | ||||
-rw-r--r-- | lex.h | 69 | ||||
-rw-r--r-- | lib/error.c | 196 | ||||
-rw-r--r-- | lib/error.h | 31 | ||||
-rw-r--r-- | lib/set.c | 501 | ||||
-rw-r--r-- | lib/set.h | 129 | ||||
-rw-r--r-- | lib/textutils.c | 756 | ||||
-rw-r--r-- | lib/textutils.h | 77 | ||||
-rw-r--r-- | lib/util.h | 399 | ||||
-rw-r--r-- | macro.c | 102 | ||||
-rw-r--r-- | macro.h | 18 | ||||
-rw-r--r-- | main.c | 77 | ||||
-rw-r--r-- | main.h | 38 | ||||
-rw-r--r-- | nfa.c | 58 | ||||
-rw-r--r-- | nfa.h | 6 | ||||
-rw-r--r-- | scan.c | 139 | ||||
-rw-r--r-- | scan.h | 11 | ||||
-rw-r--r-- | tok.h | 58 |
23 files changed, 2966 insertions, 267 deletions
@@ -9,7 +9,7 @@ LDFLAGS= # # -SOURCES=main.c lex.c nfa.c dfa.c gen.c +SOURCES=main.c scan.c lex.c macro.c nfa.c dfa.c gen.c lib/set.c lib/textutils.c lib/error.c OBJECTS=$(SOURCES:.c=.o) EXECUTABLE=plex @@ -2,28 +2,11 @@ * Make a DFA transition table from an NFA created with * Thompson's construction. */ - -/* - * DTAB is the deterministic transition table. It is indexed by state number - * along the major axis and by input character along the minor axis. Dstates - * is a list of deterministic states represented as sets of NFA states. - * NSTATES is the number of valid entries in DTAB. - */ -struct dfa_state { - unsigned group:8; // Group id used by minimize() - unsigned mark:1; // Mark used by make_dtran() - char *accept; // accept action if accept state - int anchor // Anchor point if an accept state. - SET *set; // Set of NFA states represented by this DFA state -}; - - -struct dfa_state *DSTATES; // DFA states table -struct dfa_state *PREV_STATE; // Most-recently marked DFA state in DTAB - -ROW *DTAB; // DFA transition table -int NSTATES // Number of DFA states - +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "dfa.h" +#include "nfa.h" /** @@ -37,21 +20,23 @@ int NSTATES // Number of DFA states * (indexed by state number). * dfa() discards all the memory used for the initial NFA. */ -int dfa(FILE *input, ROW *(dfap[]), ACCEPT *(*acceptp)) +int dfa(struct pgen_t *pgen, struct dfa_table_row **tab, struct accept_t **acc) { - ACCEPT *accept_states; + struct accept_t *accept_states; int start int i; /* Build the NFA */ - start = nfa(input); + start = nfa(pgen); + + DFA = calloc(1, sizeof(struct dfa_t)); - NSTATES = 0; - DSTATES = calloc(DFA_MAX, sizeof(DFA_STATE)); - DTAB = calloc(DFA_MAX, sizeof(ROW)); - PREV_STATE = DSTATES; + DFA->nstates = 0; + DFA->state = calloc(DFA_MAX, sizeof(struct dfa_state)); + DFA->trans = calloc(DFA_MAX, sizeof(struct dfa_table_row)); + DFA->prev = DFA->state; - if (!DSTATES || !DTAB) + if (!DFA->state || !DFA->trans) halt(SIGABRT, "No memory for DFA transition matrix!"); /* Convert NFA to DFA */ @@ -61,20 +46,20 @@ int dfa(FILE *input, ROW *(dfap[]), ACCEPT *(*acceptp)) free_nfa(); /* Build the DFA */ - DTAB = realloc(DTAB, NSTATES * sizeof(ROW)); - accept_states = malloc(NSTATES * sizeof(ACCEPT)); + DFA->trans = realloc(DFA->trans, DFA->nstates * sizeof(struct dfa_table_row)); + accept_states = malloc(DFA->nstates * sizeof(struct accept_t)); - if (!accept_states || !DTAB) + if (!accept_states || !DFA->trans) halt(SIGABRT, "Out of memory!!"); - for (i=NSTATES; i-->0;) { - accept_states[i].string = DSTATES[i].accept; - accept_states[i].anchor = DSTATES[i].anchor; + for (i=DFA->nstates; i-->0;) { + accept_states[i].string = DFA->state[i].accept; + accept_states[i].anchor = DFA->state[i].anchor; } - free(DSTATES); - *dfap = DTAB; - *acceptp = accept_states; + free(DFA->state); + *tab = DFA->trans; + *acc = accept_states; return NSTATES; } @@ -85,13 +70,13 @@ int add_to_dstates(SET *NFA_set, char *accept_string, int anchor) { int nextstate; - if (NSTATES > (DFA_MAX-1)) + if (DFA->nstates > (DFA_MAX-1)) halt(SIGABRT, "Too many DFA states\n"); - nextstate = NSTATES++; - DSTATES[nextstate].set = NFA_set; - DSTATES[nextstate].accept = accept_string; - DSTATES[nextstate].anchor = anchor; + nextstate = DFA->nstates++; + DFA->state[nextstate].set = NFA_set; + DFA->state[nextstate].accept = accept_string; + DFA->state[nextstate].anchor = anchor; return nextstate; } @@ -106,11 +91,11 @@ int add_to_dstates(SET *NFA_set, char *accept_string, int anchor) int in_dstates(SET *NFA_set) { struct dfa_state *p; - struct dfa_state *end = &DSTATE[NSTATES]; + struct dfa_state *end = &DFA->state[DFA->nstates]; - for (p=DSTATES; p<end; ++p) { + for (p=DFA->dstate; p<end; ++p) { if (IS_EQUIVALENT(NFA_set, p->set)) - return(p - DSTATES); + return(p - DFA->state); } return -1; @@ -124,13 +109,13 @@ int in_dstates(SET *NFA_set) * exists, return NULL. Print an asterisk for each state to tell the * user that the program hasn't died while the table is being constructed. */ -DFA_STATE *get_unmarked(void) +struct dfa_state *get_unmarked(void) { - for (; Last_marked<&DSTATES[NSTATES]; ++Last_marked) { - if (!Last_marked->mark) { + for (; DFA->prev < &DFA->state[DFA->nstates]; ++DFA->prev) { + if (!DFA->prev->mark) { putc('*', stderr); fflush(stderr); - return Last_marked; + return DFA->prev; } } return NULL; @@ -141,9 +126,9 @@ DFA_STATE *get_unmarked(void) void free_sets(void) { struct dfa_state *p; - struct dfa_state *end = &DSTATES[NSTATES]; + struct dfa_state *end = &DFA->state[DFA->nstates]; - for (p=DSTATES; p<end; ++p) + for (p=DFA->state; p<end; ++p) delset(p->set); } @@ -170,9 +155,9 @@ void make_dtran(int sstate) NFA_set = newset() ; ADD(NFA_set, sstate); - NSTATES = 1; - DSTATES[0].set = e_closure(NFA_set,&DSTATES[0].accept,&DSTATES[0].anchor); - DSTATES[0].mark = 0; + DFA->nstates = 1; + DFA->state[0].set = e_closure(NFA_set,&DFA->state[0].accept,&DFA->state[0].anchor); + DFA->state[0].mark = 0; /* Make the table */ while (current = get_unmarked()) { @@ -192,7 +177,7 @@ void make_dtran(int sstate) else nextstate = add_to_dstates(NFA_set, isaccept, anchor); - DTAB[current-DSTATES][c] = nextstate; + DFA->trans[current-DFA->state][c] = nextstate; } } /* Terminate string of *'s printed in get_unmarked(); */ @@ -1,36 +1,75 @@ -#ifndef __DFA_H -#define __DFA_H +#ifndef _DFA_H +#define _DFA_H -/* - * Representations of Deterministic finite automata. - */ +#include "main.h" + +#define DFA_MAX 254 // Maximum number of DFA states. +#define MAX_CHARS 128 // Maximum width of DFA transition table. +#define F -1 // Marks a failure state in the table. -/* Maximum number of DFA states. There will be problems if this is >= 255. */ -#define DFA_MAX 254 -/* - * The type of the output DFA transition table (the internal one is an - * array of int). It is used to figure the various table sizes. +/** + * Contains an accepting string, which is NULL if non-accepting, + * and an anchor point, if any. */ -typedef unsigned char TTYPE; +struct accept_t { + char *string; + int anchor; +}; -#define F -1 // Marks a failure state in the table. -#define MAX_CHARS 128 // Maximum width of DFA transition table. +/** + * The DFA transition table is indexed by state number along the major axis + * and by input character along the minor axis. + * + * Each row of the table holds a list of deterministic states represented + * as sets of NFA states. + * + * nstates is the number of valid entries in the table. + * + * @tab : DFA transition table. + * @nstates : Number of DFA state nodes in the state array. + */ +struct dfa_table_row { + int row[MAX_CHARS]; + int nstates; +}; -/* - * One full row of DTAB, which is itself - * an array of DFA_MAX ROWS. +/** + * A DFA state comprises the machine state after a given set of transitions. + * + * @mark: Used by make_dtran + * @accept: Action if accepting state. + * @anchor: Anchor point if accepting state. + * @set : Set of NFA states represented by this DFA state. */ -typedef int ROW[MAX_CHARS]; +struct dfa_state { + unsigned mark:1; + char *accept; + int anchor; + struct set_t *set; +}; + +/** + * A deterministic finite automaton. + * + * @state : Array of DFA state nodes. + * @prev_state: Previous state node. + */ +struct dfa_t { + struct dfa_state *state; + struct dfa_state *prev; + struct dfa_table_row *trans; + int nstates; +}; + +struct dfa_state *last_marked; +struct dfa_t *DFA; -typedef struct accept_t { - char *string; // An accepting string. NULL if non-accepting. - int anchor; // Anchor point, if any. See NFA.h -} ACCEPT; +int dfa(struct pgen_t *pgen, struct dfa_table_row **tab, struct accept_t **acc); -#endif /* __DFA_H */ +#endif @@ -0,0 +1,194 @@ +#include <stdlib.h> +#include <stdio.h> +#include "textutils.h" +#include "nfa.h" +#include "dfa.h" +#include "main.h" + +/* + * + * PRINTING + * + */ + +/** + * pheader + * ``````` + * Print out a header comment that describes the uncompressed DFA. + * + * @fp: output stream + * @dtran: DFA transition table + * @nrows: Number of states in dtran[] + * @accept: set of accept states in dtran[] + */ +void pheader(FILE *fp, struct dfa_row **dtran, int nrows, struct accept_t *accept) +{ + int last_transition; + int chars_printed; + char *bin_to_ascii(); + int i; + int j; + + fprintf(fp, "#ifdef __NEVER__\n" ); + fprintf(fp, "/*---------------------------------------------------\n"); + fprintf(fp, " * DFA (start state is 0) is:\n *\n" ); + + for (i=0; i<nrows; i++) { + if (!accept[i].string) { + fprintf(fp, " * State %d [nonaccepting]", i); + } else { + fprintf(fp, " * State %d [accepting, line %d <", + i , ((int *)(accept[i].string))[-1]); + + fputstr(accept[i].string, 20, fp); + fprintf(fp, ">]"); + + if (accept[i].anchor) + fprintf(fp, " Anchor: %s%s", + accept[i].anchor & START ? "start " : "", + accept[i].anchor & END ? "end" : ""); + } + + last_transition = -1; + + for (j=0; j<MAX_CHARS; j++) { + if (dtran[i][j] != F) { + if (dtran[i][j] != last_transition) { + fprintf(fp, "\n * goto %2d on ", dtran[i][j]); + chars_printed = 0; + } + fprintf(fp, "%s", bin_to_ascii(j,1) ); + + if ((chars_printed += strlen(bin_to_ascii(j,1))) > 56) { + fprintf(fp, "\n * "); + chars_printed = 0; + } + + last_transition = dtran[i][j]; + } + } + fprintf(fp, "\n"); + } + fprintf(fp," */\n\n"); + fprintf(fp,"#endif\n"); +} + + +/** + * pdriver + * ``````` + * Print the array of accepting states, the driver itself, and the case + * statements for the accepting strings. + * + * @output: Output stream + * @nrows: number of states in dtran[] + * @accept: set of accepting states in dtran[] + **/ +void pdriver(FILE *output, int nrows, ACCEPT *accept) +{ + int i; + + fprintf(output, + "/*\n" + " * The Yyaccept array has two purposes. If Yyaccept[i] is 0,\n" + " * then state i is nonaccepting. If it is non-zero, then the\n" + " * number determines whether the string is anchored.\n" + " *\t 1 = anchored at start of line\n" + " *\t 2 = anchored at end of line\n" + " *\t 3 = both\n" + " *\t 4 = neither\n" + " */\n" + "YYPRIVATE YY_TYPE Yyaccept[] = \n"); + fprintf(output, "{\n"); + + /* Print the array of accepting states */ + for (i=0; i<nrows; i++) { + if (!accept[i].string) + fprintf(output, "\t0 "); + else + fprintf(output, "\t%-3d", accept[i].anchor ? accept[i].anchor :4); + + fprintf(output, "%c /* State %-3d */\n", i == (nrows -1) ? ' ' : ',' , i); + } + fprintf(output, "};\n\n"); + + /* Print code above case statements */ + driver_2(output, !NOLINE); + + /* Print case statements */ + for (i=0; i<nrows; i++) { + if (accept[i].string) { + fprintf(output, "\t\tcase %d:\t\t\t\t\t/* State %-3d */\n",i,i); + if (!NOLINE) + fprintf(output, "#line %d \"%s\"\n", + *( (int *)(accept[i].string) - 1), + Input_file_name); + + fprintf(output, "\t\t %s\n", accept[i].string); + fprintf(output, "\t\t break;\n"); + } + } + /* Code below cases. */ + driver_2(output, !NOLINE); +} + + + +/** + * print_array + * ``````````` + * Print the C source code to initialize the two-dimensional array pointed + * to by "array." Prints only the initialization part of the declaration. + * + * @fp: output stream. + * @array: DFA transition table + * @nrows: number of rows in array[] + * @ncols: number of cols in array[] + */ +void print_array(FILE *fp, int *array, int nrows, int ncols) +{ + #define NCOLS 10 // Num. columns used to print arrays + int col; // Output column. + int i; + + fprintf(fp, "{\n"); + + for(i=0; i<nrows; i++) { + fprintf(fp, "/* %02d */ { ", i); + + for (col=0; col<ncols; col++) { + fprintf(fp, "%3d" , *array++); + if (col < ncols-1) + fprintf(fp, ", "); + + if ((col % NCOLS)==NCOLS-1 && col!=ncols-1) + fprintf(fp, "\n "); + } + + if (col > NCOLS) + fprintf(fp, "\n "); + + fprintf(fp, " }%c\n", i < nrows-1 ? ',' : ' '); + } + fprintf(fp, "};\n"); +} + + +/** + * defnext + * ``````` + * Print the default yy_next(s,c) subroutine for an uncompressed table. + * + * @fp: output stream + * @name: Definition name + */ +void defnext(FILE *fp, char *name) +{ + fprintf(fp, "/*\n" + " * yy_next(state,c) is given the current state and input\n" + " * character and evaluates to the next state.\n" + " */\n" + "#define yy_next(state, c) %s[state][c]\n", name); +} + + @@ -0,0 +1,12 @@ +#ifndef _GENERATE_H +#define _GENERATE_H + +#include "main.h" +#include "dfa.h" + +void pheader(FILE *fp, struct dfa_table_row **dtran, int nrows, struct accept_t *accept); +void pdriver(FILE *out, int nrows, struct accept_t *accept); +void print_array(FILE *fp, int *array, int nrows, int ncols); +void defnext(FILE *fp, char *name); + +#endif @@ -1,56 +1,60 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <stdbool.h> + +#include "lib/textutils.h" #include "lib/error.h" +#include "nfa.h" +#include "main.h" /* * Lexical analyzer. */ -enum token current_tok; -char *input_line; -char *input_start; -char *lexeme; +enum token Token; +char *Current; +char *Start; +char *Lexeme; -enum token lex_advance(void) +enum token advance(void) { - static bool inquote = false; // When processing a quoted string. + static bool in_quote = false; // When processing a quoted string. bool saw_escape; // Saw a backslash escape - static char *stack[SSIZE]; // Import stack - static char **sp = NULL; // Stack pointer + static char *stack[32]; // Import stack + static char **sp = NULL; // Stack pointer /* Initialize the stack pointer. */ if (!sp) sp = stack - 1; // Necessary for a large model. /* Get the next line */ - if (current_tok == EOS) { - if (inquote) + if (Token == EOS) { + + if (in_quote) halt(SIGABRT, "No newline."); - /* - * Sit in this loop until a non-blank line is read - * into the "Input" array. + + /* + * Loop until a non-blank line is read. */ do { - /* Then at the end of file. */ - if (!(input = (getline)())) { - current_tok = END_OF_INPUT; + if (!(Current = getline(&Current, MAXLINE, pgen->in))) { + Token = END_OF_INPUT; goto exit; } - /* Ignore leading whitespace. */ - while (isspace(*input)) - input++; + while (isspace(*Current)) + Current++; - } while (!*input); /* Ignore blank lines. */ + } while (!*Current); /* Ignore blank lines. */ - input_start = input; // Remember start of line for errors. + Start = Current; // Remember start of line for errors. } - while (*input == '\0') { + while (*Current == '\0') { /* Restore previous input source. */ if (INBOUNDS(stack, sp)) { - input = *sp--; + Current = *sp--; continue; } @@ -58,18 +62,18 @@ enum token lex_advance(void) * No more input sources to restore; you're at the real end * of the input string. */ - current_tok = EOS; - lexeme = '\0'; + Token = EOS; + Lexeme = '\0'; goto exit; } - if (!inquote) { + if (!in_quote) { /* Macro expansion required. */ - while (*input == '{') { + while (*Current == '{') { /* Stack current input string. */ - *++sp = input; + *++sp = Current; /* Use macro body as input string. */ - input = get_macro(sp); + Current = get_macro(sp); if (TOOHIGH(stack,sp)) halt(SIGABRT, "Stack overflow"); @@ -78,40 +82,40 @@ enum token lex_advance(void) /* * At either start or end of a quoted string. All characters are - * treated as literals while inquote is true. + * treated as literals while in_quote is true. */ - if (*input == '"') { - inquote = ~inquote; - if (!*++input) { - current_tok = EOS; - lexeme = '\0'; + if (*Current == '"') { + in_quote = ~in_quote; + if (!*++Current) { + Token = EOS; + Lexeme = '\0'; goto exit; } } - saw_escape = (*input == '\\'); + saw_escape = (*Current == '\\'); - if (!inquote) { - if (isspace(*input)) { - current_tok = EOS; - lexeme = '\0'; + if (!in_quote) { + if (isspace(*Current)) { + Token = EOS; + Lexeme = '\0'; goto exit; } - lexeme = esc(&input); + Lexeme = esc(&Current); } else { - if (saw_escape && input[1] == '"') { + if (saw_escape && Current[1] == '"') { /* Skip the escaped character. */ - input += 2; - lexeme = '"'; + Current += 2; + Lexeme = '"'; } else { - lexeme = *input++; + Lexeme = *Current++; } } - current_tok = (inquote || saw_escape) ? L : [lexeme]; + Token = (in_quote || saw_escape) ? L : TOKEN_MAP[Lexeme]; exit: - return current_tok; + return Token; } @@ -143,24 +147,20 @@ enum token lex_advance(void) * ``````` * Build the NFA */ -NFA *machine(void) +struct nfa_t *machine(void) { - NFA *start; - NFA *p; - - ENTER("machine"); + struct nfa_t *start; + struct nfa_t *p; p = start = new_nfa(); p->next = rule(); - while (!MATCH(END_OF_INPUT)) { + while (Token != END_OF_INPUT) { p->next2 = new_nfa(); p = p->next2; p->next = rule(); } - LEAVE("machine"); - return start; } @@ -177,14 +177,12 @@ NFA *machine(void) * action --> <tabs> <string of characters> * epsilon */ -NFA *rule(void) +struct nfa_t *rule(void) { - NFA *start = NULL; - NFA *end = NULL; + struct nfa_t *start = NULL; + struct nfa_t *end = NULL; int anchor = NONE; - ENTER("rule"); - if (MATCH(AT_BOL)) { start = new_nfa(); start->edge = '\n'; @@ -220,8 +218,6 @@ NFA *rule(void) end->anchor = anchor; advance(); // Skip past EOS - LEAVE("rule"); - return start; } @@ -249,13 +245,11 @@ NFA *rule(void) * cat_expr * do the OR */ -void expr(NFA **startp, NFA **endp) +void expr(struct nfa_t **startp, struct nfa_t **endp) { - NFA *e2_start = NULL; /* expression to right of | */ - NFA *e2_end = NULL; - NFA *p; - - ENTER("expr"); + struct nfa_t *e2_start = NULL; /* expression to right of | */ + struct nfa_t *e2_end = NULL; + struct nfa_t *p; cat_expr(startp, endp); @@ -273,7 +267,6 @@ void expr(NFA **startp, NFA **endp) e2_end ->next = p; *endp = p; } - LEAVE("expr"); } @@ -292,31 +285,27 @@ void expr(NFA **startp, NFA **endp) * cat_expr' -> | factor cat_expr' * epsilon */ -void cat_expr(NFA **startp, NFA **endp ) +void cat_expr(struct nfa_t **startp, struct nfa_t **endp ) { - NFA *e2_start; - NFA *e2_end; + struct nfa_t *e2_start; + struct nfa_t *e2_end; - ENTER("cat_expr"); - - if (first_in_cat(current_tok)) + if (first_in_cat(Token)) factor(startp, endp); - while (first_in_cat(current_tok)) { + while (first_in_cat(Token)) { factor(&e2_start, &e2_end); - memcpy(*endp, e2_start, sizeof(NFA)); + memcpy(*endp, e2_start, sizeof(struct nfa_t)); discard(e2_start); *endp = e2_end; } - - LEAVE("cat_expr"); } -int first_in_cat(TOKEN tok) +int first_in_cat(enum token tok) { switch(tok) { @@ -349,12 +338,10 @@ int first_in_cat(TOKEN tok) /* * factor --> term* | term+ | term? */ -void factor(NFA **startp, NFA **endp) +void factor(struct nfa_t **startp, struct nfa_t **endp) { - NFA *start; - NFA *end; - - ENTER("factor"); + struct nfa_t *start; + struct nfa_t *end; term(startp, endp); @@ -376,8 +363,6 @@ void factor(NFA **startp, NFA **endp) *endp = end; advance(); } - - LEAVE("factor"); } @@ -389,13 +374,11 @@ void factor(NFA **startp, NFA **endp) * but not a carriage return (\r). All of these are single nodes in the * NFA. */ -void term(NFA **startp, NFA **endp) +void term(struct nfa_t **startp, struct nfa_t **endp) { - NFA *start; + struct nfa_t *start; int c; - ENTER("term"); - if (MATCH(OPEN_PAREN)) { advance(); expr(startp, endp); @@ -449,35 +432,32 @@ void term(NFA **startp, NFA **endp) advance(); } } - LEAVE("term"); } -void dodash(SET *set) +void dodash(struct set_t *set) { register int first; /* Treat [-...] as a literal dash, but print a warning. */ if (MATCH(DASH)) { - warning(W_STARTDASH); - ADD(set, lexeme); + ADD(set, Lexeme); advance(); } for (; !MATCH(EOS) && !MATCH(CCL_END); advance()) { if (!MATCH(DASH)) { - first = lexeme; - ADD(set, lexeme); + first = Lexeme; + ADD(set, Lexeme); /* Looking at a dash */ } else { advance(); /* Treat [...-] as literal */ if (MATCH(CCL_END)) { - warning(W_ENDDASH); ADD(set, '-'); } else { - for (; first<=lexeme; first++) + for (; first<=Lexeme; first++) ADD(set, first); } } @@ -0,0 +1,69 @@ +#ifndef _LEXER_H +#define _LEXER_H + + +enum token { + EOS = 1, // end of string + ANY, // . + AT_BOL, // ^ + AT_EOL, // $ + CCL_END, // ] + CCL_START, // [ + CLOSE_CURLY, // } + CLOSE_PAREN, // ) + CLOSURE, // * + DASH, // - + END_OF_INPUT, // EOF + L, // literal character + OPEN_CURLY, // { + OPEN_PAREN, // ( + OPTIONAL, // ? + OR, // | + PLUS_CLOSE // + +}; + + + +enum token advance(void); +struct nfa_t *machine(void); +struct nfa_t *rule(void); + +void expr(struct nfa_t **startp, struct nfa_t **endp); +void cat_expr(struct nfa_t **startp, struct nfa_t **endp); +int first_in_cat(enum token tok); + +void factor(struct nfa_t **startp, struct nfa_t **endp); +void term(struct nfa_t **startp, struct nfa_t **endp); +void dodash(struct set_t *set); + + + +enum token TOKEN_MAP[] = { +// ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +// ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +// ^^ ^_ SPACE ! " # $ % & ' + L, L, L, L, L, L, AT_EOL, L, L, L, +// ( ) * + , - . + OPEN_PAREN, CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY, +// / 0 1 2 3 4 5 6 7 8 9 : ; < = + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +// > ? + L, OPTIONAL, +// @ A B C D E F G H I J K L M N + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +// O P Q R S T U V W X Y Z + L, L, L, L, L, L, L, L, L, L, L, L, +// [ \ ] ^ + CCL_START, L, CCL_END, AT_BOL, +// _ ` a b c d e f g h i j k l m + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +// n o p q r s t u v w x y z + L, L, L, L, L, L, L, L, L, L, L, L, L, +// { | } DEL + OPEN_CURLY, OR, CLOSE_CURLY, L +}; + + +#endif diff --git a/lib/error.c b/lib/error.c new file mode 100644 index 0000000..c0ef8fa --- /dev/null +++ b/lib/error.c @@ -0,0 +1,196 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <stdarg.h> + +#include "error.h" + +#define USE_ERRNO_H + +#ifdef USE_ERRNO_H +char *etag[]={ "", "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO", + "ENXIO", "E2BIG", "ENOEXEC", "EBADF", "ECHILD", + "EAGAIN", "ENOMEM", "EACCES", "EFAULT", "ENOTBLK", + "EBUSY", "EEXIST", "EXDEV", "ENODEV", "ENOTDIR", + "EISDIR", "EINVAL", "ENFILE", "EMFILE", "ENOTTY", + "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE", "EROFS", + "EMLINK", "EPIPE", "EDOM", "ERANGE" }; + +char *emsg[]={ + "", + "Operation not permitted", + "No such file or directory", + "No such process", + "Interrupted system call", + "I/O error", + "No such device or address", + "Argument list too long", + "Exec format error", + "Bad file number", + "No child processes", + "Try again", + "Out of memory", + "Permission denied", + "Bad address", + "Block device required", + "Device or resource busy", + "File exists", + "Cross-device link", + "No such device", + "Not a directory", + "Is a directory", + "Invalid argument", + "File table overflow", + "Too many open files", + "Not a typewriter", + "Text file busy", + "File too large", + "No space left on device", + "Illegal seek", + "Read-only file system", + "Too many links" + "Broken pipe", + "Math argument out of domain of func", + "Math result not representable" +}; +#endif /* USE_ERRNO_H */ + + +/** + * abort_report + * ```````````` + * Print a formatted report to stderr and exit. + * + * @fmt: a printf-style format string + * @...: the variable argument list to the format string + */ +void abort_report(const char *fmt, ...) +{ + char buf[1000]; + va_list args; + + /* Write formatted output to stream */ + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + + #ifdef USE_ERRNO_H + if (errno) + fprintf(stderr, "%s (%d): %s\n", etag[errno],errno,emsg[errno]); + #endif + + fprintf(stderr, "The handler reported: \"%s\"\n", buf); + + exit(1); +} + + +/** + * raise_report + * ```````````` + * Print a formatted report to stderr and raise a signal. + * + * @signo: POSIX signal number to raise. + * @fmt : printf-style format string. + * @... : the variable argument list to the format string. + */ +void raise_report(sig_t signo, const char *fmt, ...) +{ + char buf[1000]; + va_list args; + + /* Write formatted output to stream */ + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + + #ifdef USE_ERRNO_H + if (errno) + fprintf(stderr, "%s (%d): %s\n", etag[errno],errno,emsg[errno]); + #endif + + fprintf(stderr, "The handler reported: \"%s\"\n", buf); + + raise(signo); +} + + + +/****************************************************************************** + * SIGNAL HANDLING + * + * Overview + * -------- + * Signals are usually Bad News that a process receives from the kernel. + * + * + * Signal Default action Description + * -------------------------------------------------------------------------- + * SIGABRT A Process abort signal. + * SIGALRM T Alarm clock. + * SIGBUS A Access to an undefined memory portion. + * SIGCHLD I Child process terminated/stopped/continued. + * SIGCONT C Continue executing, if stopped. + * SIGFPE A Erroneous arithmetic operation. + * SIGHUP T Terminal hangup. + * SIGILL A Illegal instruction. + * SIGINT T Terminal interrupt. + * SIGKILL T Kill (cannot be caught or ignored). + * SIGPIPE T Write on a pipe with no one to read it. + * SIGQUIT A Terminal quit signal. + * SIGSEGV A Invalid memory reference. + * SIGSTOP S Stop executing (cannot be caught or ignored). + * SIGTERM T Termination signal. + * SIGTSTP S Terminal stop signal. + * SIGTTIN S Background process attempting read. + * SIGTTOU S Background process attempting write. + * SIGUSR1 T User-defined signal 1. + * SIGUSR2 T User-defined signal 2. + * SIGPOLL T Pollable event. + * SIGPROF T Profiling timer expired. + * SIGSYS A Bad system call. + * SIGTRAP A Trace/breakpoint trap. + * SIGURG I High bandwidth data availible at a socket. + * SIGVTALRM T Virtual timer expired. + * SIGXCPU A CPU time limit exceeded. + * SIGXFSZ A File size limit exceeded. + * -------------------------------------------------------------------------- + * + * + * signal.h defines the sigaction() function: + * + * int sigaction(int sig, const struct sigaction *restrict act, + * struct sigaction *restrict oact); + * + * where 'act' specifies the implementation-defined signal handling, and + * 'oact' refers to the location at which the default signal handling + * configuration will be stored. These are of type struct sigaction, which + * is also defined in signal.h. See man(3) signal.h + * + ******************************************************************************/ + + +/** + * sigreg -- register a function to handle standard signals + */ +void sigreg(sig_handler_t handler) +{ + /* You say stop */ + signal(SIGINT, handler); + signal(SIGABRT, handler); + signal(SIGINT, handler); + signal(SIGTERM, handler); + signal(SIGQUIT, handler); + signal(SIGSTOP, handler); + + /* I say go */ + signal(SIGPIPE, handler); + signal(SIGSEGV, handler); + + /* You say goodbye */ + signal(SIGUSR1, handler); + signal(SIGUSR2, handler); +} + + diff --git a/lib/error.h b/lib/error.h new file mode 100644 index 0000000..571646b --- /dev/null +++ b/lib/error.h @@ -0,0 +1,31 @@ +#ifndef _ERROR_H +#define _ERROR_H +#include "util.h" +#include <errno.h> +#include <signal.h> +#include <stdarg.h> + +/* + * Exit the program and print a diagnostic message + */ +#define bye(...) \ + (VA_NUM_ARGS(__VA_ARGS__) == 1) ? abort_report(__VA_ARGS__, "") \ + : abort_report(__VA_ARGS__) \ + +/* + * Raise a signal and print an error. + */ +#define halt(sig, ...) \ + (VA_NUM_ARGS(__VA_ARGS__) == 1) ? raise_report(sig, __VA_ARGS__, "") \ + : raise_report(sig, __VA_ARGS__) \ + +void abort_report(const char *fmt, ...); +void raise_report(int signo, const char *fmt, ...); + + +typedef void (*sig_handler_t)(int signo); +void sigreg(sig_handler_t handler); + + +#endif + diff --git a/lib/set.c b/lib/set.c new file mode 100644 index 0000000..fd0c129 --- /dev/null +++ b/lib/set.c @@ -0,0 +1,501 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <ctype.h> +#include <signal.h> +#include <string.h> +#include "error.h" + +#include "set.h" + +/* + * new_set + * ``````` + * Create a new SET. + * + * Returns: Pointer to an allocated default set. + */ +struct set_t *new_set(void) +{ + struct set_t *new; + + if (!(new = calloc(1, sizeof(struct set_t)))) { + halt(SIGABRT, "No memory to create SET.\n") + return NULL; // "Shouldn't happen." + } + + new->map = new->defmap; + new->nwords = _DEFWORDS; + new->nbits = _DEFBITS; + + return new; +} + +/** + * del_set + * ``````` + * Destroy a set created with new_set(). + * + * @set : pointer to a SET. + * Returns: Nothing. + */ +void del_set(struct set_t *set) +{ + if (set->map != set->defmap) + free(set->map); + + free(set); +} + + +/** + * dup_set + * ``````` + * Create a new set that has the same members as the input set. + * + * @set : Input set (to be duplicated). + * Returns: Pointer to a new set with members identical to @set. + */ +struct set_t *dup_set(struct set_t *set) +{ + struct set_t *new; + + if (!(new = calloc(1, sizeof(struct set_t)))) { + halt(SIGABRT, "No memory to create SET.\n"); + return NULL; // "Shouldn't happen." + } + + new->compl = set->compl; + new->nwords = set->nwords; + new->nbits = set->nbits; + + if (set->map == set->defmap) { + new->map = new->defmap; + memcpy(new->defmap, set->defmap, _DEFWORDS * sizeof(_SETTYPE)); + } else { + new->map = (_SETTYPE *)malloc(set->nwords * sizeof(_SETTYPE)); + if (!new->map) + halt(SIGABRT, "No memory to duplicate SET.\n"); + + memcpy(new->map, set->map, set->nwords * sizeof(_SETTYPE)); + } + + return new; +} + + +/** + * _add_set + * ```````` + * Called by the ADD() macro when the set isn't big enough. + * + * @set: Pointer to a set. + * @bit: Bit to be added to set. + */ +int _add_set(struct set_t *set, int bit) +{ + grow_set(_ROUND(bit), set); + return _GBIT(set, bit, |=); +} + + +/** + * grow_set + * ```````` + * @set : Pointer to set to be enlarged. + * @need: Number of words needed (target). + * + * NOTE + * This routine calls malloc() and is rather slow. Its use should be + * limited, and avoided if possible. + */ +void grow_set(struct set_t *set, int need) +{ + _SETTYPE *new; + + if (!set || need <= set->nwords) + return; + + if (!(new = (_SETTYPE *) malloc(need * sizeof(_SETTYPE)))) + halt(SIGABRT, "No memory to expand SET.\n"); + + memcpy(new, set->map, set->nwords * sizeof(_SETTYPE)); + memcpy(new + set->nwords, 0, (need - set->nwords) * sizeof(_SETTYPE)); + + if (set->map != set->defmap) + free(set->map); + + set->map = new; + set->nwords = (unsigned char)need; + set->nbits = need * _BITS_IN_WORD; +} + + +/** + * num_set + * ``````` + * Get the number of elements (non-zero bits) in a set. NULL sets are empty. + * + * @set : Pointer to the set. + * Return: number of set bits in @set. + */ +int num_set(struct set_t *set) +{ + /* + * Neat macro that will expand to a lookup table indexed by a + * number in the range 0-255, with tab[i] == the number of set + * bits in i. + * + * Hallvard Furuseth suggested this approach on Sean Anderson's + * Bit Twiddling Hacks website on July 14, 2009. + */ + static const unsigned char nbits[256] = + { + #define B2(n) n, n+1, n+1, n+2 + #define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) + #define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) + B6(0), B6(1), B6(1), B6(2) + }; + + unsigned int count = 0; + unsigned char *p; + int i; + + if (!set) + return 0; + + p = (unsigned char *)set->map; + + for (i=_BYTES_IN_ARRAY(set->nwords); --i >= 0;) + count += nbits[*p++]; + + return count; +} + + +/** + * _set_test + * ````````` + * Compares two sets. Returns as follows: + * + * _SET_EQUIV Sets are equivalent. + * _SET_INTER Sets intersect but aren't equivalent. + * _SET_DISJ Sets are disjoint. + * + * NOTE + * The smaller set is made larger if the two sets are different sizes. + */ +int _set_test(struct set_t *set1, struct set_t *set2) +{ + _SETTYPE *p1; + _SETTYPE *p2; + int rval; + int i; + + rval = _SET_EQUIV; + i = max(set1->nwords, set2->nwords); + + grow_set(i, set1); + grow_set(i, set2); + + p1 = set1->map; + p2 = set2->map; + + for (; --i >= 0; p1++, p2++) { + if (*p1 != *p2) + return *p1 - *p2; + } + + /* + * You get here if the sets are not equivalent. + * If the sets intersect, you can return immediately, + * but have to keep going in the case of disjoint + * sets because they might actually intersect + * at some yet-unseen byte. + */ + if ((j = set1->nwords - i) > 0) // set1 is larger + while (--j >= 0) { + if (*p1++) + return 1; + } + } else if ((j = set2->nwords - i) > 0) // set2 is larger + while (--j >= 0) { + if (*p2++) + return -1; + } + } + return 0; // they are equivalent. +} + + +/** + * set_cmp + * ``````` + * Yet another comparison function. Works like strcmp(). + * + * @set1 : Pointer to a set to be compared. + * @set2 : Pointer to another set. + * Returns: 0 if set1==set2, < 0 if set1<set2, > 0 if set1>set2. + */ +void set_cmp(struct set_t *set1, struct set_t *set2) +{ + _SETTYPE *p1; + _SETTYPE *p2; + int j; + int i; + + i = j = min(set1->nwords, set2->nwords); + + for (p1 = set1->map, p2 = set2->map; --j >= 0; p1++, p2++) { + if (*p1 != *p2) + return *p1 - *p2; + } + + /* + * You get here only if all words in both sets are the same. + * Check the tail end of the larger array for all zeroes. + */ + if ((j = set1->nwords - i) > 0) // set1 is larger + while (--j >= 0) { + if (*p1++) + return 1; + } + } else if ((j = set2->nwords - i) > 0) // set2 is larger + while (--j >= 0) { + if (*p2++) + return -1; + } + } + return 0; // they are equivalent. +} + + +/** + * sethash + * ``````` + * Hash a set by summing together the words in the bitmap. + * + * @set : Pointer to a set. + * Return: hashed value. + */ +unsigned sethash(struct set_t *set) +{ + _SETTYPE *p; + unsigned total; + int j; + + total = 0; + j = set->nwords; + p = set->map; + + while (--j >= 0) + total += *p++; + + return total; +} + +/** + * is_subset + * ````````` + * Attempt to determine if 'sub' is a subset of 'set'. + * + * @set : Pointer to a set. + * @sub : Pointer to a possible subset of @set. + * Return: 1 if @sub is a subset of @set, otherwise 0. + * + * NOTE + * If @sub is larger than @set, the extra bytes must be all zeroes. + */ +int is_subset(struct set_t *set, struct set_t *sub) +{ + _SETTYPE *subsetp; + _SETTYPE *setp; + int common; // Number of bytes in potential subset. + int tail; // Number of implied 0 bytes in subset. + + if (sub->nwords > set->nwords) { + common = set->nwords; + tail = sub->nwords - common; + } else { + common = sub->nwords; + tail = 0; + } + + subsetp = sub->map; + setp = set->map; + + for (; --common >= 0; subsetp++, setp++) { + if ((*subsetp & *setp) != *subsetp) + return 0; + } + + while (--tail >= 0) { + if (*subsetp++) + return 0; + } + + return 1; +} + + +/** + * _set_op + * ``````` + * Performs binary operations depending on op. + * + * @op: One of _UNION, _INTERSECT, _DIFFERENCE, or _ASSIGN. + * @dest : Destination set. + * @src : Source set. + * Returns: nothing. + */ +void _set_op(int op, struct set_t *dest, struct set_t *src) +{ + _SETTYPE *d; // Destination map. + _SETTYPE *s; // Source map. + unsigned ssize; // Number of words in source set. + int tail // Dest is this many words bigger than source. + + ssize = src->nwords; + + /* Make sure destination is big enough. */ + if ((unsigned)dest->nwords < ssize) + grow_set(ssize, dest); + + tail = dest->nwords - ssize; + d = dest->map; + s = src->map; + + switch (op) { + case _UNION: + while (--ssize >= 0) + *d++ |= *s++; + break; + case _INTERSECT: + while (--ssize >= 0) + *d++ &= *s++; + while (--tail >= 0) + *d++ = 0; + break; + case _DIFFERENCE: + while (--ssize >= 0) + *d++ ^= *s++; + break; + case _ASSIGN: + while (--ssize >= 0) + *d++ = *s++; + while (--tail >= 0) + *d++ = 0; + break; + } +} + +/** + * invert_set + * `````````` + * Physically invert the bits in the set. Compare with COMPLIMENT(). + * + * @set : Pointer to a set. + * Return: Nothing. + */ +void invert_set(struct set_t *set) +{ + _SETTYPE *p; + _SETTYPE *end; + + for (p = set->map, end = p + set->nwords; p < end; p++) + *p = ~*p; +} + + +/** + * trunc_set + * ````````` + * Clear a set and truncate it to the default size. Compare with CLEAR(). + * + * @set : Pointer to a set. + * Return: Nothing. + */ +void trunc_set(struct set_t *set) +{ + if (set->map != set->defmap) { + free(set->map); + set->map = set->defmap; + } + set->nwords = _DEFWORDS; + set->nbits = _DEFBITS; + memset(set->defmap, 0, sizeof(set->defmap)); +} + + +/** + * next_member + * ``````````` + * Access all members of a set sequentially. + * + * @set : Pointer to a set. + * Return: value of each bit. + * + * USAGE + * Like strtok() + * set == NULL resets. + * set changed from last call resets and returns first element. + * otherwise the next element is returned, or else -1 if none. + */ +int next_member(struct set_t *set) +{ + static struct set_t *oset = 0; + static int current = 0; + _SETTYPE *map; + + if (!set) + return ((int)(oset = NULL)); + + if (oset != set) { + oset = set; + current = 0; + + for (map = set->map; *map == 0 && current < set->nbits; ++map) + current += _BITS_IN_WORD; + } + + /* + * The increment must be put in the test because if TEST() + * evaluates true, the increment on the right of the for would + * never be executed. + */ + while (current++ < set->nbits) { + if (TEST(set, current-1)) + return (current - 1); + } + return (-1); +} + + +/** + * print_set + * ````````` + * Print a set in human-readable form. + * + * @set: pointer to a set + */ +void print_set(struct set_t *set) +{ + int i; + + if (!set) + printf("Null set.\n"); + + else { + next_member(NULL); + while ((i = next_member(set)) >= 0) { + did_something++; + printf("%d", i); + } + next_member(NULL); + + if (!did_something) + printf("Empty set.\n"); + } +} + + diff --git a/lib/set.h b/lib/set.h new file mode 100644 index 0000000..ab8d982 --- /dev/null +++ b/lib/set.h @@ -0,0 +1,129 @@ +#ifndef _SET_H +#define _SET_H + + +/* One cell in the bitmap */ +unsigned short _SETTYPE; + + +#define _BITS_IN_WORD (16) +#define _BYTES_IN_ARRAY(x) (x << 1) +#define _DEFWORDS 8 +#define _DEFBITS (_DEFWORDS * _BITS_IN_WORD) + +/* + * Evaluates to the array element that holds the bit x. + * Performs a simple integer divide by 16, which is + * implemented as a right shift of 4. + */ +#define _DIV_WSIZE(x) ((unsigned)(x) >> 4) + +/* + * Evaluates to the position of the bit within the word, + * i.e. the offset in bits from the least-significant bit. + * Performs a modulus 16 using a bitwise AND. + */ +#define _MOD_WSIZE(x) ((x) & 0x0f) + +/* + * Used to expand the size of the array. An array grows in + * _DEFWORDS-sized chunks. + * + * >>3 is equivalent to integer division by 8. + * <<3 is equivalent to integer multiplication by 8. + * + * Imagine we start with the default array of 8 chunks, and + * wish to add the number 200 to the set. The array must be + * expanded to do this, and after the expansion, the array + * should have 16 elements in it: + * + * (((_DIV_WSIZE(200) + 8) >> 3) << 3) + * + * (((((unsigned)(200) >> 4) + 8) >> 3) << 3) + * (((12 + 8) >> 3) << 3) + * ((20 >> 3) << 3) + * (2 << 3) + * 16 + */ +#define _ROUND(bit) (((_DIV_WSIZE(bit) + 8) >> 3) << 3) + + +/* + * The defmap array is the default bitmap. Initially, map is + * set to point at defmap. When the array grows, however, instead + * of calling realloc() to change the size of defmap (and thus _set_), + * map is simply pointed at the newly malloc'd array. + * + * The reason for this is that realloc() will copy the entire memory + * array to the newly allocated one, whereas only the map needs to be + * copied. You can thus save time at the expense of some memory if you + * do it yourself. + */ +struct set_t { + unsigned char nwords; // Number of words in the map + unsigned char compl; // Is a negative true set if true + unsigned nbits; // Number of bits in map + _SETTYPE *map; // Pointer to the map + _SETTYPE defmap[_DEFWORDS]; // The map itself +}; + + +/* + * Op arguments passed to _set_op + */ +#define _UNION 0 // x is in s1 or s2 +#define _INTERSECT 1 // x is in s1 and s2 +#define _DIFFERENCE 2 // (x in s1) && (x not in s2) +#define _ASSIGN 4 // s1 = s2 + +#define UNION(d,s) _set_op(_UNION, d, s) +#define INTERSECT(d,s) _set_op(_INTERSECT, d, s) +#define DIFFERENCE(d,s) _set_op(_DIFFERENCE, d, s) +#define ASSIGN(d,s) _set_op(_ASSIGN, d, s) + +#define CLEAR(s) memset((s)->map, 0, (s)->nwords * sizeof(_SETTYPE)) +#define FILL(s) memset((s)->map, ~0, (s)->nwords * sizeof(_SETTYPE)) +#define COMPLIMENT(s) ((s)->compl = ~(s)->compl) +#define INVERT(s) invert(s) + +/* Value returned from _set_test */ +#define _SET_EQUIV 0 +#define _SET_DISJ 1 +#define _SET_INTER 2 + +#define IS_DISJOINT(s1,s2) (_set_test(s1,s2) == _SET_DISJ) +#define IS_INTERSECTING(s1,s2) (_set_test(s1,s2) == _SET_INTER) +#define IS_EQUIVALENT(s1,s2) (set_cmp((a),(b)) == 0) +#define IS_EMPTY(s) (set_num(s) == 0) + +/* + * CAVEAT + * Heavy duty side effects ahead, be aware. + */ + +#define _GETBIT(s,x,op) (((s)->map)[_DIV_WSIZE(x)] op (1 << _MOD_WSIZE(x))) + +#define ADD(s,x) (((x) >= (s)->nbits) ? _add_set(s,x) : _GETBIT(s,x,|=)) +#define REMOVE(s,x) (((x) >= (s)->nbits) ? 0 : _GETBIT(s,x,&=~)) +#define TEST(s,x) ((MEMBER(s,x)) ? !(s)->compl : (s)->compl ) + + +struct set_t *new_set(void); +void del_set(struct set_t *set); +struct set_t *dup_set(struct set_t *set); +int _add_set(struct set_t *set, int bit); +void grow_set(struct set_t *set, int need); +void _set_op(int op, struct set_t *dest, struct set_t *src); +int _set_test(struct set_t *set1, struct set_t *set2); + +int set_num (struct set_t *set); +void set_cmp (struct set_t *set1, struct set_t *set2); +unsigned set_hash (struct set_t *set); +void set_invert(struct set_t *set); +void set_trunc (struct set_t *set); +int set_next (struct set_t *set); +void set_print (struct set_t *set); +int is_subset (struct set_t *set, struct set_t *sub); + + +#endif diff --git a/lib/textutils.c b/lib/textutils.c new file mode 100644 index 0000000..aa5e795 --- /dev/null +++ b/lib/textutils.c @@ -0,0 +1,756 @@ +/* + * textutils.c -- byte-oriented character and string routines. + * + * Copyright (C) 2012 Jason Linehan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <ctype.h> +#include <stdbool.h> + +#include "textutils.h" + + +/** + * szero + * ````` + * Given a character buffer, set the contents to '\0'. + * + * @str : pointer to a byte buffer + * Return: nothing. + */ +void szero(char *str) +{ + memset(str, '\0', strlen(str)); +} + + +/** + * sdup + * ```` + * Copy *str to a newly-alloc'd buffer, and return a pointer to it. + * + * @str : pointer to a '\0'-terminated char string + * Return: pointer to a copy of *str, else NULL. + */ +char *sdup(const char *str) +{ + char *copy; + size_t len; + + len = strlen(str) + 1; + copy = malloc(len); + + return copy ? memcpy(copy, str, len) : NULL; +} + + +/** + * sldup + * ````` + * Copy *str to a newly-alloc'd buffer of size len, and return a pointer to it. + * + * @str : pointer to a '\0'-terminated char string + * @len : size of buffer (including '\0') + * Return: pointer to a copy of *str, else NULL. + */ +char *sldup(const char *str, size_t max) +{ + char *copy; + size_t len; + size_t end; + + len = strlen(str) + 1; + len = (len > max) ? max : len; // lesser of two weevils + end = len - 1; + + if (!(copy = calloc(1, len))) + return NULL; + + copy[end] = '\0'; + + return memcpy(copy, str, end); +} + + +/** + * slcpy + * ````` + * Writes at most len characters from src to dst. + * + * @dst : destination buffer + * @src : source buffer + * @len : length of source buffer + * Return: number of bytes written. + */ +size_t slcpy(char *dst, const char *src, size_t siz) +{ + const char *s; + char *d; + size_t n; + + d = dst; + s = src; + n = siz; + + /* Copy as many bytes from src as will fit in dst */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* + * Not enough room in dst, add NUL + * and traverse the rest of src + */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} + + +/** + * slcat + * ````` + * Concatenates src and dst in dst. + * + * @dst : destination buffer + * @src : source buffer + * @siz : size of source buffer + * Return: Number of bytes concatenated + */ +size_t slcat(char *dst, const char *src, size_t siz) +{ + char *d; + const char *s; + size_t n; + size_t dlen; + + d = dst; + s = src; + n = siz; + + /* + * Find the end of dst and adjust bytes + * left, but don't go past end + */ + while (n--!=0 && *d!='\0') + d++; + + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return (dlen + (s - src)); /* count does not include NUL */ +} + + +/** + * match + * ````` + * Locate first occurance of string 'needle' in string 'haystack' + * + * @haystack: the string being searched for a match + * @needle : the pattern being matched in 'haystack' + * Return : The first occurance of 'needle' + */ +char *match(const char *haystack, const char *needle) +{ + size_t len_haystack; + size_t len_needle; + + if (!needle || !haystack) + return NULL; + + len_haystack = strlen(haystack); + len_needle = strlen(needle); + + /* Needle can't be larger than haystack */ + if (len_needle > len_haystack) + return NULL; + + return memmem(haystack, needle); +} + + +/** + * field + * ````` + * Return pointer to a delimited substring (not including delimiter) + * + * @str : the string being matched against + * @delim: the delimiter to be searched for + * Return: pointer to the start of the substring + */ +char *field(const char *string, const char *delimiter) +{ + size_t offset; + char *frame; + + if (!string || !delimiter) + return NULL; + + if (frame = match(string, delimiter), !frame) + return NULL; + + offset = strlen(delimiter); + + return &frame[offset]; +} + + +/** + * pumpf + * ````` + * Write a formatted character string into an auto-allocated buffer + * + * @strp : pointer to a character buffer (will be allocated) + * @fmt : format string + * @... : format string arguments + * Return: length of the formatted string at *strp + */ +void pumpf(char **strp, const char *fmt, ...) +{ + va_list args; + size_t len; + FILE *stream; + + /* Open a new FILE stream. *strp will be dynamically allocated to + * contain characters written to the stream, and len will reflect + * these changes. See man(3) open_memstream. */ + stream = open_memstream(strp, &len); + + if (!stream) + /* Unable to open FILE stream */ + return; + + /* Write formatted output to stream */ + va_start(args, fmt); + vfprintf(stream, fmt, args); + va_end(args); + + fflush(stream); + fclose(stream); +} + + +/******************************************************************************/ +#define LONGALIGNED(X) ((long)X & (sizeof(long) - 1)) +#define LONGBYTES (sizeof(long)) +#define USE_BYTEWISE(len) ((len) < LONGBYTES) + +/* NUL expands to nonzero if X (long int) contains '\0' */ +#if LONG_MAX == 2147483647L +#define NUL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) +#elif LONG_MAX == 9223372036854775807L +#define NUL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) +#else +#error memchar: long int is neither a 32bit nor a 64bit value +#endif + +#define DETECTCHAR(X,MASK) (NUL(X ^ MASK)) /* nonzero if X contains MASK. */ + + +/** + * textutils_memchr + * ```````````````` + * Search memory starting at src for character 'c' + * If 'c' is found within 'len' characters of 'src', a pointer + * to the character is returned. Otherwise, NULL is returned. + */ +void *textutils_memchr(const void *src_void, int c, size_t len) +{ + const int *src = (const int *)src_void; + int d = c; + + #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) + unsigned long *asrc; + unsigned long mask; + int i; + + while (LONGALIGNED(src)) { + if (!len--) + return NULL; + if (*src == d) + return (void *)src; + src++; + } + /* + * If we get this far, we know that length is large and src is + * word-aligned. + */ + if (!USE_BYTEWISE(len)) { + /* + * The fast code reads the source one word at a time + * and only performs the bytewise search on word-sized + * segments if they contain the search character, which + * is detected by XOR-ing the word-sized segment with a + * word-sized block of the search character and then + * detecting for the presence of NUL in the result. + */ + asrc = (unsigned long *)src; + mask = ((d << 8) | d); + mask = ((mask << 16) | mask); + + for (i=32; i<8*LONGBYTES; i<<=1) { + mask = ((mask << i) | mask); + } + + while (len >= LONGBYTES) { + if (DETECTCHAR(*asrc, mask)) + break; + len -= LONGBYTES; + asrc++; + } + /* + * If there are fewer than LONGBYTES characters left, + * we decay to the bytewise loop. + */ + src = (int *)asrc; + } + #endif /* !PREFER_SIZE_OVER_SPEED */ + + while (len--) { + if (*src == d) + return (void *)src; + src++; + } + return NULL; +} + + +/** + * chrswp -- replace (swap) the first occurence of a char byte with another + * @src : memory area to be searched + * @at : char byte to be searched for in 'src' + * @with: char byte which will overwrite the first occurence of 'at' + * @len : maximum length to search + */ +void chrswp(char *src, char at, char with, size_t len) +{ + char *sub; + if ((sub = (char *)memchr(src, at, len)), sub!=NULL && *sub==at) + *sub = with; +} + +char *trimws(char *str) +{ + char *end; + + /* Trim leading space */ + while (isspace(*str)) + str++; + + /* Check for empty string */ + if (*str == '\0') + return str; + + /* Trim trailing space */ + end = str + strlen(str) - 1; + while (end > str && isspace(*end)) + end--; + + /* Write new NUL terminator */ + *(end+1) = '\0'; + + return str; +} + + +/* + * Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LIBC +# define __builtin_expect(expr, val) (expr) +#endif + + +/* Return the first occurrence of NEEDLE in HAYSTACK. */ +void *textutils_memmem(const void *haystack, const void *needle) +{ + const char *begin; + const char *final; + size_t len_haystack; + size_t len_needle; + + len_haystack = strlen((const char*)haystack); + len_needle = strlen((const char*)needle); + + final = (const char *)haystack + (len_haystack - len_needle); + + /* The first occurrence of the empty string is deemed to occur at + the beginning of the string. */ + if (len_needle == 0) + return (void *) haystack; + + /* Sanity check, otherwise the loop might search through the whole + memory. */ + if (__builtin_expect (len_haystack < len_needle, 0)) + return NULL; + + for (begin = (const char *)haystack; begin <= final; ++begin) { + if (begin[0] == ((const char *) needle)[0] + && !memcmp((const void *)&begin[1], (const void *)((const char *)needle+1), len_needle-1)) + return (void *)begin; + } + + return NULL; +} + +char *textutils_strstr(const char *h, const char *n) +{ + const char *begin; + const char *end; + size_t hlen; + size_t nlen; + + hlen = strlen(h); + nlen = strlen(n); + + end = h + (hlen - nlen); + + /* + * The first occurrence of the empty string is deemed to occur at + * the beginning of the string. + */ + if (nlen == 0) + return (char *)h; + + /* + * Sanity check, otherwise the loop might search through the whole + * memory. + */ + if (__builtin_expect (hlen < nlen, 0)) + return NULL; + + for (begin=h; begin<=end; ++begin) { + if (begin[0] == n[0] + && !memcmp((const void *)(begin+1),(const void *)(n+1),nlen-1)) + return (char *)begin; + } + return NULL; +} + + +/** + * sbif + * ```` + * Bifurcate a string at into two substrings if a token is found. + * + * @l : destination of left side of string. + * @r : destination of right side of string. + * @str : original string. + * @tok : token to split at. + * Return: nothing. + */ +size_t sbif(char *l, char *r, const char *str, const char *tok) +{ + const char *cur; + char *cpy; + size_t t_len; + size_t s_len; + + s_len = strlen(str); + t_len = strlen(tok); + + if (t_len == 0) + return -1; + + if (__builtin_expect (s_len < t_len, 0)) + return -1; + + cpy = l; // Start with the left string. + + for (cur=str; *cur!='\0'; cur++) { + if (cur[0]==tok[0] && !memcmp((cur+1), (tok+1), (t_len-1))) { + *cpy = '\0'; // NUL-terminate left string. + cpy = r; // We copy the right side now + cur += t_len; // Move cursor past the token + } + *cpy = *cur; + cpy++; + } + *cpy = '\0'; // NUL-terminate right string. + + + return 1; +} + + + +size_t catenate(char *dest, size_t max, int n, char *strings[]) +{ + size_t len = 0; + int i; + + for (i=0; i<n; i++) { + len += slcat(dest, strings[i], max); + len += slcat(dest, " ", max); + } + return len; +} + + +size_t tonext(char *str, char tok) +{ + char *match; + size_t len; + + len = strlen(str); + + match = (char *)memchr(str, tok, len); + + return len - strlen(match); +} + + + +bool is_ws(char c) { + switch (c) { + case ' ': + case '\n': + case '\t': + case '\f': + case '\r': + return true; + default: + return false; + } +} + + +/** + * tail + * ```` + * Return pointer to the last character of a string (not including newline). + */ +char *tail(char *string) +{ + return string + strlen(string)-1; +} + + +/** + * trimcpy + * ``````` + * Trim leading/trailing whitespace at src and copy result to dst + * + * @dst : destination buffer (must be allocated) + * @src : source buffer + * Return: number of bytes written to dst. + */ +size_t trimcpy(char *dst, const char *src) +{ + const char *end; + + /* Leading */ + while (isspace(*src)) + src++; + + /* All spaces */ + if (*src == 0) + return 0; + + /* Trailing */ + end = src + strlen(src) - 1; + + while (end > src && isspace(*end)) + end--; + end++; + + return slcpy(dst, src, (end-src)+1); // slcpy adds NUL +} + + +int ntok(const char *str, const char *tok) +{ + size_t toklen; + char *sub; + + int count=0; + + toklen = strlen(tok); + + for (sub = (char *)memmem(str, tok); + sub != NULL; + sub = (char *)memmem((sub+toklen), tok)) + { + count++; + } + + return count; +} + + +/** + * strip_comments + * `````````````` + * Replace C-like comments with whitespace space characters. + * + * @str : String to strip comment symbols from. + * Return: Does not return. + * + * NOTE + * Multi-line comments are supported. + */ +void strip_comments(char *str) +{ + static bool in_comment = false; + + for (; *str; str++) { + if (in_comment) { + /* Exiting comment zone */ + if (str[0] == '*' && str[1] == '/') { + in_comment = false; + *str++ = ' '; + } + /* Replace comments with space. */ + if (!isspace(*str)) { + *str = ' '; + } + } else { + /* Entering comment zone */ + if (str[0] == '/' && str[1] == '*') { + in_comment = true; + *str++ = ' '; + *str = ' '; + } + } + } +} + + +/** + * getline + * ``````` + * Get a line of input. + * + * @stringp: Pointer to a string pointer. + * @max : Get no more than n-1 characters. + * @stream : Get the next string from this file stream. + * Return : EOF if end of file, 0 if line is too long, otherwise lookahead. + */ +int getline(char **dest, int n, FILE *stream) +{ + static int lookahead = 0; + char *str; + + str = *dest; + + /* Initialize */ + if (lookahead == 0) + lookahead = getc(stream); + + if (n > 0 && lookahead != EOF) { + while(n--> 0) { + *str = lookahead = getc(stream); + + if (*str == '\n' || *str == EOF) + break; + str++; + } + *str = '\0'; + *dest = str; + } + return (n <= 0) ? 0 : lookahead; +} + + + +/** + * get_expr + * ```````` + * Get a regular expression and associated string from the input stream. + * + * @fd_input: file descriptor of the input stream. + * Return : Pointer to the line containing the regex and string in it. + * + * NOTE + * Discards all blank lines and concatenates all whitespace-separated strings. + */ +char *get_expr(FILE *fd_input) +{ + static int lookahead = 0; + int size; + char *line; + + size = MAXINP; + + /* If the next line starts with %, return the EOF marker. */ + if (lookahead == '%') + return NULL; + + while ((lookahead = getline(&line, size-1, fd_input)) != EOF) { + if (lookahead == 0) + halt(SIGABRT, "Rule too long\n"); + + /* Ignore blank lines. */ + if (!line[0]) + continue; + + size = MAXINP - (line-IBUF); + + /* Ignore whitespace */ + if (!isspace(lookahead)) + break; + + *line++ = '\n'; + } + + return lookahead ? line : NULL; +} + + diff --git a/lib/textutils.h b/lib/textutils.h new file mode 100644 index 0000000..475597f --- /dev/null +++ b/lib/textutils.h @@ -0,0 +1,77 @@ +#ifndef __TEXTUTILS_H +#define __TEXTUTILS_H + +#include <string.h> + +/* Initialization ----------------------------------------------------------- */ +void szero(char *str); + +/* Safe strings ------------------------------------------------------------- */ +char *sdup(const char *str); +char *sldup(const char *str, size_t max); +size_t slcpy(char *dst, const char *src, size_t siz); +size_t slcat(char *dst, const char *src, size_t siz); +size_t sbif(char *l, char *r, const char *str, const char *tok); + +/* String sets -------------------------------------------------------------- */ +size_t catenate(char *dest, size_t max, int n, char *strings[]); +//const char *concat(const char *a, const char *b); +char *match(const char *haystack, const char *needle); +char *field(const char *string, const char *delimiter); +int ntok(const char *str, const char *tok); +void chrswp(char *src, char at, char with, size_t len); + +/* Format print ------------------------------------------------------------- */ +void pumpf(char **strp, const char *fmt, ...); + +/* Whitespace --------------------------------------------------------------- */ +size_t trimcpy(char *dst, const char *src); +char *trimws(char *str); + +void strip_comments(char *str); + +char *tail(char *string); + +char *get_expr(FILE *fd_input); + + +/* Raw memory --------------------------------------------------------------- */ +#define memmem textutils_memmem +#define strstr textutils_strstr +#define memchr textutils_memchr +#define getline textutils_getline + +void *textutils_memmem(const void *haystack, const void *needle); +char *textutils_strstr(const char *haystack, const char *needle); +void *textutils_memchr(const void *src_void, int c, size_t len); +int textutils_getline(char **dest, int n, FILE *stream); + +/* Nice macros -------------------------------------------------------------- */ + +#define STRCMP(a,b) (strcmp((a),(b)) == 0) ? true : false +#define isarg(n, string) (STRCMP(argv[(n)], (string))) +#define ARG(n) (argv[(n)]) + +#define STREMPTY(s) (STRCMP((s),"")) + + +/** + * concat + * `````` + * Return pointer to a static value of 2 concatenated strings. + * @a: first string (head) + * @b: second string (tail) + * Return: pointer to the concateneated string, static. + */ +static inline const char *concat(const char *a, const char *b) +{ + #define BIG 9000 + static char buffer[BIG]; + + slcpy(buffer, a, BIG); + slcat(buffer, b, BIG); + + return buffer; +} + +#endif diff --git a/lib/util.h b/lib/util.h new file mode 100644 index 0000000..1cefe51 --- /dev/null +++ b/lib/util.h @@ -0,0 +1,399 @@ +#ifndef _PUMP_UTIL_H +#define _PUMP_UTIL_H + + +/****************************************************************************** + * THINGS I PILFERED FROM THE LINUX KERNEL + * + * Like a freak show of the unfathomable, equal parts lewd and fascinating... + ******************************************************************************/ +/****************************************************************************** + * The __CHECKER__ family of macros provide reminders and diagnostics to + * check that your code isn't needlessly wasting time or doing something + * silly. + ******************************************************************************/ +#ifdef __CHECKER__ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) +#define BUILD_BUG_ON(condition) +#define BUILD_BUG() (0) +#else /* __CHECKER__ */ + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments + * to inline functions). So as a fallback we use the optimizer; if it can't + * prove the condition is false, it will cause a link error on the undefined + * "__build_bug_on_failed". This error message can be harder to track down + * though, hence the two different methods. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif + + +/** + * BUILD_BUG - break compile if used. + * + * If you have some code that you expect the compiler to eliminate at + * build time, you should use BUILD_BUG to detect if it is + * unexpectedly used. + */ +#define BUILD_BUG() \ + do { \ + extern void __build_bug_failed(void) \ + __linktime_error("BUILD_BUG failed"); \ + __build_bug_failed(); \ + } while (0) + + +/** + * BUILD_BUG_ON_NOT_POWER_OF_2 + * Force a compilation error if a constant expression is not a power of 2 + */ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + + +/** + * BUILD_BUG_ON_ZERO / NULL + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type size_t), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) + + +/** + * BUILD_BUG_ON_INVALID + * Permits the compiler to check the validity of the expression but + * avoids the generation of any code, even if that expression has + * side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + + +#endif /* __CHECKER__ */ + + + +/****************************************************************************** + * TYPE SNOOPING + * Try not to let the compiler get away with doing funny things to the wrong + * types. An art, not a science. + ******************************************************************************/ + +/** + * Make sure a macro argument is a variable: + * declare an enumeration inside a new scope with the same name + * as the variable. + * + * Macros for macros, that's right folks. + */ +#define VARIABLE(v) { enum v { }; } + +#define ASSIGN(variable, value) \ + VARIABLE(variable) \ + { enum { E = value }; } \ + variable = value; + + +/** + * __same_type + * Are two types/vars the same type (ignoring qualifiers)? + */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + + +/** + * __must_be_array + * In C, you can often get away with thinking about arrays and pointers + * as two sides of the same coin, but there IS a distinction, and it + * tends to sow strange buggies if ignored. + * + * In the macro, &a[0] degrades to a pointer, which is a different type + * than an array. Usually the compiler will print a warning and move on, + * but BUILD_BUG_ON_ZERO ensures the whole thing gets called off. + */ +#ifdef __CHECKER__ +#define __must_be_array(arr) 0 +#else +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#endif + + +/** + * ARRAY_SIZE + * Justly famous macro for determining the size of a dynamic array at + * runtime. The version from the Linux kernel manages to improve on a + * classic by stipulating the argument be a true array using the + * __must_be_array macro described above. + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + + +/****************************************************************************** + * OPTIMIZATION + * Macros that take advantage of (usually) compiler-specific features to + * attempt to increase the performance of your code. This is usually + * unsuccessful, but it has its place. + ******************************************************************************/ + +/** + * Branch prediction macros; gcc will emit instructions causing the branch + * prediction to favor the instruction on the "likely" side, re-arranging + * the jumps so that it gets tested first. + * e.g. + * if (unlikely(c < 4)) { + * special code + * } + * There has been some evidence that performance improvements here are + * negligible in all but the most special cases. + */ +#ifdef __GNUC__ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + + +/****************************************************************************** + * MATHS + * Preprocessor stand-ins for some of the unsexy functions in <math.h>, + * plus safe incrementers/decrementers that amount to bounds-checking + * in most use cases. + ******************************************************************************/ + +/** + * abs() handles unsigned and signed longs, ints, shorts and chars. For all + * input types abs() returns a signed long. + * + * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64() + * for those. + */ +#define myabs(x) ({ \ + long ret; \ + if (sizeof(x) == sizeof(long)) { \ + long __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } else { \ + int __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } \ + ret; \ + }) + +#define myabs64(x) ({ \ + s64 __x = (x); \ + (__x < 0) ? -__x : __x; \ + }) + + +/** + * min()/max()/clamp() macros that also do strict type-checking... + * See the "unnecessary" pointer comparison. + */ +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + +#define min3(x, y, z) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + typeof(z) _min3 = (z); \ + (void) (&_min1 == &_min2); \ + (void) (&_min1 == &_min3); \ + _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ + (_min2 < _min3 ? _min2 : _min3); }) + +#define max3(x, y, z) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + typeof(z) _max3 = (z); \ + (void) (&_max1 == &_max2); \ + (void) (&_max1 == &_max3); \ + _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \ + (_max2 > _max3 ? _max2 : _max3); }) + + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does strict typechecking of min/max to make sure they are of the + * same type as val. See the unnecessary pointer comparisons. + */ +#define clamp(val, min, max) ({ \ + typeof(val) __val = (val); \ + typeof(min) __min = (min); \ + typeof(max) __max = (max); \ + (void) (&__val == &__min); \ + (void) (&__val == &__max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) + +/** + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1: __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1: __max2; }) + + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * 'type' to make all the comparisons. + */ +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) + + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument 'val' is. This is useful when val is an unsigned + * type and min and max are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, min, max) ({ \ + typeof(val) __val = (val); \ + typeof(val) __min = (min); \ + typeof(val) __max = (max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) + + +/** + * swap - swap value of @a and @b + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + + +/****************************************************************************** + * END LINUX KERNEL NOVELTIES + ******************************************************************************/ +/****************************************************************************** + * BEGIN MISCELLANEOUS MONSTROSITIES + ******************************************************************************/ + +/** + * VA_NUM_ARGS + * Counts the number of VA_ARGS by means of an intense and heathen magic, + * the particulars of which are not to be uttered here. + */ +#define VA_NUM_ARGS(...) \ + VA_NUM_ARGS_IMPL(__VA_ARGS__, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, \ + 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, \ + 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \ + 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, \ + 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, \ + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, \ + 3, 2, 1) + +#define VA_NUM_ARGS_IMPL( _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, \ + _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \ + _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \ + _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, \ + _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, \ + _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, \ + _61, _62, _63, N, ...) N + +/** + * dec/inc + * Safe decrement and increment; the value of x is modified + */ +#define dec(x, min) x = ((x) > (min)) ? ((x)-1) : (x) +#define inc(x, max) x = ((x) < (max)) ? ((x)+1) : (x) + +#define CAT(a,b) a ## b +#define STR(a) #a +#define EXP(a) a + + + +void sha256gen(char *hex, void *hash); +void nsleep(long nanoseconds); + + +static inline char *itoa(int i) +{ + static char buffer[4096]; + snprintf(buffer, 4096, "%d", i); + + return buffer; +} + + +#ifdef __MERSENNE__ +#include "mersenne.h" +#endif + + +#endif + @@ -0,0 +1,102 @@ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "nfa.h" +#include "macro.h" + +struct htab_t *MACROTABLE; /* Symbol table for macro definitions */ + + +/** + * new_macro + * ````````` + * Add a new macro to the table. + * If two macros have the same name, the second one takes precedence. + * A definition takes the form: + * + * name <whitespace> text [<whitespace>] + * + * Whitespace at the end of the line is ignored. + */ +void new_macro(char *def) +{ + static int first = 1; + char *name; // Name part of macro + char *text; // Text part of macro + char *edef; // Pointer to the end of text part + struct macro_t *p; + + if (first) { + first = 0; + MACROTABLE = maketab(31, hash_add, strcmp); + } + + /* Isolate name */ + for (name = def; *def && !isspace(*def); def++) + ; + + if (*def) + *def++ = '\0' ; + + /* + * Isolate the definition text. + * Because you need to discard any trailing whitespace on the + * line, this is a bit tricky. The first while loop skips the + * preceding whitespace. The for loop is looking for end of string. + * If you find a ws character (and the \n at the end of string is ws), + * remember the position as a potential end of string. + */ + while (isspace(*def)) + ++def; + + text = def; + edef = NULL; + + while (*def) { + if (!isspace(*def)) ++def; + else { + for (edef = def++; isspace(*def); ++def) + ; + } + } + + if (edef) + *edef = '\0'; + + /* Add the macro to the symbol table */ + p = (struct macro_t *)newsym(sizeof(struct macro_t)); + slcpy(p->name, name, MAC_NAME_MAX); + slcpy(p->text, text, MAC_TEXT_MAX); + addsym(MACROTABLE, p); +} + + +/* + * Return a pointer to the contents of a macro having the indicated + * name. Abort with a message if no macro exists. The macro name includes + * the brackets. *namep is modified to point past the close brace. + */ +char *get_macro(char **namep) +{ + struct macro_t *mac; + char *p; + + /* Hit a '{', skip it and find '}'. */ + if (!(p = strchr(++(*namep), '}'))) + halt(SIGABRT, "Bad macro."); + else { + *p = '\0'; // Overwrite } + + if (!(mac = (struct macro_t *)findsym(MACROTABLE, *namep))) + halt(SIGABRT, "No macro."); + + *p++ = '}'; // Re-write } + *namep = p; // Update name pointer + + return mac->text; + } + return "ERROR"; // Shouldn't happen. +} + + @@ -0,0 +1,18 @@ +#ifndef _MACRO_H +#define _MACRO_H + +#define MAC_NAME_MAX 34 // Max macro name length +#define MAC_TEXT_MAX 80 // Max macro text length + + +struct macro_t { + char name[MAC_NAME_MAX]; + char text[MAC_TEXT_MAX]; +}; + + +void new_macro(char *def); +char *get_macro(char **namep); + + +#endif @@ -0,0 +1,77 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <stdarg.h> + +#include "lib/error.h" +#include "lib/textutils.h" +#include "scan.h" +#include "nfa.h" +#include "dfa.h" +#include "gen.h" + + +/** + * flex + * ```` + * Lex the input file. + */ +void flex(struct pgen_t *pgen) +{ + struct dfa_table_row *dtrans; + struct accept_t *accept; + int nstates; + + /* Print the input file header */ + scan_head(pgen); + + /* Construct the DFA */ + nstates = dfa(pgen, &dtrans, &accept); + + /* Open the output stream */ + if (!(fopen(pgen->path_out, "w"))) + halt(SIGABRT, "Can't open output file."); + + /* Print the DFA transition table to the output stream. */ + fprintf(pgen->out, + "YYPRIVATE YY_TTYPE %s[ %d ][ %d ] =\n", + DTRAN_NAME, nstates, MAX_CHARS); + + /* Print the DFA array to the output stream. */ + print_array(pgen->out, (int *)dtrans, nstates, MAX_CHARS); + + defnext(pgen->out, DTRAN_NAME); + + /* Print the rest of the driver and everyting after the second %% */ + pdriver(pgen->out, nstates, accept); + + scan_tail(pgen); +} + + + +/** + * This is the actual main function. + */ +int main(int argc, char *argv[]) +{ + struct pgen_t *pgen; + + if (!argv[0]) + halt(SIGABRT, "Nope."); + + pgen = calloc(1, sizeof(struct pgen_t)); + + slcpy(pgen->path_in, argv[0], PATHSIZE); + slcpy(pgen->path_out, argv[1], PATHSIZE); + + if (!(pgen->in = fopen(pgen->path_in, "r"))) + halt(SIGABRT,"Can't open input file %s", pgen->path_in); + + flex(pgen); + fclose(pgen->in); + + return 0; +} + + @@ -0,0 +1,38 @@ +#ifndef _MAIN_H +#define _MAIN_H + +/* + * Name of DFA transition table. Up to 3 characters are appended + * to the end of this name in the row-compressed tables. + */ +#define DTRAN_NAME "Yy_nxt" +#define TEMPLATE "lex.par" // Driver template for the state machine. + +#define PATHSIZE 255 // Maximum pathsize on Unix +#define MAXLINE 2048 // Max rule/line size + +/** + * The parser generator singleton. + * + * It helps to encapsulate this state and avoid global + * variables floating around, getting initalized at all + * hours of the day. + * + * @filename: name of the input file. + * @line : buffer holding the current line of input. + * @cur : pointer for traversing the line. + * @in : input file stream + * @out : output file stream + * + */ +struct pgen_t { + char path_in[PATHSIZE]; + char path_out[PATHSIZE]; + char line[MAXLINE]; + char *cur; + FILE *in; + FILE *out; +}; + + +#endif @@ -22,16 +22,11 @@ char *beg_input; // Beginning of input string enum token cur_tok; // Current token int lexeme; // Value associated with LITERAL +struct nfa_t *STACK[SSIZE]; // Stack used by new() +struct nfa_t **STACKP = &STACK[-1]; // Stack pointer -struct nfa_t *nfa_stack[SSIZE]; // Stack used by new() -struct nfa_t **nfa_stack_ptr = &nfa_stack[-1]; // Stack pointer -struct nfa_t *nfa_base; // Base address of nfa_states array -struct nfa_t *nfa_states; // State machine array -int nfa_nstates=0; // Number of states in array. -int nfa_next; // Index of next element in the array. - /***************************************************************************** * Allocate, create, and destroy NFA blocks in the NFA stack array. @@ -48,27 +43,29 @@ int nfa_next; // Index of next element in the array. * contain the maximum number of states. Subsequent calls simply return * a pointer to a region of this memory block. */ -struct nfa_t *new_nfa(void) +struct nfa_t *new_nfa(struct pgen_t *pgen) { static bool init = false; - struct nfa_t *new; + struct nfa_node *new; - /* Allocate memory for the maximum number of states at once. */ if (!init) { - nfa_states = calloc(NFA_MAX, sizeof(struct nfa_t)); - if (!nfa_states) + /* Allocate memory for the maximum number of states. */ + nfa_state = calloc(NFA_MAX, sizeof(struct nfa_t)); + + if (!nfa_state) halt(SIGABRT, "Could not allocate NFA.\n"); - nfa_stack_ptr = &nfa_state_stack[-1]; + STACKP = &STACK[-1]; init = true; } - if (++nfa_nstates >= NFA_MAX) halt(SIGABRT, "Too long\n"); - /* If the stack is not ok, it's empty */ - new = !STACK_OK() ? &nfa_states[nfa_next++] : POP(); + /* + * If the stack is not ok, it's empty + */ + new = !STACK_OK() ? &nfa_state[nfa_next++] : POP(); new->edge = EPSILON; return new; @@ -84,13 +81,13 @@ struct nfa_t *new_nfa(void) * Actually decrements the number of states and clears the region * of memory in the allocated NFA stack block. */ -void del_nfa(NFA *nfa) +void del_nfa(struct nfa_t *doomed) { --nfa_nstates; - memset(nfa, 0, sizeof(NFA)); - nfa->edge = EMPTY ; - PUSH(nfa); + memset(doomed, 0, sizeof(NFA)); + doomed->edge = EMPTY ; + PUSH(doomed); if (!STACK_OK()) halt(SIGABRT, "Stack overflow!"); @@ -168,10 +165,10 @@ char *save(char *str) * ```````` * The main access routine. Creates an NFA using Thompson's construction. * - * @max_state: the maximum number of states, modified to reflect largest used. + * @max_state : the maximum number of states, modified to reflect largest used. * @start_state: modified to be the start state */ -struct nfa_t *thompson(int *max_state, NFA **start_state) +struct nfa_t *thompson(int *max_state, struct nfa_t **start_state) { CLEAR_STACK(); @@ -185,7 +182,7 @@ struct nfa_t *thompson(int *max_state, NFA **start_state) *start_state = machine(); // Manufacture the NFA *max_state = nfa_next; // Max state # in NFA - return nfa_base; + return nfa_state; } @@ -201,14 +198,15 @@ struct nfa_t *thompson(int *max_state, NFA **start_state) int nfa(FILE *input) { struct nfa_t *sstate; - Nfa = thompson(&nfa_states, &sstate); - return (sstate - nfa_base); + + nfa_state = thompson(&nfa_nstates, &sstate); + return (sstate - nfa_state); } void free_nfa(void) { - free(nfa_base); + free(nfa_state); } @@ -271,7 +269,7 @@ SET *e_closure(SET *input, char **accept, int *anchor) while (INBOUNDS(stack,tos)) { /* 2 */ i = *tos--; - p = &nfa_base[i]; + p = &nfa_state[i]; if (p->accept && (i < accept_num)) { accept_num = i; @@ -325,13 +323,13 @@ SET *move(SET *inp_set, int c) for (i = nfa_states; --i >= 0;) { if (MEMBER(inp_set, i)) { - p = &nfa_base[i]; + p = &nfa_state[i]; if (p->edge==c || (p->edge==CCL && TEST(p->bitset, c))) { if(!outset) outset = newset(); - ADD(outset, p->next - nfa_base); + ADD(outset, p->next - nfa_state); } } } @@ -344,7 +342,7 @@ SET *move(SET *inp_set, int c) #ifdef MAIN -main(int argc, char **argv ) +void main(int argc, char **argv) { struct nfa_t *nfa; struct nfa_t *start_state; @@ -16,6 +16,12 @@ struct nfa_t { }; +struct nfa_t *nfa_base; // Base address of nfa_states array +struct nfa_t *nfa_state; // State machine array +int nfa_nstates; // Number of states in array. +int nfa_next; // Index of next element in the array. + + /* Non-character edge values */ #define EPSILON -1 #define CCL -2 @@ -0,0 +1,139 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdbool.h> + +#include "lib/error.h" +#include "macro.h" +#include "main.h" + +/* + * scan.c + * + * Everything about breaking up the input file and getting it ready + * to be turned into a beautiful butterly. + * + */ + + +/** + * strip_comments + * `````````````` + * Replace C-like comments with whitespace space characters. + * + * @str : String to strip comment symbols from. + * Return: Does not return. + * + * NOTE + * Multi-line comments are supported. + */ +void strip_comments(char *str) +{ + static bool in_comment = false; + + for (; *str; str++) { + if (in_comment) { + /* Exiting comment zone */ + if (str[0] == '*' && str[1] == '/') { + in_comment = false; + *str++ = ' '; + } + /* Replace comments with space. */ + if (!isspace(*str)) { + *str = ' '; + } + } else { + /* Entering comment zone */ + if (str[0] == '/' && str[1] == '*') { + in_comment = true; + *str++ = ' '; + *str = ' '; + } + } + } +} + + +/** + * scan_head + * ````````` + * Process the first third of the file (up to the first %%). + * + * @pgen: The parser generator singleton. + * + * NOTES + * The following rules are followed by scan_head: + * + * 0. Any lines beginning with white space or surrounded by %{ and %} + * are passed directly to the output stream. + * + * 1. All other lines are assumed to be macro definitions. + * + * 2. A %% can not be concealed in a %{ %}. + * + * 3. A %% or %{ and %} must be anchored at the start of a line (^). + * Anywhere else in the input (such as in a printf statement), + * they will be passed to the output correctly. + */ +void scan_head(struct pgen_t *pgen) +{ + /* ignore becomes true inside a %{ ... %} block. */ + bool ignore = false; + + while (fgets(pgen->line, MAXLINE, pgen->in)) { + + if (!ignore) + strip_comments(pgen->line); + + /* Encounter a '%' character. */ + if (pgen->line[0] == '%') { + + if (pgen->line[1] == '%') { + fputs("\n", pgen->out); + break; + } + + if (pgen->line[1] == '{') ignore = 1; + else if (pgen->line[1] == '}') ignore = 0; + else + halt(SIGABRT, "Ignoring illegal %%%c directive\n", pgen->line[1]); + + /* Skipping blank lines or comments. */ + } else if (ignore || isspace(pgen->line[0])) { + + fputs(pgen->line, pgen->out); + + /* Otherwise we've got a macro. */ + } else { + /* + * Replace macro def with a blank line so that the + * line numbers won't get messed up. + */ + new_macro(pgen->line); + fputs("\n", pgen->out); + } + } +} + + +/** + * scan_tail + * ````````` + * Scan and print the final 1/3 of the input file, after the second %%. + * + * Return: nothing. + */ +void scan_tail(struct pgen_t *pgen) +{ + while (fgets(pgen->line, MAXLINE, pgen->in)) + fputs(pgen->line, pgen->out); +} + + + + + + + + @@ -0,0 +1,11 @@ +#ifndef _SCAN_H +#define _SCAN_H + +#include "main.h" + +void strip_comments(char *string); +void scan_head(struct pgen_t *pgen); +void scan_tail(struct pgen_t *pgen); + + +#endif @@ -1,58 +0,0 @@ -#ifndef __TOKEN_H -#define __TOKEN_H - -/**************************************************************************** - * TOKENS - ****************************************************************************/ - -enum token { - EOS = 1, // end of string - ANY, // . - AT_BOL, // ^ - AT_EOL, // $ - CCL_END, // ] - CCL_START, // [ - CLOSE_CURLY, // } - CLOSE_PAREN, // ) - CLOSURE, // * - DASH, // - - END_OF_INPUT, // EOF - L, // literal character - OPEN_CURLY, // { - OPEN_PAREN, // ( - OPTIONAL, // ? - OR, // | - PLUS_CLOSE // + -}; - - -enum token TOKEN_MAP[] = { -// ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N - L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, -// ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] - L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, -// ^^ ^_ SPACE ! " # $ % & ' - L, L, L, L, L, L, AT_EOL, L, L, L, -// ( ) * + , - . - OPEN_PAREN, CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY, -// / 0 1 2 3 4 5 6 7 8 9 : ; < = - L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, -// > ? - L, OPTIONAL, -// @ A B C D E F G H I J K L M N - L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, -// O P Q R S T U V W X Y Z - L, L, L, L, L, L, L, L, L, L, L, L, -// [ \ ] ^ - CCL_START, L, CCL_END, AT_BOL, -// _ ` a b c d e f g h i j k l m - L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, -// n o p q r s t u v w x y z - L, L, L, L, L, L, L, L, L, L, L, L, L, -// { | } DEL - OPEN_CURLY, OR, CLOSE_CURLY, L -}; - - -#endif - |