summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Linehan <patientulysses@gmail.com>2012-07-22 22:40:02 -0400
committerJason Linehan <patientulysses@gmail.com>2012-07-22 22:40:02 -0400
commit9b0c080b4a69530b8b6e8e945c4afbe24cac684f (patch)
treeba40bdd1b424137e91df37755355e2b9e63a3f8a
parentde8cb439d5b75e4923f8da34f2f8a45767ac1038 (diff)
downloadplex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.tar.gz
plex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.tar.bz2
plex-9b0c080b4a69530b8b6e8e945c4afbe24cac684f.zip
More cleaning up.
-rw-r--r--Makefile2
-rw-r--r--dfa.c99
-rw-r--r--dfa.h83
-rw-r--r--gen.c194
-rw-r--r--gen.h12
-rw-r--r--lex.c178
-rw-r--r--lex.h69
-rw-r--r--lib/error.c196
-rw-r--r--lib/error.h31
-rw-r--r--lib/set.c501
-rw-r--r--lib/set.h129
-rw-r--r--lib/textutils.c756
-rw-r--r--lib/textutils.h77
-rw-r--r--lib/util.h399
-rw-r--r--macro.c102
-rw-r--r--macro.h18
-rw-r--r--main.c77
-rw-r--r--main.h38
-rw-r--r--nfa.c58
-rw-r--r--nfa.h6
-rw-r--r--scan.c139
-rw-r--r--scan.h11
-rw-r--r--tok.h58
23 files changed, 2966 insertions, 267 deletions
diff --git a/Makefile b/Makefile
index b85df51..40fd009 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ LDFLAGS=
#
#
-SOURCES=main.c lex.c nfa.c dfa.c gen.c
+SOURCES=main.c scan.c lex.c macro.c nfa.c dfa.c gen.c lib/set.c lib/textutils.c lib/error.c
OBJECTS=$(SOURCES:.c=.o)
EXECUTABLE=plex
diff --git a/dfa.c b/dfa.c
index c4fe116..3fbccf2 100644
--- a/dfa.c
+++ b/dfa.c
@@ -2,28 +2,11 @@
* Make a DFA transition table from an NFA created with
* Thompson's construction.
*/
-
-/*
- * DTAB is the deterministic transition table. It is indexed by state number
- * along the major axis and by input character along the minor axis. Dstates
- * is a list of deterministic states represented as sets of NFA states.
- * NSTATES is the number of valid entries in DTAB.
- */
-struct dfa_state {
- unsigned group:8; // Group id used by minimize()
- unsigned mark:1; // Mark used by make_dtran()
- char *accept; // accept action if accept state
- int anchor // Anchor point if an accept state.
- SET *set; // Set of NFA states represented by this DFA state
-};
-
-
-struct dfa_state *DSTATES; // DFA states table
-struct dfa_state *PREV_STATE; // Most-recently marked DFA state in DTAB
-
-ROW *DTAB; // DFA transition table
-int NSTATES // Number of DFA states
-
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "dfa.h"
+#include "nfa.h"
/**
@@ -37,21 +20,23 @@ int NSTATES // Number of DFA states
* (indexed by state number).
* dfa() discards all the memory used for the initial NFA.
*/
-int dfa(FILE *input, ROW *(dfap[]), ACCEPT *(*acceptp))
+int dfa(struct pgen_t *pgen, struct dfa_table_row **tab, struct accept_t **acc)
{
- ACCEPT *accept_states;
+ struct accept_t *accept_states;
int start
int i;
/* Build the NFA */
- start = nfa(input);
+ start = nfa(pgen);
+
+ DFA = calloc(1, sizeof(struct dfa_t));
- NSTATES = 0;
- DSTATES = calloc(DFA_MAX, sizeof(DFA_STATE));
- DTAB = calloc(DFA_MAX, sizeof(ROW));
- PREV_STATE = DSTATES;
+ DFA->nstates = 0;
+ DFA->state = calloc(DFA_MAX, sizeof(struct dfa_state));
+ DFA->trans = calloc(DFA_MAX, sizeof(struct dfa_table_row));
+ DFA->prev = DFA->state;
- if (!DSTATES || !DTAB)
+ if (!DFA->state || !DFA->trans)
halt(SIGABRT, "No memory for DFA transition matrix!");
/* Convert NFA to DFA */
@@ -61,20 +46,20 @@ int dfa(FILE *input, ROW *(dfap[]), ACCEPT *(*acceptp))
free_nfa();
/* Build the DFA */
- DTAB = realloc(DTAB, NSTATES * sizeof(ROW));
- accept_states = malloc(NSTATES * sizeof(ACCEPT));
+ DFA->trans = realloc(DFA->trans, DFA->nstates * sizeof(struct dfa_table_row));
+ accept_states = malloc(DFA->nstates * sizeof(struct accept_t));
- if (!accept_states || !DTAB)
+ if (!accept_states || !DFA->trans)
halt(SIGABRT, "Out of memory!!");
- for (i=NSTATES; i-->0;) {
- accept_states[i].string = DSTATES[i].accept;
- accept_states[i].anchor = DSTATES[i].anchor;
+ for (i=DFA->nstates; i-->0;) {
+ accept_states[i].string = DFA->state[i].accept;
+ accept_states[i].anchor = DFA->state[i].anchor;
}
- free(DSTATES);
- *dfap = DTAB;
- *acceptp = accept_states;
+ free(DFA->state);
+ *tab = DFA->trans;
+ *acc = accept_states;
return NSTATES;
}
@@ -85,13 +70,13 @@ int add_to_dstates(SET *NFA_set, char *accept_string, int anchor)
{
int nextstate;
- if (NSTATES > (DFA_MAX-1))
+ if (DFA->nstates > (DFA_MAX-1))
halt(SIGABRT, "Too many DFA states\n");
- nextstate = NSTATES++;
- DSTATES[nextstate].set = NFA_set;
- DSTATES[nextstate].accept = accept_string;
- DSTATES[nextstate].anchor = anchor;
+ nextstate = DFA->nstates++;
+ DFA->state[nextstate].set = NFA_set;
+ DFA->state[nextstate].accept = accept_string;
+ DFA->state[nextstate].anchor = anchor;
return nextstate;
}
@@ -106,11 +91,11 @@ int add_to_dstates(SET *NFA_set, char *accept_string, int anchor)
int in_dstates(SET *NFA_set)
{
struct dfa_state *p;
- struct dfa_state *end = &DSTATE[NSTATES];
+ struct dfa_state *end = &DFA->state[DFA->nstates];
- for (p=DSTATES; p<end; ++p) {
+ for (p=DFA->dstate; p<end; ++p) {
if (IS_EQUIVALENT(NFA_set, p->set))
- return(p - DSTATES);
+ return(p - DFA->state);
}
return -1;
@@ -124,13 +109,13 @@ int in_dstates(SET *NFA_set)
* exists, return NULL. Print an asterisk for each state to tell the
* user that the program hasn't died while the table is being constructed.
*/
-DFA_STATE *get_unmarked(void)
+struct dfa_state *get_unmarked(void)
{
- for (; Last_marked<&DSTATES[NSTATES]; ++Last_marked) {
- if (!Last_marked->mark) {
+ for (; DFA->prev < &DFA->state[DFA->nstates]; ++DFA->prev) {
+ if (!DFA->prev->mark) {
putc('*', stderr);
fflush(stderr);
- return Last_marked;
+ return DFA->prev;
}
}
return NULL;
@@ -141,9 +126,9 @@ DFA_STATE *get_unmarked(void)
void free_sets(void)
{
struct dfa_state *p;
- struct dfa_state *end = &DSTATES[NSTATES];
+ struct dfa_state *end = &DFA->state[DFA->nstates];
- for (p=DSTATES; p<end; ++p)
+ for (p=DFA->state; p<end; ++p)
delset(p->set);
}
@@ -170,9 +155,9 @@ void make_dtran(int sstate)
NFA_set = newset() ;
ADD(NFA_set, sstate);
- NSTATES = 1;
- DSTATES[0].set = e_closure(NFA_set,&DSTATES[0].accept,&DSTATES[0].anchor);
- DSTATES[0].mark = 0;
+ DFA->nstates = 1;
+ DFA->state[0].set = e_closure(NFA_set,&DFA->state[0].accept,&DFA->state[0].anchor);
+ DFA->state[0].mark = 0;
/* Make the table */
while (current = get_unmarked()) {
@@ -192,7 +177,7 @@ void make_dtran(int sstate)
else
nextstate = add_to_dstates(NFA_set, isaccept, anchor);
- DTAB[current-DSTATES][c] = nextstate;
+ DFA->trans[current-DFA->state][c] = nextstate;
}
}
/* Terminate string of *'s printed in get_unmarked(); */
diff --git a/dfa.h b/dfa.h
index 5e50d47..1cafa70 100644
--- a/dfa.h
+++ b/dfa.h
@@ -1,36 +1,75 @@
-#ifndef __DFA_H
-#define __DFA_H
+#ifndef _DFA_H
+#define _DFA_H
-/*
- * Representations of Deterministic finite automata.
- */
+#include "main.h"
+
+#define DFA_MAX 254 // Maximum number of DFA states.
+#define MAX_CHARS 128 // Maximum width of DFA transition table.
+#define F -1 // Marks a failure state in the table.
-/* Maximum number of DFA states. There will be problems if this is >= 255. */
-#define DFA_MAX 254
-/*
- * The type of the output DFA transition table (the internal one is an
- * array of int). It is used to figure the various table sizes.
+/**
+ * Contains an accepting string, which is NULL if non-accepting,
+ * and an anchor point, if any.
*/
-typedef unsigned char TTYPE;
+struct accept_t {
+ char *string;
+ int anchor;
+};
-#define F -1 // Marks a failure state in the table.
-#define MAX_CHARS 128 // Maximum width of DFA transition table.
+/**
+ * The DFA transition table is indexed by state number along the major axis
+ * and by input character along the minor axis.
+ *
+ * Each row of the table holds a list of deterministic states represented
+ * as sets of NFA states.
+ *
+ * nstates is the number of valid entries in the table.
+ *
+ * @tab : DFA transition table.
+ * @nstates : Number of DFA state nodes in the state array.
+ */
+struct dfa_table_row {
+ int row[MAX_CHARS];
+ int nstates;
+};
-/*
- * One full row of DTAB, which is itself
- * an array of DFA_MAX ROWS.
+/**
+ * A DFA state comprises the machine state after a given set of transitions.
+ *
+ * @mark: Used by make_dtran
+ * @accept: Action if accepting state.
+ * @anchor: Anchor point if accepting state.
+ * @set : Set of NFA states represented by this DFA state.
*/
-typedef int ROW[MAX_CHARS];
+struct dfa_state {
+ unsigned mark:1;
+ char *accept;
+ int anchor;
+ struct set_t *set;
+};
+
+/**
+ * A deterministic finite automaton.
+ *
+ * @state : Array of DFA state nodes.
+ * @prev_state: Previous state node.
+ */
+struct dfa_t {
+ struct dfa_state *state;
+ struct dfa_state *prev;
+ struct dfa_table_row *trans;
+ int nstates;
+};
+
+struct dfa_state *last_marked;
+struct dfa_t *DFA;
-typedef struct accept_t {
- char *string; // An accepting string. NULL if non-accepting.
- int anchor; // Anchor point, if any. See NFA.h
-} ACCEPT;
+int dfa(struct pgen_t *pgen, struct dfa_table_row **tab, struct accept_t **acc);
-#endif /* __DFA_H */
+#endif
diff --git a/gen.c b/gen.c
index e69de29..45c117b 100644
--- a/gen.c
+++ b/gen.c
@@ -0,0 +1,194 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "textutils.h"
+#include "nfa.h"
+#include "dfa.h"
+#include "main.h"
+
+/*
+ *
+ * PRINTING
+ *
+ */
+
+/**
+ * pheader
+ * ```````
+ * Print out a header comment that describes the uncompressed DFA.
+ *
+ * @fp: output stream
+ * @dtran: DFA transition table
+ * @nrows: Number of states in dtran[]
+ * @accept: set of accept states in dtran[]
+ */
+void pheader(FILE *fp, struct dfa_row **dtran, int nrows, struct accept_t *accept)
+{
+ int last_transition;
+ int chars_printed;
+ char *bin_to_ascii();
+ int i;
+ int j;
+
+ fprintf(fp, "#ifdef __NEVER__\n" );
+ fprintf(fp, "/*---------------------------------------------------\n");
+ fprintf(fp, " * DFA (start state is 0) is:\n *\n" );
+
+ for (i=0; i<nrows; i++) {
+ if (!accept[i].string) {
+ fprintf(fp, " * State %d [nonaccepting]", i);
+ } else {
+ fprintf(fp, " * State %d [accepting, line %d <",
+ i , ((int *)(accept[i].string))[-1]);
+
+ fputstr(accept[i].string, 20, fp);
+ fprintf(fp, ">]");
+
+ if (accept[i].anchor)
+ fprintf(fp, " Anchor: %s%s",
+ accept[i].anchor & START ? "start " : "",
+ accept[i].anchor & END ? "end" : "");
+ }
+
+ last_transition = -1;
+
+ for (j=0; j<MAX_CHARS; j++) {
+ if (dtran[i][j] != F) {
+ if (dtran[i][j] != last_transition) {
+ fprintf(fp, "\n * goto %2d on ", dtran[i][j]);
+ chars_printed = 0;
+ }
+ fprintf(fp, "%s", bin_to_ascii(j,1) );
+
+ if ((chars_printed += strlen(bin_to_ascii(j,1))) > 56) {
+ fprintf(fp, "\n * ");
+ chars_printed = 0;
+ }
+
+ last_transition = dtran[i][j];
+ }
+ }
+ fprintf(fp, "\n");
+ }
+ fprintf(fp," */\n\n");
+ fprintf(fp,"#endif\n");
+}
+
+
+/**
+ * pdriver
+ * ```````
+ * Print the array of accepting states, the driver itself, and the case
+ * statements for the accepting strings.
+ *
+ * @output: Output stream
+ * @nrows: number of states in dtran[]
+ * @accept: set of accepting states in dtran[]
+ **/
+void pdriver(FILE *output, int nrows, ACCEPT *accept)
+{
+ int i;
+
+ fprintf(output,
+ "/*\n"
+ " * The Yyaccept array has two purposes. If Yyaccept[i] is 0,\n"
+ " * then state i is nonaccepting. If it is non-zero, then the\n"
+ " * number determines whether the string is anchored.\n"
+ " *\t 1 = anchored at start of line\n"
+ " *\t 2 = anchored at end of line\n"
+ " *\t 3 = both\n"
+ " *\t 4 = neither\n"
+ " */\n"
+ "YYPRIVATE YY_TYPE Yyaccept[] = \n");
+ fprintf(output, "{\n");
+
+ /* Print the array of accepting states */
+ for (i=0; i<nrows; i++) {
+ if (!accept[i].string)
+ fprintf(output, "\t0 ");
+ else
+ fprintf(output, "\t%-3d", accept[i].anchor ? accept[i].anchor :4);
+
+ fprintf(output, "%c /* State %-3d */\n", i == (nrows -1) ? ' ' : ',' , i);
+ }
+ fprintf(output, "};\n\n");
+
+ /* Print code above case statements */
+ driver_2(output, !NOLINE);
+
+ /* Print case statements */
+ for (i=0; i<nrows; i++) {
+ if (accept[i].string) {
+ fprintf(output, "\t\tcase %d:\t\t\t\t\t/* State %-3d */\n",i,i);
+ if (!NOLINE)
+ fprintf(output, "#line %d \"%s\"\n",
+ *( (int *)(accept[i].string) - 1),
+ Input_file_name);
+
+ fprintf(output, "\t\t %s\n", accept[i].string);
+ fprintf(output, "\t\t break;\n");
+ }
+ }
+ /* Code below cases. */
+ driver_2(output, !NOLINE);
+}
+
+
+
+/**
+ * print_array
+ * ```````````
+ * Print the C source code to initialize the two-dimensional array pointed
+ * to by "array." Prints only the initialization part of the declaration.
+ *
+ * @fp: output stream.
+ * @array: DFA transition table
+ * @nrows: number of rows in array[]
+ * @ncols: number of cols in array[]
+ */
+void print_array(FILE *fp, int *array, int nrows, int ncols)
+{
+ #define NCOLS 10 // Num. columns used to print arrays
+ int col; // Output column.
+ int i;
+
+ fprintf(fp, "{\n");
+
+ for(i=0; i<nrows; i++) {
+ fprintf(fp, "/* %02d */ { ", i);
+
+ for (col=0; col<ncols; col++) {
+ fprintf(fp, "%3d" , *array++);
+ if (col < ncols-1)
+ fprintf(fp, ", ");
+
+ if ((col % NCOLS)==NCOLS-1 && col!=ncols-1)
+ fprintf(fp, "\n ");
+ }
+
+ if (col > NCOLS)
+ fprintf(fp, "\n ");
+
+ fprintf(fp, " }%c\n", i < nrows-1 ? ',' : ' ');
+ }
+ fprintf(fp, "};\n");
+}
+
+
+/**
+ * defnext
+ * ```````
+ * Print the default yy_next(s,c) subroutine for an uncompressed table.
+ *
+ * @fp: output stream
+ * @name: Definition name
+ */
+void defnext(FILE *fp, char *name)
+{
+ fprintf(fp, "/*\n"
+ " * yy_next(state,c) is given the current state and input\n"
+ " * character and evaluates to the next state.\n"
+ " */\n"
+ "#define yy_next(state, c) %s[state][c]\n", name);
+}
+
+
diff --git a/gen.h b/gen.h
index e69de29..e90cb7a 100644
--- a/gen.h
+++ b/gen.h
@@ -0,0 +1,12 @@
+#ifndef _GENERATE_H
+#define _GENERATE_H
+
+#include "main.h"
+#include "dfa.h"
+
+void pheader(FILE *fp, struct dfa_table_row **dtran, int nrows, struct accept_t *accept);
+void pdriver(FILE *out, int nrows, struct accept_t *accept);
+void print_array(FILE *fp, int *array, int nrows, int ncols);
+void defnext(FILE *fp, char *name);
+
+#endif
diff --git a/lex.c b/lex.c
index 1821a85..e5ee7b3 100644
--- a/lex.c
+++ b/lex.c
@@ -1,56 +1,60 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <stdbool.h>
+
+#include "lib/textutils.h"
#include "lib/error.h"
+#include "nfa.h"
+#include "main.h"
/*
* Lexical analyzer.
*/
-enum token current_tok;
-char *input_line;
-char *input_start;
-char *lexeme;
+enum token Token;
+char *Current;
+char *Start;
+char *Lexeme;
-enum token lex_advance(void)
+enum token advance(void)
{
- static bool inquote = false; // When processing a quoted string.
+ static bool in_quote = false; // When processing a quoted string.
bool saw_escape; // Saw a backslash escape
- static char *stack[SSIZE]; // Import stack
- static char **sp = NULL; // Stack pointer
+ static char *stack[32]; // Import stack
+ static char **sp = NULL; // Stack pointer
/* Initialize the stack pointer. */
if (!sp)
sp = stack - 1; // Necessary for a large model.
/* Get the next line */
- if (current_tok == EOS) {
- if (inquote)
+ if (Token == EOS) {
+
+ if (in_quote)
halt(SIGABRT, "No newline.");
- /*
- * Sit in this loop until a non-blank line is read
- * into the "Input" array.
+
+ /*
+ * Loop until a non-blank line is read.
*/
do {
- /* Then at the end of file. */
- if (!(input = (getline)())) {
- current_tok = END_OF_INPUT;
+ if (!(Current = getline(&Current, MAXLINE, pgen->in))) {
+ Token = END_OF_INPUT;
goto exit;
}
- /* Ignore leading whitespace. */
- while (isspace(*input))
- input++;
+ while (isspace(*Current))
+ Current++;
- } while (!*input); /* Ignore blank lines. */
+ } while (!*Current); /* Ignore blank lines. */
- input_start = input; // Remember start of line for errors.
+ Start = Current; // Remember start of line for errors.
}
- while (*input == '\0') {
+ while (*Current == '\0') {
/* Restore previous input source. */
if (INBOUNDS(stack, sp)) {
- input = *sp--;
+ Current = *sp--;
continue;
}
@@ -58,18 +62,18 @@ enum token lex_advance(void)
* No more input sources to restore; you're at the real end
* of the input string.
*/
- current_tok = EOS;
- lexeme = '\0';
+ Token = EOS;
+ Lexeme = '\0';
goto exit;
}
- if (!inquote) {
+ if (!in_quote) {
/* Macro expansion required. */
- while (*input == '{') {
+ while (*Current == '{') {
/* Stack current input string. */
- *++sp = input;
+ *++sp = Current;
/* Use macro body as input string. */
- input = get_macro(sp);
+ Current = get_macro(sp);
if (TOOHIGH(stack,sp))
halt(SIGABRT, "Stack overflow");
@@ -78,40 +82,40 @@ enum token lex_advance(void)
/*
* At either start or end of a quoted string. All characters are
- * treated as literals while inquote is true.
+ * treated as literals while in_quote is true.
*/
- if (*input == '"') {
- inquote = ~inquote;
- if (!*++input) {
- current_tok = EOS;
- lexeme = '\0';
+ if (*Current == '"') {
+ in_quote = ~in_quote;
+ if (!*++Current) {
+ Token = EOS;
+ Lexeme = '\0';
goto exit;
}
}
- saw_escape = (*input == '\\');
+ saw_escape = (*Current == '\\');
- if (!inquote) {
- if (isspace(*input)) {
- current_tok = EOS;
- lexeme = '\0';
+ if (!in_quote) {
+ if (isspace(*Current)) {
+ Token = EOS;
+ Lexeme = '\0';
goto exit;
}
- lexeme = esc(&input);
+ Lexeme = esc(&Current);
} else {
- if (saw_escape && input[1] == '"') {
+ if (saw_escape && Current[1] == '"') {
/* Skip the escaped character. */
- input += 2;
- lexeme = '"';
+ Current += 2;
+ Lexeme = '"';
} else {
- lexeme = *input++;
+ Lexeme = *Current++;
}
}
- current_tok = (inquote || saw_escape) ? L : [lexeme];
+ Token = (in_quote || saw_escape) ? L : TOKEN_MAP[Lexeme];
exit:
- return current_tok;
+ return Token;
}
@@ -143,24 +147,20 @@ enum token lex_advance(void)
* ```````
* Build the NFA
*/
-NFA *machine(void)
+struct nfa_t *machine(void)
{
- NFA *start;
- NFA *p;
-
- ENTER("machine");
+ struct nfa_t *start;
+ struct nfa_t *p;
p = start = new_nfa();
p->next = rule();
- while (!MATCH(END_OF_INPUT)) {
+ while (Token != END_OF_INPUT) {
p->next2 = new_nfa();
p = p->next2;
p->next = rule();
}
- LEAVE("machine");
-
return start;
}
@@ -177,14 +177,12 @@ NFA *machine(void)
* action --> <tabs> <string of characters>
* epsilon
*/
-NFA *rule(void)
+struct nfa_t *rule(void)
{
- NFA *start = NULL;
- NFA *end = NULL;
+ struct nfa_t *start = NULL;
+ struct nfa_t *end = NULL;
int anchor = NONE;
- ENTER("rule");
-
if (MATCH(AT_BOL)) {
start = new_nfa();
start->edge = '\n';
@@ -220,8 +218,6 @@ NFA *rule(void)
end->anchor = anchor;
advance(); // Skip past EOS
- LEAVE("rule");
-
return start;
}
@@ -249,13 +245,11 @@ NFA *rule(void)
* cat_expr
* do the OR
*/
-void expr(NFA **startp, NFA **endp)
+void expr(struct nfa_t **startp, struct nfa_t **endp)
{
- NFA *e2_start = NULL; /* expression to right of | */
- NFA *e2_end = NULL;
- NFA *p;
-
- ENTER("expr");
+ struct nfa_t *e2_start = NULL; /* expression to right of | */
+ struct nfa_t *e2_end = NULL;
+ struct nfa_t *p;
cat_expr(startp, endp);
@@ -273,7 +267,6 @@ void expr(NFA **startp, NFA **endp)
e2_end ->next = p;
*endp = p;
}
- LEAVE("expr");
}
@@ -292,31 +285,27 @@ void expr(NFA **startp, NFA **endp)
* cat_expr' -> | factor cat_expr'
* epsilon
*/
-void cat_expr(NFA **startp, NFA **endp )
+void cat_expr(struct nfa_t **startp, struct nfa_t **endp )
{
- NFA *e2_start;
- NFA *e2_end;
+ struct nfa_t *e2_start;
+ struct nfa_t *e2_end;
- ENTER("cat_expr");
-
- if (first_in_cat(current_tok))
+ if (first_in_cat(Token))
factor(startp, endp);
- while (first_in_cat(current_tok)) {
+ while (first_in_cat(Token)) {
factor(&e2_start, &e2_end);
- memcpy(*endp, e2_start, sizeof(NFA));
+ memcpy(*endp, e2_start, sizeof(struct nfa_t));
discard(e2_start);
*endp = e2_end;
}
-
- LEAVE("cat_expr");
}
-int first_in_cat(TOKEN tok)
+int first_in_cat(enum token tok)
{
switch(tok)
{
@@ -349,12 +338,10 @@ int first_in_cat(TOKEN tok)
/*
* factor --> term* | term+ | term?
*/
-void factor(NFA **startp, NFA **endp)
+void factor(struct nfa_t **startp, struct nfa_t **endp)
{
- NFA *start;
- NFA *end;
-
- ENTER("factor");
+ struct nfa_t *start;
+ struct nfa_t *end;
term(startp, endp);
@@ -376,8 +363,6 @@ void factor(NFA **startp, NFA **endp)
*endp = end;
advance();
}
-
- LEAVE("factor");
}
@@ -389,13 +374,11 @@ void factor(NFA **startp, NFA **endp)
* but not a carriage return (\r). All of these are single nodes in the
* NFA.
*/
-void term(NFA **startp, NFA **endp)
+void term(struct nfa_t **startp, struct nfa_t **endp)
{
- NFA *start;
+ struct nfa_t *start;
int c;
- ENTER("term");
-
if (MATCH(OPEN_PAREN)) {
advance();
expr(startp, endp);
@@ -449,35 +432,32 @@ void term(NFA **startp, NFA **endp)
advance();
}
}
- LEAVE("term");
}
-void dodash(SET *set)
+void dodash(struct set_t *set)
{
register int first;
/* Treat [-...] as a literal dash, but print a warning. */
if (MATCH(DASH)) {
- warning(W_STARTDASH);
- ADD(set, lexeme);
+ ADD(set, Lexeme);
advance();
}
for (; !MATCH(EOS) && !MATCH(CCL_END); advance()) {
if (!MATCH(DASH)) {
- first = lexeme;
- ADD(set, lexeme);
+ first = Lexeme;
+ ADD(set, Lexeme);
/* Looking at a dash */
} else {
advance();
/* Treat [...-] as literal */
if (MATCH(CCL_END)) {
- warning(W_ENDDASH);
ADD(set, '-');
} else {
- for (; first<=lexeme; first++)
+ for (; first<=Lexeme; first++)
ADD(set, first);
}
}
diff --git a/lex.h b/lex.h
index e69de29..977eba7 100644
--- a/lex.h
+++ b/lex.h
@@ -0,0 +1,69 @@
+#ifndef _LEXER_H
+#define _LEXER_H
+
+
+enum token {
+ EOS = 1, // end of string
+ ANY, // .
+ AT_BOL, // ^
+ AT_EOL, // $
+ CCL_END, // ]
+ CCL_START, // [
+ CLOSE_CURLY, // }
+ CLOSE_PAREN, // )
+ CLOSURE, // *
+ DASH, // -
+ END_OF_INPUT, // EOF
+ L, // literal character
+ OPEN_CURLY, // {
+ OPEN_PAREN, // (
+ OPTIONAL, // ?
+ OR, // |
+ PLUS_CLOSE // +
+};
+
+
+
+enum token advance(void);
+struct nfa_t *machine(void);
+struct nfa_t *rule(void);
+
+void expr(struct nfa_t **startp, struct nfa_t **endp);
+void cat_expr(struct nfa_t **startp, struct nfa_t **endp);
+int first_in_cat(enum token tok);
+
+void factor(struct nfa_t **startp, struct nfa_t **endp);
+void term(struct nfa_t **startp, struct nfa_t **endp);
+void dodash(struct set_t *set);
+
+
+
+enum token TOKEN_MAP[] = {
+// ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+// ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^]
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+// ^^ ^_ SPACE ! " # $ % & '
+ L, L, L, L, L, L, AT_EOL, L, L, L,
+// ( ) * + , - .
+ OPEN_PAREN, CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY,
+// / 0 1 2 3 4 5 6 7 8 9 : ; < =
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+// > ?
+ L, OPTIONAL,
+// @ A B C D E F G H I J K L M N
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+// O P Q R S T U V W X Y Z
+ L, L, L, L, L, L, L, L, L, L, L, L,
+// [ \ ] ^
+ CCL_START, L, CCL_END, AT_BOL,
+// _ ` a b c d e f g h i j k l m
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+// n o p q r s t u v w x y z
+ L, L, L, L, L, L, L, L, L, L, L, L, L,
+// { | } DEL
+ OPEN_CURLY, OR, CLOSE_CURLY, L
+};
+
+
+#endif
diff --git a/lib/error.c b/lib/error.c
new file mode 100644
index 0000000..c0ef8fa
--- /dev/null
+++ b/lib/error.c
@@ -0,0 +1,196 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#include "error.h"
+
+#define USE_ERRNO_H
+
+#ifdef USE_ERRNO_H
+char *etag[]={ "", "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO",
+ "ENXIO", "E2BIG", "ENOEXEC", "EBADF", "ECHILD",
+ "EAGAIN", "ENOMEM", "EACCES", "EFAULT", "ENOTBLK",
+ "EBUSY", "EEXIST", "EXDEV", "ENODEV", "ENOTDIR",
+ "EISDIR", "EINVAL", "ENFILE", "EMFILE", "ENOTTY",
+ "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE", "EROFS",
+ "EMLINK", "EPIPE", "EDOM", "ERANGE" };
+
+char *emsg[]={
+ "",
+ "Operation not permitted",
+ "No such file or directory",
+ "No such process",
+ "Interrupted system call",
+ "I/O error",
+ "No such device or address",
+ "Argument list too long",
+ "Exec format error",
+ "Bad file number",
+ "No child processes",
+ "Try again",
+ "Out of memory",
+ "Permission denied",
+ "Bad address",
+ "Block device required",
+ "Device or resource busy",
+ "File exists",
+ "Cross-device link",
+ "No such device",
+ "Not a directory",
+ "Is a directory",
+ "Invalid argument",
+ "File table overflow",
+ "Too many open files",
+ "Not a typewriter",
+ "Text file busy",
+ "File too large",
+ "No space left on device",
+ "Illegal seek",
+ "Read-only file system",
+ "Too many links"
+ "Broken pipe",
+ "Math argument out of domain of func",
+ "Math result not representable"
+};
+#endif /* USE_ERRNO_H */
+
+
+/**
+ * abort_report
+ * ````````````
+ * Print a formatted report to stderr and exit.
+ *
+ * @fmt: a printf-style format string
+ * @...: the variable argument list to the format string
+ */
+void abort_report(const char *fmt, ...)
+{
+ char buf[1000];
+ va_list args;
+
+ /* Write formatted output to stream */
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+
+ #ifdef USE_ERRNO_H
+ if (errno)
+ fprintf(stderr, "%s (%d): %s\n", etag[errno],errno,emsg[errno]);
+ #endif
+
+ fprintf(stderr, "The handler reported: \"%s\"\n", buf);
+
+ exit(1);
+}
+
+
+/**
+ * raise_report
+ * ````````````
+ * Print a formatted report to stderr and raise a signal.
+ *
+ * @signo: POSIX signal number to raise.
+ * @fmt : printf-style format string.
+ * @... : the variable argument list to the format string.
+ */
+void raise_report(sig_t signo, const char *fmt, ...)
+{
+ char buf[1000];
+ va_list args;
+
+ /* Write formatted output to stream */
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+
+ #ifdef USE_ERRNO_H
+ if (errno)
+ fprintf(stderr, "%s (%d): %s\n", etag[errno],errno,emsg[errno]);
+ #endif
+
+ fprintf(stderr, "The handler reported: \"%s\"\n", buf);
+
+ raise(signo);
+}
+
+
+
+/******************************************************************************
+ * SIGNAL HANDLING
+ *
+ * Overview
+ * --------
+ * Signals are usually Bad News that a process receives from the kernel.
+ *
+ *
+ * Signal Default action Description
+ * --------------------------------------------------------------------------
+ * SIGABRT A Process abort signal.
+ * SIGALRM T Alarm clock.
+ * SIGBUS A Access to an undefined memory portion.
+ * SIGCHLD I Child process terminated/stopped/continued.
+ * SIGCONT C Continue executing, if stopped.
+ * SIGFPE A Erroneous arithmetic operation.
+ * SIGHUP T Terminal hangup.
+ * SIGILL A Illegal instruction.
+ * SIGINT T Terminal interrupt.
+ * SIGKILL T Kill (cannot be caught or ignored).
+ * SIGPIPE T Write on a pipe with no one to read it.
+ * SIGQUIT A Terminal quit signal.
+ * SIGSEGV A Invalid memory reference.
+ * SIGSTOP S Stop executing (cannot be caught or ignored).
+ * SIGTERM T Termination signal.
+ * SIGTSTP S Terminal stop signal.
+ * SIGTTIN S Background process attempting read.
+ * SIGTTOU S Background process attempting write.
+ * SIGUSR1 T User-defined signal 1.
+ * SIGUSR2 T User-defined signal 2.
+ * SIGPOLL T Pollable event.
+ * SIGPROF T Profiling timer expired.
+ * SIGSYS A Bad system call.
+ * SIGTRAP A Trace/breakpoint trap.
+ * SIGURG I High bandwidth data availible at a socket.
+ * SIGVTALRM T Virtual timer expired.
+ * SIGXCPU A CPU time limit exceeded.
+ * SIGXFSZ A File size limit exceeded.
+ * --------------------------------------------------------------------------
+ *
+ *
+ * signal.h defines the sigaction() function:
+ *
+ * int sigaction(int sig, const struct sigaction *restrict act,
+ * struct sigaction *restrict oact);
+ *
+ * where 'act' specifies the implementation-defined signal handling, and
+ * 'oact' refers to the location at which the default signal handling
+ * configuration will be stored. These are of type struct sigaction, which
+ * is also defined in signal.h. See man(3) signal.h
+ *
+ ******************************************************************************/
+
+
+/**
+ * sigreg -- register a function to handle standard signals
+ */
+void sigreg(sig_handler_t handler)
+{
+ /* You say stop */
+ signal(SIGINT, handler);
+ signal(SIGABRT, handler);
+ signal(SIGINT, handler);
+ signal(SIGTERM, handler);
+ signal(SIGQUIT, handler);
+ signal(SIGSTOP, handler);
+
+ /* I say go */
+ signal(SIGPIPE, handler);
+ signal(SIGSEGV, handler);
+
+ /* You say goodbye */
+ signal(SIGUSR1, handler);
+ signal(SIGUSR2, handler);
+}
+
+
diff --git a/lib/error.h b/lib/error.h
new file mode 100644
index 0000000..571646b
--- /dev/null
+++ b/lib/error.h
@@ -0,0 +1,31 @@
+#ifndef _ERROR_H
+#define _ERROR_H
+#include "util.h"
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+
+/*
+ * Exit the program and print a diagnostic message
+ */
+#define bye(...) \
+ (VA_NUM_ARGS(__VA_ARGS__) == 1) ? abort_report(__VA_ARGS__, "") \
+ : abort_report(__VA_ARGS__) \
+
+/*
+ * Raise a signal and print an error.
+ */
+#define halt(sig, ...) \
+ (VA_NUM_ARGS(__VA_ARGS__) == 1) ? raise_report(sig, __VA_ARGS__, "") \
+ : raise_report(sig, __VA_ARGS__) \
+
+void abort_report(const char *fmt, ...);
+void raise_report(int signo, const char *fmt, ...);
+
+
+typedef void (*sig_handler_t)(int signo);
+void sigreg(sig_handler_t handler);
+
+
+#endif
+
diff --git a/lib/set.c b/lib/set.c
new file mode 100644
index 0000000..fd0c129
--- /dev/null
+++ b/lib/set.c
@@ -0,0 +1,501 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <signal.h>
+#include <string.h>
+#include "error.h"
+
+#include "set.h"
+
+/*
+ * new_set
+ * ```````
+ * Create a new SET.
+ *
+ * Returns: Pointer to an allocated default set.
+ */
+struct set_t *new_set(void)
+{
+ struct set_t *new;
+
+ if (!(new = calloc(1, sizeof(struct set_t)))) {
+ halt(SIGABRT, "No memory to create SET.\n")
+ return NULL; // "Shouldn't happen."
+ }
+
+ new->map = new->defmap;
+ new->nwords = _DEFWORDS;
+ new->nbits = _DEFBITS;
+
+ return new;
+}
+
+/**
+ * del_set
+ * ```````
+ * Destroy a set created with new_set().
+ *
+ * @set : pointer to a SET.
+ * Returns: Nothing.
+ */
+void del_set(struct set_t *set)
+{
+ if (set->map != set->defmap)
+ free(set->map);
+
+ free(set);
+}
+
+
+/**
+ * dup_set
+ * ```````
+ * Create a new set that has the same members as the input set.
+ *
+ * @set : Input set (to be duplicated).
+ * Returns: Pointer to a new set with members identical to @set.
+ */
+struct set_t *dup_set(struct set_t *set)
+{
+ struct set_t *new;
+
+ if (!(new = calloc(1, sizeof(struct set_t)))) {
+ halt(SIGABRT, "No memory to create SET.\n");
+ return NULL; // "Shouldn't happen."
+ }
+
+ new->compl = set->compl;
+ new->nwords = set->nwords;
+ new->nbits = set->nbits;
+
+ if (set->map == set->defmap) {
+ new->map = new->defmap;
+ memcpy(new->defmap, set->defmap, _DEFWORDS * sizeof(_SETTYPE));
+ } else {
+ new->map = (_SETTYPE *)malloc(set->nwords * sizeof(_SETTYPE));
+ if (!new->map)
+ halt(SIGABRT, "No memory to duplicate SET.\n");
+
+ memcpy(new->map, set->map, set->nwords * sizeof(_SETTYPE));
+ }
+
+ return new;
+}
+
+
+/**
+ * _add_set
+ * ````````
+ * Called by the ADD() macro when the set isn't big enough.
+ *
+ * @set: Pointer to a set.
+ * @bit: Bit to be added to set.
+ */
+int _add_set(struct set_t *set, int bit)
+{
+ grow_set(_ROUND(bit), set);
+ return _GBIT(set, bit, |=);
+}
+
+
+/**
+ * grow_set
+ * ````````
+ * @set : Pointer to set to be enlarged.
+ * @need: Number of words needed (target).
+ *
+ * NOTE
+ * This routine calls malloc() and is rather slow. Its use should be
+ * limited, and avoided if possible.
+ */
+void grow_set(struct set_t *set, int need)
+{
+ _SETTYPE *new;
+
+ if (!set || need <= set->nwords)
+ return;
+
+ if (!(new = (_SETTYPE *) malloc(need * sizeof(_SETTYPE))))
+ halt(SIGABRT, "No memory to expand SET.\n");
+
+ memcpy(new, set->map, set->nwords * sizeof(_SETTYPE));
+ memcpy(new + set->nwords, 0, (need - set->nwords) * sizeof(_SETTYPE));
+
+ if (set->map != set->defmap)
+ free(set->map);
+
+ set->map = new;
+ set->nwords = (unsigned char)need;
+ set->nbits = need * _BITS_IN_WORD;
+}
+
+
+/**
+ * num_set
+ * ```````
+ * Get the number of elements (non-zero bits) in a set. NULL sets are empty.
+ *
+ * @set : Pointer to the set.
+ * Return: number of set bits in @set.
+ */
+int num_set(struct set_t *set)
+{
+ /*
+ * Neat macro that will expand to a lookup table indexed by a
+ * number in the range 0-255, with tab[i] == the number of set
+ * bits in i.
+ *
+ * Hallvard Furuseth suggested this approach on Sean Anderson's
+ * Bit Twiddling Hacks website on July 14, 2009.
+ */
+ static const unsigned char nbits[256] =
+ {
+ #define B2(n) n, n+1, n+1, n+2
+ #define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
+ #define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
+ B6(0), B6(1), B6(1), B6(2)
+ };
+
+ unsigned int count = 0;
+ unsigned char *p;
+ int i;
+
+ if (!set)
+ return 0;
+
+ p = (unsigned char *)set->map;
+
+ for (i=_BYTES_IN_ARRAY(set->nwords); --i >= 0;)
+ count += nbits[*p++];
+
+ return count;
+}
+
+
+/**
+ * _set_test
+ * `````````
+ * Compares two sets. Returns as follows:
+ *
+ * _SET_EQUIV Sets are equivalent.
+ * _SET_INTER Sets intersect but aren't equivalent.
+ * _SET_DISJ Sets are disjoint.
+ *
+ * NOTE
+ * The smaller set is made larger if the two sets are different sizes.
+ */
+int _set_test(struct set_t *set1, struct set_t *set2)
+{
+ _SETTYPE *p1;
+ _SETTYPE *p2;
+ int rval;
+ int i;
+
+ rval = _SET_EQUIV;
+ i = max(set1->nwords, set2->nwords);
+
+ grow_set(i, set1);
+ grow_set(i, set2);
+
+ p1 = set1->map;
+ p2 = set2->map;
+
+ for (; --i >= 0; p1++, p2++) {
+ if (*p1 != *p2)
+ return *p1 - *p2;
+ }
+
+ /*
+ * You get here if the sets are not equivalent.
+ * If the sets intersect, you can return immediately,
+ * but have to keep going in the case of disjoint
+ * sets because they might actually intersect
+ * at some yet-unseen byte.
+ */
+ if ((j = set1->nwords - i) > 0) // set1 is larger
+ while (--j >= 0) {
+ if (*p1++)
+ return 1;
+ }
+ } else if ((j = set2->nwords - i) > 0) // set2 is larger
+ while (--j >= 0) {
+ if (*p2++)
+ return -1;
+ }
+ }
+ return 0; // they are equivalent.
+}
+
+
+/**
+ * set_cmp
+ * ```````
+ * Yet another comparison function. Works like strcmp().
+ *
+ * @set1 : Pointer to a set to be compared.
+ * @set2 : Pointer to another set.
+ * Returns: 0 if set1==set2, < 0 if set1<set2, > 0 if set1>set2.
+ */
+void set_cmp(struct set_t *set1, struct set_t *set2)
+{
+ _SETTYPE *p1;
+ _SETTYPE *p2;
+ int j;
+ int i;
+
+ i = j = min(set1->nwords, set2->nwords);
+
+ for (p1 = set1->map, p2 = set2->map; --j >= 0; p1++, p2++) {
+ if (*p1 != *p2)
+ return *p1 - *p2;
+ }
+
+ /*
+ * You get here only if all words in both sets are the same.
+ * Check the tail end of the larger array for all zeroes.
+ */
+ if ((j = set1->nwords - i) > 0) // set1 is larger
+ while (--j >= 0) {
+ if (*p1++)
+ return 1;
+ }
+ } else if ((j = set2->nwords - i) > 0) // set2 is larger
+ while (--j >= 0) {
+ if (*p2++)
+ return -1;
+ }
+ }
+ return 0; // they are equivalent.
+}
+
+
+/**
+ * sethash
+ * ```````
+ * Hash a set by summing together the words in the bitmap.
+ *
+ * @set : Pointer to a set.
+ * Return: hashed value.
+ */
+unsigned sethash(struct set_t *set)
+{
+ _SETTYPE *p;
+ unsigned total;
+ int j;
+
+ total = 0;
+ j = set->nwords;
+ p = set->map;
+
+ while (--j >= 0)
+ total += *p++;
+
+ return total;
+}
+
+/**
+ * is_subset
+ * `````````
+ * Attempt to determine if 'sub' is a subset of 'set'.
+ *
+ * @set : Pointer to a set.
+ * @sub : Pointer to a possible subset of @set.
+ * Return: 1 if @sub is a subset of @set, otherwise 0.
+ *
+ * NOTE
+ * If @sub is larger than @set, the extra bytes must be all zeroes.
+ */
+int is_subset(struct set_t *set, struct set_t *sub)
+{
+ _SETTYPE *subsetp;
+ _SETTYPE *setp;
+ int common; // Number of bytes in potential subset.
+ int tail; // Number of implied 0 bytes in subset.
+
+ if (sub->nwords > set->nwords) {
+ common = set->nwords;
+ tail = sub->nwords - common;
+ } else {
+ common = sub->nwords;
+ tail = 0;
+ }
+
+ subsetp = sub->map;
+ setp = set->map;
+
+ for (; --common >= 0; subsetp++, setp++) {
+ if ((*subsetp & *setp) != *subsetp)
+ return 0;
+ }
+
+ while (--tail >= 0) {
+ if (*subsetp++)
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/**
+ * _set_op
+ * ```````
+ * Performs binary operations depending on op.
+ *
+ * @op: One of _UNION, _INTERSECT, _DIFFERENCE, or _ASSIGN.
+ * @dest : Destination set.
+ * @src : Source set.
+ * Returns: nothing.
+ */
+void _set_op(int op, struct set_t *dest, struct set_t *src)
+{
+ _SETTYPE *d; // Destination map.
+ _SETTYPE *s; // Source map.
+ unsigned ssize; // Number of words in source set.
+ int tail // Dest is this many words bigger than source.
+
+ ssize = src->nwords;
+
+ /* Make sure destination is big enough. */
+ if ((unsigned)dest->nwords < ssize)
+ grow_set(ssize, dest);
+
+ tail = dest->nwords - ssize;
+ d = dest->map;
+ s = src->map;
+
+ switch (op) {
+ case _UNION:
+ while (--ssize >= 0)
+ *d++ |= *s++;
+ break;
+ case _INTERSECT:
+ while (--ssize >= 0)
+ *d++ &= *s++;
+ while (--tail >= 0)
+ *d++ = 0;
+ break;
+ case _DIFFERENCE:
+ while (--ssize >= 0)
+ *d++ ^= *s++;
+ break;
+ case _ASSIGN:
+ while (--ssize >= 0)
+ *d++ = *s++;
+ while (--tail >= 0)
+ *d++ = 0;
+ break;
+ }
+}
+
+/**
+ * invert_set
+ * ``````````
+ * Physically invert the bits in the set. Compare with COMPLIMENT().
+ *
+ * @set : Pointer to a set.
+ * Return: Nothing.
+ */
+void invert_set(struct set_t *set)
+{
+ _SETTYPE *p;
+ _SETTYPE *end;
+
+ for (p = set->map, end = p + set->nwords; p < end; p++)
+ *p = ~*p;
+}
+
+
+/**
+ * trunc_set
+ * `````````
+ * Clear a set and truncate it to the default size. Compare with CLEAR().
+ *
+ * @set : Pointer to a set.
+ * Return: Nothing.
+ */
+void trunc_set(struct set_t *set)
+{
+ if (set->map != set->defmap) {
+ free(set->map);
+ set->map = set->defmap;
+ }
+ set->nwords = _DEFWORDS;
+ set->nbits = _DEFBITS;
+ memset(set->defmap, 0, sizeof(set->defmap));
+}
+
+
+/**
+ * next_member
+ * ```````````
+ * Access all members of a set sequentially.
+ *
+ * @set : Pointer to a set.
+ * Return: value of each bit.
+ *
+ * USAGE
+ * Like strtok()
+ * set == NULL resets.
+ * set changed from last call resets and returns first element.
+ * otherwise the next element is returned, or else -1 if none.
+ */
+int next_member(struct set_t *set)
+{
+ static struct set_t *oset = 0;
+ static int current = 0;
+ _SETTYPE *map;
+
+ if (!set)
+ return ((int)(oset = NULL));
+
+ if (oset != set) {
+ oset = set;
+ current = 0;
+
+ for (map = set->map; *map == 0 && current < set->nbits; ++map)
+ current += _BITS_IN_WORD;
+ }
+
+ /*
+ * The increment must be put in the test because if TEST()
+ * evaluates true, the increment on the right of the for would
+ * never be executed.
+ */
+ while (current++ < set->nbits) {
+ if (TEST(set, current-1))
+ return (current - 1);
+ }
+ return (-1);
+}
+
+
+/**
+ * print_set
+ * `````````
+ * Print a set in human-readable form.
+ *
+ * @set: pointer to a set
+ */
+void print_set(struct set_t *set)
+{
+ int i;
+
+ if (!set)
+ printf("Null set.\n");
+
+ else {
+ next_member(NULL);
+ while ((i = next_member(set)) >= 0) {
+ did_something++;
+ printf("%d", i);
+ }
+ next_member(NULL);
+
+ if (!did_something)
+ printf("Empty set.\n");
+ }
+}
+
+
diff --git a/lib/set.h b/lib/set.h
new file mode 100644
index 0000000..ab8d982
--- /dev/null
+++ b/lib/set.h
@@ -0,0 +1,129 @@
+#ifndef _SET_H
+#define _SET_H
+
+
+/* One cell in the bitmap */
+unsigned short _SETTYPE;
+
+
+#define _BITS_IN_WORD (16)
+#define _BYTES_IN_ARRAY(x) (x << 1)
+#define _DEFWORDS 8
+#define _DEFBITS (_DEFWORDS * _BITS_IN_WORD)
+
+/*
+ * Evaluates to the array element that holds the bit x.
+ * Performs a simple integer divide by 16, which is
+ * implemented as a right shift of 4.
+ */
+#define _DIV_WSIZE(x) ((unsigned)(x) >> 4)
+
+/*
+ * Evaluates to the position of the bit within the word,
+ * i.e. the offset in bits from the least-significant bit.
+ * Performs a modulus 16 using a bitwise AND.
+ */
+#define _MOD_WSIZE(x) ((x) & 0x0f)
+
+/*
+ * Used to expand the size of the array. An array grows in
+ * _DEFWORDS-sized chunks.
+ *
+ * >>3 is equivalent to integer division by 8.
+ * <<3 is equivalent to integer multiplication by 8.
+ *
+ * Imagine we start with the default array of 8 chunks, and
+ * wish to add the number 200 to the set. The array must be
+ * expanded to do this, and after the expansion, the array
+ * should have 16 elements in it:
+ *
+ * (((_DIV_WSIZE(200) + 8) >> 3) << 3)
+ *
+ * (((((unsigned)(200) >> 4) + 8) >> 3) << 3)
+ * (((12 + 8) >> 3) << 3)
+ * ((20 >> 3) << 3)
+ * (2 << 3)
+ * 16
+ */
+#define _ROUND(bit) (((_DIV_WSIZE(bit) + 8) >> 3) << 3)
+
+
+/*
+ * The defmap array is the default bitmap. Initially, map is
+ * set to point at defmap. When the array grows, however, instead
+ * of calling realloc() to change the size of defmap (and thus _set_),
+ * map is simply pointed at the newly malloc'd array.
+ *
+ * The reason for this is that realloc() will copy the entire memory
+ * array to the newly allocated one, whereas only the map needs to be
+ * copied. You can thus save time at the expense of some memory if you
+ * do it yourself.
+ */
+struct set_t {
+ unsigned char nwords; // Number of words in the map
+ unsigned char compl; // Is a negative true set if true
+ unsigned nbits; // Number of bits in map
+ _SETTYPE *map; // Pointer to the map
+ _SETTYPE defmap[_DEFWORDS]; // The map itself
+};
+
+
+/*
+ * Op arguments passed to _set_op
+ */
+#define _UNION 0 // x is in s1 or s2
+#define _INTERSECT 1 // x is in s1 and s2
+#define _DIFFERENCE 2 // (x in s1) && (x not in s2)
+#define _ASSIGN 4 // s1 = s2
+
+#define UNION(d,s) _set_op(_UNION, d, s)
+#define INTERSECT(d,s) _set_op(_INTERSECT, d, s)
+#define DIFFERENCE(d,s) _set_op(_DIFFERENCE, d, s)
+#define ASSIGN(d,s) _set_op(_ASSIGN, d, s)
+
+#define CLEAR(s) memset((s)->map, 0, (s)->nwords * sizeof(_SETTYPE))
+#define FILL(s) memset((s)->map, ~0, (s)->nwords * sizeof(_SETTYPE))
+#define COMPLIMENT(s) ((s)->compl = ~(s)->compl)
+#define INVERT(s) invert(s)
+
+/* Value returned from _set_test */
+#define _SET_EQUIV 0
+#define _SET_DISJ 1
+#define _SET_INTER 2
+
+#define IS_DISJOINT(s1,s2) (_set_test(s1,s2) == _SET_DISJ)
+#define IS_INTERSECTING(s1,s2) (_set_test(s1,s2) == _SET_INTER)
+#define IS_EQUIVALENT(s1,s2) (set_cmp((a),(b)) == 0)
+#define IS_EMPTY(s) (set_num(s) == 0)
+
+/*
+ * CAVEAT
+ * Heavy duty side effects ahead, be aware.
+ */
+
+#define _GETBIT(s,x,op) (((s)->map)[_DIV_WSIZE(x)] op (1 << _MOD_WSIZE(x)))
+
+#define ADD(s,x) (((x) >= (s)->nbits) ? _add_set(s,x) : _GETBIT(s,x,|=))
+#define REMOVE(s,x) (((x) >= (s)->nbits) ? 0 : _GETBIT(s,x,&=~))
+#define TEST(s,x) ((MEMBER(s,x)) ? !(s)->compl : (s)->compl )
+
+
+struct set_t *new_set(void);
+void del_set(struct set_t *set);
+struct set_t *dup_set(struct set_t *set);
+int _add_set(struct set_t *set, int bit);
+void grow_set(struct set_t *set, int need);
+void _set_op(int op, struct set_t *dest, struct set_t *src);
+int _set_test(struct set_t *set1, struct set_t *set2);
+
+int set_num (struct set_t *set);
+void set_cmp (struct set_t *set1, struct set_t *set2);
+unsigned set_hash (struct set_t *set);
+void set_invert(struct set_t *set);
+void set_trunc (struct set_t *set);
+int set_next (struct set_t *set);
+void set_print (struct set_t *set);
+int is_subset (struct set_t *set, struct set_t *sub);
+
+
+#endif
diff --git a/lib/textutils.c b/lib/textutils.c
new file mode 100644
index 0000000..aa5e795
--- /dev/null
+++ b/lib/textutils.c
@@ -0,0 +1,756 @@
+/*
+ * textutils.c -- byte-oriented character and string routines.
+ *
+ * Copyright (C) 2012 Jason Linehan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <ctype.h>
+#include <stdbool.h>
+
+#include "textutils.h"
+
+
+/**
+ * szero
+ * `````
+ * Given a character buffer, set the contents to '\0'.
+ *
+ * @str : pointer to a byte buffer
+ * Return: nothing.
+ */
+void szero(char *str)
+{
+ memset(str, '\0', strlen(str));
+}
+
+
+/**
+ * sdup
+ * ````
+ * Copy *str to a newly-alloc'd buffer, and return a pointer to it.
+ *
+ * @str : pointer to a '\0'-terminated char string
+ * Return: pointer to a copy of *str, else NULL.
+ */
+char *sdup(const char *str)
+{
+ char *copy;
+ size_t len;
+
+ len = strlen(str) + 1;
+ copy = malloc(len);
+
+ return copy ? memcpy(copy, str, len) : NULL;
+}
+
+
+/**
+ * sldup
+ * `````
+ * Copy *str to a newly-alloc'd buffer of size len, and return a pointer to it.
+ *
+ * @str : pointer to a '\0'-terminated char string
+ * @len : size of buffer (including '\0')
+ * Return: pointer to a copy of *str, else NULL.
+ */
+char *sldup(const char *str, size_t max)
+{
+ char *copy;
+ size_t len;
+ size_t end;
+
+ len = strlen(str) + 1;
+ len = (len > max) ? max : len; // lesser of two weevils
+ end = len - 1;
+
+ if (!(copy = calloc(1, len)))
+ return NULL;
+
+ copy[end] = '\0';
+
+ return memcpy(copy, str, end);
+}
+
+
+/**
+ * slcpy
+ * `````
+ * Writes at most len characters from src to dst.
+ *
+ * @dst : destination buffer
+ * @src : source buffer
+ * @len : length of source buffer
+ * Return: number of bytes written.
+ */
+size_t slcpy(char *dst, const char *src, size_t siz)
+{
+ const char *s;
+ char *d;
+ size_t n;
+
+ d = dst;
+ s = src;
+ n = siz;
+
+ /* Copy as many bytes from src as will fit in dst */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+ /*
+ * Not enough room in dst, add NUL
+ * and traverse the rest of src
+ */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+ return(s - src - 1); /* count does not include NUL */
+}
+
+
+/**
+ * slcat
+ * `````
+ * Concatenates src and dst in dst.
+ *
+ * @dst : destination buffer
+ * @src : source buffer
+ * @siz : size of source buffer
+ * Return: Number of bytes concatenated
+ */
+size_t slcat(char *dst, const char *src, size_t siz)
+{
+ char *d;
+ const char *s;
+ size_t n;
+ size_t dlen;
+
+ d = dst;
+ s = src;
+ n = siz;
+
+ /*
+ * Find the end of dst and adjust bytes
+ * left, but don't go past end
+ */
+ while (n--!=0 && *d!='\0')
+ d++;
+
+ dlen = d - dst;
+ n = siz - dlen;
+
+ if (n == 0)
+ return(dlen + strlen(s));
+
+ while (*s != '\0') {
+ if (n != 1) {
+ *d++ = *s;
+ n--;
+ }
+ s++;
+ }
+ *d = '\0';
+
+ return (dlen + (s - src)); /* count does not include NUL */
+}
+
+
+/**
+ * match
+ * `````
+ * Locate first occurance of string 'needle' in string 'haystack'
+ *
+ * @haystack: the string being searched for a match
+ * @needle : the pattern being matched in 'haystack'
+ * Return : The first occurance of 'needle'
+ */
+char *match(const char *haystack, const char *needle)
+{
+ size_t len_haystack;
+ size_t len_needle;
+
+ if (!needle || !haystack)
+ return NULL;
+
+ len_haystack = strlen(haystack);
+ len_needle = strlen(needle);
+
+ /* Needle can't be larger than haystack */
+ if (len_needle > len_haystack)
+ return NULL;
+
+ return memmem(haystack, needle);
+}
+
+
+/**
+ * field
+ * `````
+ * Return pointer to a delimited substring (not including delimiter)
+ *
+ * @str : the string being matched against
+ * @delim: the delimiter to be searched for
+ * Return: pointer to the start of the substring
+ */
+char *field(const char *string, const char *delimiter)
+{
+ size_t offset;
+ char *frame;
+
+ if (!string || !delimiter)
+ return NULL;
+
+ if (frame = match(string, delimiter), !frame)
+ return NULL;
+
+ offset = strlen(delimiter);
+
+ return &frame[offset];
+}
+
+
+/**
+ * pumpf
+ * `````
+ * Write a formatted character string into an auto-allocated buffer
+ *
+ * @strp : pointer to a character buffer (will be allocated)
+ * @fmt : format string
+ * @... : format string arguments
+ * Return: length of the formatted string at *strp
+ */
+void pumpf(char **strp, const char *fmt, ...)
+{
+ va_list args;
+ size_t len;
+ FILE *stream;
+
+ /* Open a new FILE stream. *strp will be dynamically allocated to
+ * contain characters written to the stream, and len will reflect
+ * these changes. See man(3) open_memstream. */
+ stream = open_memstream(strp, &len);
+
+ if (!stream)
+ /* Unable to open FILE stream */
+ return;
+
+ /* Write formatted output to stream */
+ va_start(args, fmt);
+ vfprintf(stream, fmt, args);
+ va_end(args);
+
+ fflush(stream);
+ fclose(stream);
+}
+
+
+/******************************************************************************/
+#define LONGALIGNED(X) ((long)X & (sizeof(long) - 1))
+#define LONGBYTES (sizeof(long))
+#define USE_BYTEWISE(len) ((len) < LONGBYTES)
+
+/* NUL expands to nonzero if X (long int) contains '\0' */
+#if LONG_MAX == 2147483647L
+#define NUL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
+#elif LONG_MAX == 9223372036854775807L
+#define NUL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
+#else
+#error memchar: long int is neither a 32bit nor a 64bit value
+#endif
+
+#define DETECTCHAR(X,MASK) (NUL(X ^ MASK)) /* nonzero if X contains MASK. */
+
+
+/**
+ * textutils_memchr
+ * ````````````````
+ * Search memory starting at src for character 'c'
+ * If 'c' is found within 'len' characters of 'src', a pointer
+ * to the character is returned. Otherwise, NULL is returned.
+ */
+void *textutils_memchr(const void *src_void, int c, size_t len)
+{
+ const int *src = (const int *)src_void;
+ int d = c;
+
+ #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+ unsigned long *asrc;
+ unsigned long mask;
+ int i;
+
+ while (LONGALIGNED(src)) {
+ if (!len--)
+ return NULL;
+ if (*src == d)
+ return (void *)src;
+ src++;
+ }
+ /*
+ * If we get this far, we know that length is large and src is
+ * word-aligned.
+ */
+ if (!USE_BYTEWISE(len)) {
+ /*
+ * The fast code reads the source one word at a time
+ * and only performs the bytewise search on word-sized
+ * segments if they contain the search character, which
+ * is detected by XOR-ing the word-sized segment with a
+ * word-sized block of the search character and then
+ * detecting for the presence of NUL in the result.
+ */
+ asrc = (unsigned long *)src;
+ mask = ((d << 8) | d);
+ mask = ((mask << 16) | mask);
+
+ for (i=32; i<8*LONGBYTES; i<<=1) {
+ mask = ((mask << i) | mask);
+ }
+
+ while (len >= LONGBYTES) {
+ if (DETECTCHAR(*asrc, mask))
+ break;
+ len -= LONGBYTES;
+ asrc++;
+ }
+ /*
+ * If there are fewer than LONGBYTES characters left,
+ * we decay to the bytewise loop.
+ */
+ src = (int *)asrc;
+ }
+ #endif /* !PREFER_SIZE_OVER_SPEED */
+
+ while (len--) {
+ if (*src == d)
+ return (void *)src;
+ src++;
+ }
+ return NULL;
+}
+
+
+/**
+ * chrswp -- replace (swap) the first occurence of a char byte with another
+ * @src : memory area to be searched
+ * @at : char byte to be searched for in 'src'
+ * @with: char byte which will overwrite the first occurence of 'at'
+ * @len : maximum length to search
+ */
+void chrswp(char *src, char at, char with, size_t len)
+{
+ char *sub;
+ if ((sub = (char *)memchr(src, at, len)), sub!=NULL && *sub==at)
+ *sub = with;
+}
+
+char *trimws(char *str)
+{
+ char *end;
+
+ /* Trim leading space */
+ while (isspace(*str))
+ str++;
+
+ /* Check for empty string */
+ if (*str == '\0')
+ return str;
+
+ /* Trim trailing space */
+ end = str + strlen(str) - 1;
+ while (end > str && isspace(*end))
+ end--;
+
+ /* Write new NUL terminator */
+ *(end+1) = '\0';
+
+ return str;
+}
+
+
+/*
+ * Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LIBC
+# define __builtin_expect(expr, val) (expr)
+#endif
+
+
+/* Return the first occurrence of NEEDLE in HAYSTACK. */
+void *textutils_memmem(const void *haystack, const void *needle)
+{
+ const char *begin;
+ const char *final;
+ size_t len_haystack;
+ size_t len_needle;
+
+ len_haystack = strlen((const char*)haystack);
+ len_needle = strlen((const char*)needle);
+
+ final = (const char *)haystack + (len_haystack - len_needle);
+
+ /* The first occurrence of the empty string is deemed to occur at
+ the beginning of the string. */
+ if (len_needle == 0)
+ return (void *) haystack;
+
+ /* Sanity check, otherwise the loop might search through the whole
+ memory. */
+ if (__builtin_expect (len_haystack < len_needle, 0))
+ return NULL;
+
+ for (begin = (const char *)haystack; begin <= final; ++begin) {
+ if (begin[0] == ((const char *) needle)[0]
+ && !memcmp((const void *)&begin[1], (const void *)((const char *)needle+1), len_needle-1))
+ return (void *)begin;
+ }
+
+ return NULL;
+}
+
+char *textutils_strstr(const char *h, const char *n)
+{
+ const char *begin;
+ const char *end;
+ size_t hlen;
+ size_t nlen;
+
+ hlen = strlen(h);
+ nlen = strlen(n);
+
+ end = h + (hlen - nlen);
+
+ /*
+ * The first occurrence of the empty string is deemed to occur at
+ * the beginning of the string.
+ */
+ if (nlen == 0)
+ return (char *)h;
+
+ /*
+ * Sanity check, otherwise the loop might search through the whole
+ * memory.
+ */
+ if (__builtin_expect (hlen < nlen, 0))
+ return NULL;
+
+ for (begin=h; begin<=end; ++begin) {
+ if (begin[0] == n[0]
+ && !memcmp((const void *)(begin+1),(const void *)(n+1),nlen-1))
+ return (char *)begin;
+ }
+ return NULL;
+}
+
+
+/**
+ * sbif
+ * ````
+ * Bifurcate a string at into two substrings if a token is found.
+ *
+ * @l : destination of left side of string.
+ * @r : destination of right side of string.
+ * @str : original string.
+ * @tok : token to split at.
+ * Return: nothing.
+ */
+size_t sbif(char *l, char *r, const char *str, const char *tok)
+{
+ const char *cur;
+ char *cpy;
+ size_t t_len;
+ size_t s_len;
+
+ s_len = strlen(str);
+ t_len = strlen(tok);
+
+ if (t_len == 0)
+ return -1;
+
+ if (__builtin_expect (s_len < t_len, 0))
+ return -1;
+
+ cpy = l; // Start with the left string.
+
+ for (cur=str; *cur!='\0'; cur++) {
+ if (cur[0]==tok[0] && !memcmp((cur+1), (tok+1), (t_len-1))) {
+ *cpy = '\0'; // NUL-terminate left string.
+ cpy = r; // We copy the right side now
+ cur += t_len; // Move cursor past the token
+ }
+ *cpy = *cur;
+ cpy++;
+ }
+ *cpy = '\0'; // NUL-terminate right string.
+
+
+ return 1;
+}
+
+
+
+size_t catenate(char *dest, size_t max, int n, char *strings[])
+{
+ size_t len = 0;
+ int i;
+
+ for (i=0; i<n; i++) {
+ len += slcat(dest, strings[i], max);
+ len += slcat(dest, " ", max);
+ }
+ return len;
+}
+
+
+size_t tonext(char *str, char tok)
+{
+ char *match;
+ size_t len;
+
+ len = strlen(str);
+
+ match = (char *)memchr(str, tok, len);
+
+ return len - strlen(match);
+}
+
+
+
+bool is_ws(char c) {
+ switch (c) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\f':
+ case '\r':
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/**
+ * tail
+ * ````
+ * Return pointer to the last character of a string (not including newline).
+ */
+char *tail(char *string)
+{
+ return string + strlen(string)-1;
+}
+
+
+/**
+ * trimcpy
+ * ```````
+ * Trim leading/trailing whitespace at src and copy result to dst
+ *
+ * @dst : destination buffer (must be allocated)
+ * @src : source buffer
+ * Return: number of bytes written to dst.
+ */
+size_t trimcpy(char *dst, const char *src)
+{
+ const char *end;
+
+ /* Leading */
+ while (isspace(*src))
+ src++;
+
+ /* All spaces */
+ if (*src == 0)
+ return 0;
+
+ /* Trailing */
+ end = src + strlen(src) - 1;
+
+ while (end > src && isspace(*end))
+ end--;
+ end++;
+
+ return slcpy(dst, src, (end-src)+1); // slcpy adds NUL
+}
+
+
+int ntok(const char *str, const char *tok)
+{
+ size_t toklen;
+ char *sub;
+
+ int count=0;
+
+ toklen = strlen(tok);
+
+ for (sub = (char *)memmem(str, tok);
+ sub != NULL;
+ sub = (char *)memmem((sub+toklen), tok))
+ {
+ count++;
+ }
+
+ return count;
+}
+
+
+/**
+ * strip_comments
+ * ``````````````
+ * Replace C-like comments with whitespace space characters.
+ *
+ * @str : String to strip comment symbols from.
+ * Return: Does not return.
+ *
+ * NOTE
+ * Multi-line comments are supported.
+ */
+void strip_comments(char *str)
+{
+ static bool in_comment = false;
+
+ for (; *str; str++) {
+ if (in_comment) {
+ /* Exiting comment zone */
+ if (str[0] == '*' && str[1] == '/') {
+ in_comment = false;
+ *str++ = ' ';
+ }
+ /* Replace comments with space. */
+ if (!isspace(*str)) {
+ *str = ' ';
+ }
+ } else {
+ /* Entering comment zone */
+ if (str[0] == '/' && str[1] == '*') {
+ in_comment = true;
+ *str++ = ' ';
+ *str = ' ';
+ }
+ }
+ }
+}
+
+
+/**
+ * getline
+ * ```````
+ * Get a line of input.
+ *
+ * @stringp: Pointer to a string pointer.
+ * @max : Get no more than n-1 characters.
+ * @stream : Get the next string from this file stream.
+ * Return : EOF if end of file, 0 if line is too long, otherwise lookahead.
+ */
+int getline(char **dest, int n, FILE *stream)
+{
+ static int lookahead = 0;
+ char *str;
+
+ str = *dest;
+
+ /* Initialize */
+ if (lookahead == 0)
+ lookahead = getc(stream);
+
+ if (n > 0 && lookahead != EOF) {
+ while(n--> 0) {
+ *str = lookahead = getc(stream);
+
+ if (*str == '\n' || *str == EOF)
+ break;
+ str++;
+ }
+ *str = '\0';
+ *dest = str;
+ }
+ return (n <= 0) ? 0 : lookahead;
+}
+
+
+
+/**
+ * get_expr
+ * ````````
+ * Get a regular expression and associated string from the input stream.
+ *
+ * @fd_input: file descriptor of the input stream.
+ * Return : Pointer to the line containing the regex and string in it.
+ *
+ * NOTE
+ * Discards all blank lines and concatenates all whitespace-separated strings.
+ */
+char *get_expr(FILE *fd_input)
+{
+ static int lookahead = 0;
+ int size;
+ char *line;
+
+ size = MAXINP;
+
+ /* If the next line starts with %, return the EOF marker. */
+ if (lookahead == '%')
+ return NULL;
+
+ while ((lookahead = getline(&line, size-1, fd_input)) != EOF) {
+ if (lookahead == 0)
+ halt(SIGABRT, "Rule too long\n");
+
+ /* Ignore blank lines. */
+ if (!line[0])
+ continue;
+
+ size = MAXINP - (line-IBUF);
+
+ /* Ignore whitespace */
+ if (!isspace(lookahead))
+ break;
+
+ *line++ = '\n';
+ }
+
+ return lookahead ? line : NULL;
+}
+
+
diff --git a/lib/textutils.h b/lib/textutils.h
new file mode 100644
index 0000000..475597f
--- /dev/null
+++ b/lib/textutils.h
@@ -0,0 +1,77 @@
+#ifndef __TEXTUTILS_H
+#define __TEXTUTILS_H
+
+#include <string.h>
+
+/* Initialization ----------------------------------------------------------- */
+void szero(char *str);
+
+/* Safe strings ------------------------------------------------------------- */
+char *sdup(const char *str);
+char *sldup(const char *str, size_t max);
+size_t slcpy(char *dst, const char *src, size_t siz);
+size_t slcat(char *dst, const char *src, size_t siz);
+size_t sbif(char *l, char *r, const char *str, const char *tok);
+
+/* String sets -------------------------------------------------------------- */
+size_t catenate(char *dest, size_t max, int n, char *strings[]);
+//const char *concat(const char *a, const char *b);
+char *match(const char *haystack, const char *needle);
+char *field(const char *string, const char *delimiter);
+int ntok(const char *str, const char *tok);
+void chrswp(char *src, char at, char with, size_t len);
+
+/* Format print ------------------------------------------------------------- */
+void pumpf(char **strp, const char *fmt, ...);
+
+/* Whitespace --------------------------------------------------------------- */
+size_t trimcpy(char *dst, const char *src);
+char *trimws(char *str);
+
+void strip_comments(char *str);
+
+char *tail(char *string);
+
+char *get_expr(FILE *fd_input);
+
+
+/* Raw memory --------------------------------------------------------------- */
+#define memmem textutils_memmem
+#define strstr textutils_strstr
+#define memchr textutils_memchr
+#define getline textutils_getline
+
+void *textutils_memmem(const void *haystack, const void *needle);
+char *textutils_strstr(const char *haystack, const char *needle);
+void *textutils_memchr(const void *src_void, int c, size_t len);
+int textutils_getline(char **dest, int n, FILE *stream);
+
+/* Nice macros -------------------------------------------------------------- */
+
+#define STRCMP(a,b) (strcmp((a),(b)) == 0) ? true : false
+#define isarg(n, string) (STRCMP(argv[(n)], (string)))
+#define ARG(n) (argv[(n)])
+
+#define STREMPTY(s) (STRCMP((s),""))
+
+
+/**
+ * concat
+ * ``````
+ * Return pointer to a static value of 2 concatenated strings.
+ * @a: first string (head)
+ * @b: second string (tail)
+ * Return: pointer to the concateneated string, static.
+ */
+static inline const char *concat(const char *a, const char *b)
+{
+ #define BIG 9000
+ static char buffer[BIG];
+
+ slcpy(buffer, a, BIG);
+ slcat(buffer, b, BIG);
+
+ return buffer;
+}
+
+#endif
diff --git a/lib/util.h b/lib/util.h
new file mode 100644
index 0000000..1cefe51
--- /dev/null
+++ b/lib/util.h
@@ -0,0 +1,399 @@
+#ifndef _PUMP_UTIL_H
+#define _PUMP_UTIL_H
+
+
+/******************************************************************************
+ * THINGS I PILFERED FROM THE LINUX KERNEL
+ *
+ * Like a freak show of the unfathomable, equal parts lewd and fascinating...
+ ******************************************************************************/
+/******************************************************************************
+ * The __CHECKER__ family of macros provide reminders and diagnostics to
+ * check that your code isn't needlessly wasting time or doing something
+ * silly.
+ ******************************************************************************/
+#ifdef __CHECKER__
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)
+#define BUILD_BUG_ON_ZERO(e) (0)
+#define BUILD_BUG_ON_NULL(e) ((void*)0)
+#define BUILD_BUG_ON(condition)
+#define BUILD_BUG() (0)
+#else /* __CHECKER__ */
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but
+ * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
+ * to inline functions). So as a fallback we use the optimizer; if it can't
+ * prove the condition is false, it will cause a link error on the undefined
+ * "__build_bug_on_failed". This error message can be harder to track down
+ * though, hence the two different methods.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int __build_bug_on_failed;
+#define BUILD_BUG_ON(condition) \
+ do { \
+ ((void)sizeof(char[1 - 2*!!(condition)])); \
+ if (condition) __build_bug_on_failed = 1; \
+ } while(0)
+#endif
+
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() \
+ do { \
+ extern void __build_bug_failed(void) \
+ __linktime_error("BUILD_BUG failed"); \
+ __build_bug_failed(); \
+ } while (0)
+
+
+/**
+ * BUILD_BUG_ON_NOT_POWER_OF_2
+ * Force a compilation error if a constant expression is not a power of 2
+ */
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+
+/**
+ * BUILD_BUG_ON_ZERO / NULL
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+
+
+/**
+ * BUILD_BUG_ON_INVALID
+ * Permits the compiler to check the validity of the expression but
+ * avoids the generation of any code, even if that expression has
+ * side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+
+#endif /* __CHECKER__ */
+
+
+
+/******************************************************************************
+ * TYPE SNOOPING
+ * Try not to let the compiler get away with doing funny things to the wrong
+ * types. An art, not a science.
+ ******************************************************************************/
+
+/**
+ * Make sure a macro argument is a variable:
+ * declare an enumeration inside a new scope with the same name
+ * as the variable.
+ *
+ * Macros for macros, that's right folks.
+ */
+#define VARIABLE(v) { enum v { }; }
+
+#define ASSIGN(variable, value) \
+ VARIABLE(variable) \
+ { enum { E = value }; } \
+ variable = value;
+
+
+/**
+ * __same_type
+ * Are two types/vars the same type (ignoring qualifiers)?
+ */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
+
+/**
+ * __must_be_array
+ * In C, you can often get away with thinking about arrays and pointers
+ * as two sides of the same coin, but there IS a distinction, and it
+ * tends to sow strange buggies if ignored.
+ *
+ * In the macro, &a[0] degrades to a pointer, which is a different type
+ * than an array. Usually the compiler will print a warning and move on,
+ * but BUILD_BUG_ON_ZERO ensures the whole thing gets called off.
+ */
+#ifdef __CHECKER__
+#define __must_be_array(arr) 0
+#else
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#endif
+
+
+/**
+ * ARRAY_SIZE
+ * Justly famous macro for determining the size of a dynamic array at
+ * runtime. The version from the Linux kernel manages to improve on a
+ * classic by stipulating the argument be a true array using the
+ * __must_be_array macro described above.
+ */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+
+/******************************************************************************
+ * OPTIMIZATION
+ * Macros that take advantage of (usually) compiler-specific features to
+ * attempt to increase the performance of your code. This is usually
+ * unsuccessful, but it has its place.
+ ******************************************************************************/
+
+/**
+ * Branch prediction macros; gcc will emit instructions causing the branch
+ * prediction to favor the instruction on the "likely" side, re-arranging
+ * the jumps so that it gets tested first.
+ * e.g.
+ * if (unlikely(c < 4)) {
+ * special code
+ * }
+ * There has been some evidence that performance improvements here are
+ * negligible in all but the most special cases.
+ */
+#ifdef __GNUC__
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+
+/******************************************************************************
+ * MATHS
+ * Preprocessor stand-ins for some of the unsexy functions in <math.h>,
+ * plus safe incrementers/decrementers that amount to bounds-checking
+ * in most use cases.
+ ******************************************************************************/
+
+/**
+ * abs() handles unsigned and signed longs, ints, shorts and chars. For all
+ * input types abs() returns a signed long.
+ *
+ * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()
+ * for those.
+ */
+#define myabs(x) ({ \
+ long ret; \
+ if (sizeof(x) == sizeof(long)) { \
+ long __x = (x); \
+ ret = (__x < 0) ? -__x : __x; \
+ } else { \
+ int __x = (x); \
+ ret = (__x < 0) ? -__x : __x; \
+ } \
+ ret; \
+ })
+
+#define myabs64(x) ({ \
+ s64 __x = (x); \
+ (__x < 0) ? -__x : __x; \
+ })
+
+
+/**
+ * min()/max()/clamp() macros that also do strict type-checking...
+ * See the "unnecessary" pointer comparison.
+ */
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+
+#define min3(x, y, z) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ typeof(z) _min3 = (z); \
+ (void) (&_min1 == &_min2); \
+ (void) (&_min1 == &_min3); \
+ _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
+ (_min2 < _min3 ? _min2 : _min3); })
+
+#define max3(x, y, z) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ typeof(z) _max3 = (z); \
+ (void) (&_max1 == &_max2); \
+ (void) (&_max1 == &_max3); \
+ _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
+ (_max2 > _max3 ? _max2 : _max3); })
+
+
+/**
+ * min_not_zero - return the minimum that is _not_ zero, unless both are zero
+ * @x: value1
+ * @y: value2
+ */
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+
+/**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does strict typechecking of min/max to make sure they are of the
+ * same type as val. See the unnecessary pointer comparisons.
+ */
+#define clamp(val, min, max) ({ \
+ typeof(val) __val = (val); \
+ typeof(min) __min = (min); \
+ typeof(max) __max = (max); \
+ (void) (&__val == &__min); \
+ (void) (&__val == &__max); \
+ __val = __val < __min ? __min: __val; \
+ __val > __max ? __max: __val; })
+
+/**
+ * ..and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max/clamp at all, of course.
+ */
+#define min_t(type, x, y) ({ \
+ type __min1 = (x); \
+ type __min2 = (y); \
+ __min1 < __min2 ? __min1: __min2; })
+
+#define max_t(type, x, y) ({ \
+ type __max1 = (x); \
+ type __max2 = (y); \
+ __max1 > __max2 ? __max1: __max2; })
+
+
+/**
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, min, max) ({ \
+ type __val = (val); \
+ type __min = (min); \
+ type __max = (max); \
+ __val = __val < __min ? __min: __val; \
+ __val > __max ? __max: __val; })
+
+
+/**
+ * clamp_val - return a value clamped to a given range using val's type
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of whatever
+ * type the input argument 'val' is. This is useful when val is an unsigned
+ * type and min and max are literals that will otherwise be assigned a signed
+ * integer type.
+ */
+#define clamp_val(val, min, max) ({ \
+ typeof(val) __val = (val); \
+ typeof(val) __min = (min); \
+ typeof(val) __max = (max); \
+ __val = __val < __min ? __min: __val; \
+ __val > __max ? __max: __val; })
+
+
+/**
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+
+/******************************************************************************
+ * END LINUX KERNEL NOVELTIES
+ ******************************************************************************/
+/******************************************************************************
+ * BEGIN MISCELLANEOUS MONSTROSITIES
+ ******************************************************************************/
+
+/**
+ * VA_NUM_ARGS
+ * Counts the number of VA_ARGS by means of an intense and heathen magic,
+ * the particulars of which are not to be uttered here.
+ */
+#define VA_NUM_ARGS(...) \
+ VA_NUM_ARGS_IMPL(__VA_ARGS__, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, \
+ 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, \
+ 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
+ 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, \
+ 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, \
+ 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, \
+ 3, 2, 1)
+
+#define VA_NUM_ARGS_IMPL( _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, \
+ _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \
+ _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \
+ _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, \
+ _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, \
+ _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, \
+ _61, _62, _63, N, ...) N
+
+/**
+ * dec/inc
+ * Safe decrement and increment; the value of x is modified
+ */
+#define dec(x, min) x = ((x) > (min)) ? ((x)-1) : (x)
+#define inc(x, max) x = ((x) < (max)) ? ((x)+1) : (x)
+
+#define CAT(a,b) a ## b
+#define STR(a) #a
+#define EXP(a) a
+
+
+
+void sha256gen(char *hex, void *hash);
+void nsleep(long nanoseconds);
+
+
+static inline char *itoa(int i)
+{
+ static char buffer[4096];
+ snprintf(buffer, 4096, "%d", i);
+
+ return buffer;
+}
+
+
+#ifdef __MERSENNE__
+#include "mersenne.h"
+#endif
+
+
+#endif
+
diff --git a/macro.c b/macro.c
new file mode 100644
index 0000000..2f7eb37
--- /dev/null
+++ b/macro.c
@@ -0,0 +1,102 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "nfa.h"
+#include "macro.h"
+
+struct htab_t *MACROTABLE; /* Symbol table for macro definitions */
+
+
+/**
+ * new_macro
+ * `````````
+ * Add a new macro to the table.
+ * If two macros have the same name, the second one takes precedence.
+ * A definition takes the form:
+ *
+ * name <whitespace> text [<whitespace>]
+ *
+ * Whitespace at the end of the line is ignored.
+ */
+void new_macro(char *def)
+{
+ static int first = 1;
+ char *name; // Name part of macro
+ char *text; // Text part of macro
+ char *edef; // Pointer to the end of text part
+ struct macro_t *p;
+
+ if (first) {
+ first = 0;
+ MACROTABLE = maketab(31, hash_add, strcmp);
+ }
+
+ /* Isolate name */
+ for (name = def; *def && !isspace(*def); def++)
+ ;
+
+ if (*def)
+ *def++ = '\0' ;
+
+ /*
+ * Isolate the definition text.
+ * Because you need to discard any trailing whitespace on the
+ * line, this is a bit tricky. The first while loop skips the
+ * preceding whitespace. The for loop is looking for end of string.
+ * If you find a ws character (and the \n at the end of string is ws),
+ * remember the position as a potential end of string.
+ */
+ while (isspace(*def))
+ ++def;
+
+ text = def;
+ edef = NULL;
+
+ while (*def) {
+ if (!isspace(*def)) ++def;
+ else {
+ for (edef = def++; isspace(*def); ++def)
+ ;
+ }
+ }
+
+ if (edef)
+ *edef = '\0';
+
+ /* Add the macro to the symbol table */
+ p = (struct macro_t *)newsym(sizeof(struct macro_t));
+ slcpy(p->name, name, MAC_NAME_MAX);
+ slcpy(p->text, text, MAC_TEXT_MAX);
+ addsym(MACROTABLE, p);
+}
+
+
+/*
+ * Return a pointer to the contents of a macro having the indicated
+ * name. Abort with a message if no macro exists. The macro name includes
+ * the brackets. *namep is modified to point past the close brace.
+ */
+char *get_macro(char **namep)
+{
+ struct macro_t *mac;
+ char *p;
+
+ /* Hit a '{', skip it and find '}'. */
+ if (!(p = strchr(++(*namep), '}')))
+ halt(SIGABRT, "Bad macro.");
+ else {
+ *p = '\0'; // Overwrite }
+
+ if (!(mac = (struct macro_t *)findsym(MACROTABLE, *namep)))
+ halt(SIGABRT, "No macro.");
+
+ *p++ = '}'; // Re-write }
+ *namep = p; // Update name pointer
+
+ return mac->text;
+ }
+ return "ERROR"; // Shouldn't happen.
+}
+
+
diff --git a/macro.h b/macro.h
new file mode 100644
index 0000000..6529e56
--- /dev/null
+++ b/macro.h
@@ -0,0 +1,18 @@
+#ifndef _MACRO_H
+#define _MACRO_H
+
+#define MAC_NAME_MAX 34 // Max macro name length
+#define MAC_TEXT_MAX 80 // Max macro text length
+
+
+struct macro_t {
+ char name[MAC_NAME_MAX];
+ char text[MAC_TEXT_MAX];
+};
+
+
+void new_macro(char *def);
+char *get_macro(char **namep);
+
+
+#endif
diff --git a/main.c b/main.c
index e69de29..23452a0 100644
--- a/main.c
+++ b/main.c
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+#include "lib/error.h"
+#include "lib/textutils.h"
+#include "scan.h"
+#include "nfa.h"
+#include "dfa.h"
+#include "gen.h"
+
+
+/**
+ * flex
+ * ````
+ * Lex the input file.
+ */
+void flex(struct pgen_t *pgen)
+{
+ struct dfa_table_row *dtrans;
+ struct accept_t *accept;
+ int nstates;
+
+ /* Print the input file header */
+ scan_head(pgen);
+
+ /* Construct the DFA */
+ nstates = dfa(pgen, &dtrans, &accept);
+
+ /* Open the output stream */
+ if (!(fopen(pgen->path_out, "w")))
+ halt(SIGABRT, "Can't open output file.");
+
+ /* Print the DFA transition table to the output stream. */
+ fprintf(pgen->out,
+ "YYPRIVATE YY_TTYPE %s[ %d ][ %d ] =\n",
+ DTRAN_NAME, nstates, MAX_CHARS);
+
+ /* Print the DFA array to the output stream. */
+ print_array(pgen->out, (int *)dtrans, nstates, MAX_CHARS);
+
+ defnext(pgen->out, DTRAN_NAME);
+
+ /* Print the rest of the driver and everyting after the second %% */
+ pdriver(pgen->out, nstates, accept);
+
+ scan_tail(pgen);
+}
+
+
+
+/**
+ * This is the actual main function.
+ */
+int main(int argc, char *argv[])
+{
+ struct pgen_t *pgen;
+
+ if (!argv[0])
+ halt(SIGABRT, "Nope.");
+
+ pgen = calloc(1, sizeof(struct pgen_t));
+
+ slcpy(pgen->path_in, argv[0], PATHSIZE);
+ slcpy(pgen->path_out, argv[1], PATHSIZE);
+
+ if (!(pgen->in = fopen(pgen->path_in, "r")))
+ halt(SIGABRT,"Can't open input file %s", pgen->path_in);
+
+ flex(pgen);
+ fclose(pgen->in);
+
+ return 0;
+}
+
+
diff --git a/main.h b/main.h
new file mode 100644
index 0000000..7a49f4f
--- /dev/null
+++ b/main.h
@@ -0,0 +1,38 @@
+#ifndef _MAIN_H
+#define _MAIN_H
+
+/*
+ * Name of DFA transition table. Up to 3 characters are appended
+ * to the end of this name in the row-compressed tables.
+ */
+#define DTRAN_NAME "Yy_nxt"
+#define TEMPLATE "lex.par" // Driver template for the state machine.
+
+#define PATHSIZE 255 // Maximum pathsize on Unix
+#define MAXLINE 2048 // Max rule/line size
+
+/**
+ * The parser generator singleton.
+ *
+ * It helps to encapsulate this state and avoid global
+ * variables floating around, getting initalized at all
+ * hours of the day.
+ *
+ * @filename: name of the input file.
+ * @line : buffer holding the current line of input.
+ * @cur : pointer for traversing the line.
+ * @in : input file stream
+ * @out : output file stream
+ *
+ */
+struct pgen_t {
+ char path_in[PATHSIZE];
+ char path_out[PATHSIZE];
+ char line[MAXLINE];
+ char *cur;
+ FILE *in;
+ FILE *out;
+};
+
+
+#endif
diff --git a/nfa.c b/nfa.c
index 3f5ac7f..dda7501 100644
--- a/nfa.c
+++ b/nfa.c
@@ -22,16 +22,11 @@ char *beg_input; // Beginning of input string
enum token cur_tok; // Current token
int lexeme; // Value associated with LITERAL
+struct nfa_t *STACK[SSIZE]; // Stack used by new()
+struct nfa_t **STACKP = &STACK[-1]; // Stack pointer
-struct nfa_t *nfa_stack[SSIZE]; // Stack used by new()
-struct nfa_t **nfa_stack_ptr = &nfa_stack[-1]; // Stack pointer
-struct nfa_t *nfa_base; // Base address of nfa_states array
-struct nfa_t *nfa_states; // State machine array
-int nfa_nstates=0; // Number of states in array.
-int nfa_next; // Index of next element in the array.
-
/*****************************************************************************
* Allocate, create, and destroy NFA blocks in the NFA stack array.
@@ -48,27 +43,29 @@ int nfa_next; // Index of next element in the array.
* contain the maximum number of states. Subsequent calls simply return
* a pointer to a region of this memory block.
*/
-struct nfa_t *new_nfa(void)
+struct nfa_t *new_nfa(struct pgen_t *pgen)
{
static bool init = false;
- struct nfa_t *new;
+ struct nfa_node *new;
- /* Allocate memory for the maximum number of states at once. */
if (!init) {
- nfa_states = calloc(NFA_MAX, sizeof(struct nfa_t));
- if (!nfa_states)
+ /* Allocate memory for the maximum number of states. */
+ nfa_state = calloc(NFA_MAX, sizeof(struct nfa_t));
+
+ if (!nfa_state)
halt(SIGABRT, "Could not allocate NFA.\n");
- nfa_stack_ptr = &nfa_state_stack[-1];
+ STACKP = &STACK[-1];
init = true;
}
-
if (++nfa_nstates >= NFA_MAX)
halt(SIGABRT, "Too long\n");
- /* If the stack is not ok, it's empty */
- new = !STACK_OK() ? &nfa_states[nfa_next++] : POP();
+ /*
+ * If the stack is not ok, it's empty
+ */
+ new = !STACK_OK() ? &nfa_state[nfa_next++] : POP();
new->edge = EPSILON;
return new;
@@ -84,13 +81,13 @@ struct nfa_t *new_nfa(void)
* Actually decrements the number of states and clears the region
* of memory in the allocated NFA stack block.
*/
-void del_nfa(NFA *nfa)
+void del_nfa(struct nfa_t *doomed)
{
--nfa_nstates;
- memset(nfa, 0, sizeof(NFA));
- nfa->edge = EMPTY ;
- PUSH(nfa);
+ memset(doomed, 0, sizeof(NFA));
+ doomed->edge = EMPTY ;
+ PUSH(doomed);
if (!STACK_OK())
halt(SIGABRT, "Stack overflow!");
@@ -168,10 +165,10 @@ char *save(char *str)
* ````````
* The main access routine. Creates an NFA using Thompson's construction.
*
- * @max_state: the maximum number of states, modified to reflect largest used.
+ * @max_state : the maximum number of states, modified to reflect largest used.
* @start_state: modified to be the start state
*/
-struct nfa_t *thompson(int *max_state, NFA **start_state)
+struct nfa_t *thompson(int *max_state, struct nfa_t **start_state)
{
CLEAR_STACK();
@@ -185,7 +182,7 @@ struct nfa_t *thompson(int *max_state, NFA **start_state)
*start_state = machine(); // Manufacture the NFA
*max_state = nfa_next; // Max state # in NFA
- return nfa_base;
+ return nfa_state;
}
@@ -201,14 +198,15 @@ struct nfa_t *thompson(int *max_state, NFA **start_state)
int nfa(FILE *input)
{
struct nfa_t *sstate;
- Nfa = thompson(&nfa_states, &sstate);
- return (sstate - nfa_base);
+
+ nfa_state = thompson(&nfa_nstates, &sstate);
+ return (sstate - nfa_state);
}
void free_nfa(void)
{
- free(nfa_base);
+ free(nfa_state);
}
@@ -271,7 +269,7 @@ SET *e_closure(SET *input, char **accept, int *anchor)
while (INBOUNDS(stack,tos)) {
/* 2 */
i = *tos--;
- p = &nfa_base[i];
+ p = &nfa_state[i];
if (p->accept && (i < accept_num)) {
accept_num = i;
@@ -325,13 +323,13 @@ SET *move(SET *inp_set, int c)
for (i = nfa_states; --i >= 0;) {
if (MEMBER(inp_set, i)) {
- p = &nfa_base[i];
+ p = &nfa_state[i];
if (p->edge==c || (p->edge==CCL && TEST(p->bitset, c))) {
if(!outset)
outset = newset();
- ADD(outset, p->next - nfa_base);
+ ADD(outset, p->next - nfa_state);
}
}
}
@@ -344,7 +342,7 @@ SET *move(SET *inp_set, int c)
#ifdef MAIN
-main(int argc, char **argv )
+void main(int argc, char **argv)
{
struct nfa_t *nfa;
struct nfa_t *start_state;
diff --git a/nfa.h b/nfa.h
index 32a269e..7a95225 100644
--- a/nfa.h
+++ b/nfa.h
@@ -16,6 +16,12 @@ struct nfa_t {
};
+struct nfa_t *nfa_base; // Base address of nfa_states array
+struct nfa_t *nfa_state; // State machine array
+int nfa_nstates; // Number of states in array.
+int nfa_next; // Index of next element in the array.
+
+
/* Non-character edge values */
#define EPSILON -1
#define CCL -2
diff --git a/scan.c b/scan.c
new file mode 100644
index 0000000..48fee67
--- /dev/null
+++ b/scan.c
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdbool.h>
+
+#include "lib/error.h"
+#include "macro.h"
+#include "main.h"
+
+/*
+ * scan.c
+ *
+ * Everything about breaking up the input file and getting it ready
+ * to be turned into a beautiful butterly.
+ *
+ */
+
+
+/**
+ * strip_comments
+ * ``````````````
+ * Replace C-like comments with whitespace space characters.
+ *
+ * @str : String to strip comment symbols from.
+ * Return: Does not return.
+ *
+ * NOTE
+ * Multi-line comments are supported.
+ */
+void strip_comments(char *str)
+{
+ static bool in_comment = false;
+
+ for (; *str; str++) {
+ if (in_comment) {
+ /* Exiting comment zone */
+ if (str[0] == '*' && str[1] == '/') {
+ in_comment = false;
+ *str++ = ' ';
+ }
+ /* Replace comments with space. */
+ if (!isspace(*str)) {
+ *str = ' ';
+ }
+ } else {
+ /* Entering comment zone */
+ if (str[0] == '/' && str[1] == '*') {
+ in_comment = true;
+ *str++ = ' ';
+ *str = ' ';
+ }
+ }
+ }
+}
+
+
+/**
+ * scan_head
+ * `````````
+ * Process the first third of the file (up to the first %%).
+ *
+ * @pgen: The parser generator singleton.
+ *
+ * NOTES
+ * The following rules are followed by scan_head:
+ *
+ * 0. Any lines beginning with white space or surrounded by %{ and %}
+ * are passed directly to the output stream.
+ *
+ * 1. All other lines are assumed to be macro definitions.
+ *
+ * 2. A %% can not be concealed in a %{ %}.
+ *
+ * 3. A %% or %{ and %} must be anchored at the start of a line (^).
+ * Anywhere else in the input (such as in a printf statement),
+ * they will be passed to the output correctly.
+ */
+void scan_head(struct pgen_t *pgen)
+{
+ /* ignore becomes true inside a %{ ... %} block. */
+ bool ignore = false;
+
+ while (fgets(pgen->line, MAXLINE, pgen->in)) {
+
+ if (!ignore)
+ strip_comments(pgen->line);
+
+ /* Encounter a '%' character. */
+ if (pgen->line[0] == '%') {
+
+ if (pgen->line[1] == '%') {
+ fputs("\n", pgen->out);
+ break;
+ }
+
+ if (pgen->line[1] == '{') ignore = 1;
+ else if (pgen->line[1] == '}') ignore = 0;
+ else
+ halt(SIGABRT, "Ignoring illegal %%%c directive\n", pgen->line[1]);
+
+ /* Skipping blank lines or comments. */
+ } else if (ignore || isspace(pgen->line[0])) {
+
+ fputs(pgen->line, pgen->out);
+
+ /* Otherwise we've got a macro. */
+ } else {
+ /*
+ * Replace macro def with a blank line so that the
+ * line numbers won't get messed up.
+ */
+ new_macro(pgen->line);
+ fputs("\n", pgen->out);
+ }
+ }
+}
+
+
+/**
+ * scan_tail
+ * `````````
+ * Scan and print the final 1/3 of the input file, after the second %%.
+ *
+ * Return: nothing.
+ */
+void scan_tail(struct pgen_t *pgen)
+{
+ while (fgets(pgen->line, MAXLINE, pgen->in))
+ fputs(pgen->line, pgen->out);
+}
+
+
+
+
+
+
+
+
diff --git a/scan.h b/scan.h
new file mode 100644
index 0000000..d3aadcf
--- /dev/null
+++ b/scan.h
@@ -0,0 +1,11 @@
+#ifndef _SCAN_H
+#define _SCAN_H
+
+#include "main.h"
+
+void strip_comments(char *string);
+void scan_head(struct pgen_t *pgen);
+void scan_tail(struct pgen_t *pgen);
+
+
+#endif
diff --git a/tok.h b/tok.h
deleted file mode 100644
index 9188fcd..0000000
--- a/tok.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef __TOKEN_H
-#define __TOKEN_H
-
-/****************************************************************************
- * TOKENS
- ****************************************************************************/
-
-enum token {
- EOS = 1, // end of string
- ANY, // .
- AT_BOL, // ^
- AT_EOL, // $
- CCL_END, // ]
- CCL_START, // [
- CLOSE_CURLY, // }
- CLOSE_PAREN, // )
- CLOSURE, // *
- DASH, // -
- END_OF_INPUT, // EOF
- L, // literal character
- OPEN_CURLY, // {
- OPEN_PAREN, // (
- OPTIONAL, // ?
- OR, // |
- PLUS_CLOSE // +
-};
-
-
-enum token TOKEN_MAP[] = {
-// ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-// ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^]
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-// ^^ ^_ SPACE ! " # $ % & '
- L, L, L, L, L, L, AT_EOL, L, L, L,
-// ( ) * + , - .
- OPEN_PAREN, CLOSE_PAREN, CLOSURE, PLUS_CLOSE, L, DASH, ANY,
-// / 0 1 2 3 4 5 6 7 8 9 : ; < =
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-// > ?
- L, OPTIONAL,
-// @ A B C D E F G H I J K L M N
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-// O P Q R S T U V W X Y Z
- L, L, L, L, L, L, L, L, L, L, L, L,
-// [ \ ] ^
- CCL_START, L, CCL_END, AT_BOL,
-// _ ` a b c d e f g h i j k l m
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-// n o p q r s t u v w x y z
- L, L, L, L, L, L, L, L, L, L, L, L, L,
-// { | } DEL
- OPEN_CURLY, OR, CLOSE_CURLY, L
-};
-
-
-#endif
-