summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Linehan <patientulysses@gmail.com>2012-07-23 00:28:27 -0400
committerJason Linehan <patientulysses@gmail.com>2012-07-23 00:28:27 -0400
commitede6635f4f2124e6766c9cb39b8741e5b54be874 (patch)
tree812a0522e383154b642b59475a029cf6c4b27c13
parent9b0c080b4a69530b8b6e8e945c4afbe24cac684f (diff)
downloadplex-ede6635f4f2124e6766c9cb39b8741e5b54be874.tar.gz
plex-ede6635f4f2124e6766c9cb39b8741e5b54be874.tar.bz2
plex-ede6635f4f2124e6766c9cb39b8741e5b54be874.zip
That's enough for one night.
-rw-r--r--lex.c128
-rw-r--r--lex.h10
-rw-r--r--lib/hash.h178
-rw-r--r--lib/hashfunc.h60
-rw-r--r--lib/list.h497
-rw-r--r--lib/set.h2
-rw-r--r--lib/textutils.c139
-rw-r--r--lib/textutils.h7
-rw-r--r--lib/util.h12
-rw-r--r--macro.c8
-rw-r--r--nfa.c63
-rw-r--r--nfa.h20
-rw-r--r--stack.h64
13 files changed, 1072 insertions, 116 deletions
diff --git a/lex.c b/lex.c
index e5ee7b3..ccc178c 100644
--- a/lex.c
+++ b/lex.c
@@ -5,16 +5,51 @@
#include "lib/textutils.h"
#include "lib/error.h"
+#include "lib/set.h"
#include "nfa.h"
#include "main.h"
+#include "lex.h"
-/*
- * Lexical analyzer.
- */
+
+FILE *Input;
enum token Token;
char *Current;
char *Start;
-char *Lexeme;
+int Lexeme;
+
+
+/*
+ * machine (entry)
+ * ```````
+ * Build the NFA state machine.
+ */
+struct nfa_t *machine(FILE *input)
+{
+ struct nfa_t *start;
+ struct nfa_t *p;
+
+ if (input)
+ Input = input;
+ else
+ halt(SIGABRT, "Bad input file.\n");
+
+ p = start = new_nfa();
+ p->next = rule();
+
+ while (Token != END_OF_INPUT) {
+ p->next2 = new_nfa();
+ p = p->next2;
+ p->next = rule();
+ }
+
+ return start;
+}
+
+
+
+/******************************************************************************
+ * Lexical analyzer.
+ ******************************************************************************/
enum token advance(void)
@@ -39,7 +74,7 @@ enum token advance(void)
* Loop until a non-blank line is read.
*/
do {
- if (!(Current = getline(&Current, MAXLINE, pgen->in))) {
+ if (!(Current = getline(&Current, MAXLINE, Input))) {
Token = END_OF_INPUT;
goto exit;
}
@@ -119,9 +154,9 @@ enum token advance(void)
}
-/*
- * The parser
- * ``````````
+/******************************************************************************
+ * Parser
+ * ``````
* The following components implement a recursive descent parser which
* produces a non-finite automaton for a regular expression using
* Thompson's construction.
@@ -132,38 +167,13 @@ enum token advance(void)
*
* The parser descends through
*
- * machine()
* rule()
* expr()
* factor()
* term()
* dodash()
- *
- */
-
-
-/*
- * machine
- * ```````
- * Build the NFA
- */
-struct nfa_t *machine(void)
-{
- struct nfa_t *start;
- struct nfa_t *p;
-
- p = start = new_nfa();
- p->next = rule();
-
- while (Token != END_OF_INPUT) {
- p->next2 = new_nfa();
- p = p->next2;
- p->next = rule();
- }
-
- return start;
-}
-
+ *
+ ******************************************************************************/
/**
* rule
@@ -183,7 +193,7 @@ struct nfa_t *rule(void)
struct nfa_t *end = NULL;
int anchor = NONE;
- if (MATCH(AT_BOL)) {
+ if (Token == AT_BOL) {
start = new_nfa();
start->edge = '\n';
anchor |= START;
@@ -197,12 +207,12 @@ struct nfa_t *rule(void)
* Pattern followed by a carriage-return or linefeed
* (use a character class).
*/
- if (MATCH(AT_EOL)) {
+ if (Token == AT_EOL) {
advance();
end->next = new_nfa();
end->edge = CCL;
- if (!(end->bitset = newset()))
+ if (!(end->bitset = new_set()))
halt(SIGABRT, "Out of memory.");
ADD(end->bitset, '\n');
@@ -211,10 +221,10 @@ struct nfa_t *rule(void)
anchor |= END;
}
- while (isspace(*input))
- input++;
+ while (isspace(*Current))
+ Input++;
- end->accept = save(input);
+ end->accept = save(Current);
end->anchor = anchor;
advance(); // Skip past EOS
@@ -253,7 +263,7 @@ void expr(struct nfa_t **startp, struct nfa_t **endp)
cat_expr(startp, endp);
- while(MATCH(OR)) {
+ while(Token == OR) {
advance();
cat_expr(&e2_start, &e2_end);
@@ -345,18 +355,18 @@ void factor(struct nfa_t **startp, struct nfa_t **endp)
term(startp, endp);
- if (MATCH(CLOSURE) || MATCH(PLUS_CLOSE) || MATCH(OPTIONAL)) {
+ if (Token == CLOSURE || Token == PLUS_CLOSE || Token == OPTIONAL) {
start = new_nfa();
end = new_nfa();
start->next = *startp;
(*endp)->next = end;
// * or ?
- if (MATCH(CLOSURE) || MATCH(OPTIONAL))
+ if (Token == CLOSURE || Token == OPTIONAL)
start->next2 = end;
// * or +
- if (MATCH(CLOSURE) || MATCH(PLUS_CLOSE))
+ if (Token == CLOSURE || Token == PLUS_CLOSE)
(*endp)->next2 = *startp;
*startp = start;
@@ -379,10 +389,10 @@ void term(struct nfa_t **startp, struct nfa_t **endp)
struct nfa_t *start;
int c;
- if (MATCH(OPEN_PAREN)) {
+ if (Token == OPEN_PAREN) {
advance();
expr(startp, endp);
- if (MATCH(CLOSE_PAREN))
+ if (Token == CLOSE_PAREN)
advance();
else
halt(SIGABRT, "Parenthesis are rotten.");
@@ -390,39 +400,33 @@ void term(struct nfa_t **startp, struct nfa_t **endp)
*startp = start = new_nfa();
*endp = start->next = new_nfa();
- if (!(MATCH(ANY) || MATCH(CCL_START))) {
+ if (!(Token == ANY || Token == CCL_START)) {
start->edge = Lexeme;
advance();
} else {
start->edge = CCL;
- if (!(start->bitset = newset()))
+ if (!(start->bitset = new_set()))
halt(SIGABRT, "Out of memory.");
/* dot (.) */
- if (MATCH(ANY)) {
+ if (Token == ANY) {
ADD(start->bitset, '\n');
- if(!Unix)
- ADD(start->bitset, '\r');
-
COMPLEMENT(start->bitset);
} else {
advance();
/* Negative character class */
- if (MATCH(AT_BOL)) {
+ if (Token == AT_BOL) {
advance();
/* Don't include \n in class */
ADD(start->bitset, '\n');
- if (!Unix)
- ADD(start->bitset, '\r');
-
COMPLEMENT(start->bitset);
}
- if (!MATCH(CCL_END)) {
+ if (Token != CCL_END) {
dodash(start->bitset);
} else { // [] or [^]
for (c=0; c<=' '; ++c)
@@ -441,20 +445,20 @@ void dodash(struct set_t *set)
register int first;
/* Treat [-...] as a literal dash, but print a warning. */
- if (MATCH(DASH)) {
+ if (Token == DASH) {
ADD(set, Lexeme);
advance();
}
- for (; !MATCH(EOS) && !MATCH(CCL_END); advance()) {
- if (!MATCH(DASH)) {
+ for (; Token != EOS && Token != CCL_END; advance()) {
+ if (Token != DASH) {
first = Lexeme;
ADD(set, Lexeme);
/* Looking at a dash */
} else {
advance();
/* Treat [...-] as literal */
- if (MATCH(CCL_END)) {
+ if (Token == CCL_END) {
ADD(set, '-');
} else {
for (; first<=Lexeme; first++)
diff --git a/lex.h b/lex.h
index 977eba7..667b54a 100644
--- a/lex.h
+++ b/lex.h
@@ -1,6 +1,8 @@
#ifndef _LEXER_H
#define _LEXER_H
+#include "lib/set.h"
+
enum token {
EOS = 1, // end of string
@@ -24,8 +26,8 @@ enum token {
+struct nfa_t *machine(FILE *input);
enum token advance(void);
-struct nfa_t *machine(void);
struct nfa_t *rule(void);
void expr(struct nfa_t **startp, struct nfa_t **endp);
@@ -38,11 +40,11 @@ void dodash(struct set_t *set);
-enum token TOKEN_MAP[] = {
+static enum token TOKEN_MAP[] = {
// ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
// ^O ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^]
- L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
// ^^ ^_ SPACE ! " # $ % & '
L, L, L, L, L, L, AT_EOL, L, L, L,
// ( ) * + , - .
diff --git a/lib/hash.h b/lib/hash.h
new file mode 100644
index 0000000..9c3273d
--- /dev/null
+++ b/lib/hash.h
@@ -0,0 +1,178 @@
+/*
+ * hash.h -- Hash tables and hashing routines
+ *
+ * Copyright (C) 2012 Jason Linehan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __HASHTABLE_ROUTINES
+#define __HASHTABLE_ROUTINES
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdint.h>
+#include "error.h"
+#include "hashfunc.h"
+
+typedef unsigned (hash_func_t)(const char *string);
+typedef int (cmp_func_t)(const char *a, const char *b);
+
+struct bucket {
+ struct bucket *next;
+ struct bucket **prev;
+};
+
+struct htab_t {
+ size_t size;
+ int num_syms;
+ hash_func_t *hash; // hash func
+ cmp_func_t *cmp; // Comparison func
+ struct bucket *table[1]; // First element of hash table
+};
+
+
+
+/**
+ * new_symbol
+ * ``````````
+ * Allocate space for a new symbol in the hash table.
+ *
+ * @size : Size of the symbol.
+ * Return: Pointer to the allocated area.
+ */
+static inline void *new_symbol(size_t size)
+{
+ struct bucket *sym;
+
+ if (!(sym = calloc(size + sizeof(struct bucket), 1)))
+ halt(SIGABRT, "Can't get memory for hash bucket.\n");
+
+ return (void *)(sym+1);
+}
+
+
+/**
+ * free_symbol
+ * ```````````
+ * Free the memory space allocated for symbol.
+ *
+ * @sym : Symbol to be freed.
+ * Return: Nothing.
+ */
+static inline void free_symbol(void *sym)
+{
+ free((struct bucket *)sym - 1);
+}
+
+
+static inline struct htab_t *new_htab(int max_sym, hash_func_t *hash, cmp_func_t *cmp)
+{
+ struct htab_t *new;
+
+ if (!max_sym)
+ max_sym= 127;
+
+ new = calloc(1,(max_sym*sizeof(struct bucket *))+sizeof(struct htab_t));
+
+ if (!new)
+ halt(SIGABRT, "Insufficient memory for symbol table.\n");
+
+ new->size = max_sym;
+ new->num_syms = 0;
+ new->hash = hash;
+ new->cmp = cmp;
+
+ return new;
+}
+
+static inline void *add_symbol(struct htab_t *tab, void *my_sym)
+{
+ struct bucket **p;
+ struct bucket *tmp;
+ struct bucket *sym;
+
+ sym = (struct bucket *)my_sym;
+
+ p = &(tab->table)[tab->hash((const char *)(sym--)) % tab->size];
+
+ tmp = *p;
+ *p = sym;
+ sym->prev = p;
+ sym->next = tmp;
+
+ if (tmp)
+ tmp->prev = &sym->next;
+
+ tab->num_syms++;
+
+ return (void *)(sym + 1);
+}
+
+
+
+static inline void del_symbol(struct htab_t *tab, void *my_sym)
+{
+ struct bucket *sym;
+ sym = (struct bucket *)my_sym;
+
+ if (tab && sym) {
+ --tab->num_syms;
+ --sym;
+
+ if ((*(sym->prev) = sym->next))
+ sym->next->prev = sym->prev;
+ }
+}
+
+
+static inline void *get_symbol(struct htab_t *tab, void *sym)
+{
+ struct bucket *p;
+
+ if (!tab)
+ return NULL;
+
+ p = (tab->table)[(*tab->hash)(sym) % tab->size];
+
+ while (p && (tab->cmp((const char *)sym, (const char *)p+1)))
+ p = p->next;
+
+ return (void *)(p ? p+1 : NULL);
+}
+
+
+/**
+ * next_symbol
+ * ```````````
+ * Return the next node in the current chain with the same key as
+ * the last node found.
+ */
+static inline void *next_symbol(struct htab_t *tab, void *i_last)
+{
+ struct bucket *last = (struct bucket *)i_last;
+
+ for (--last; last->next; last = last->next) {
+ if ((tab->cmp)((const char *)last+1, (const char *)last->next+1) == 0) // match
+ return (char *)(last->next + 1);
+ }
+ return NULL;
+}
+
+
+#endif
+
diff --git a/lib/hashfunc.h b/lib/hashfunc.h
new file mode 100644
index 0000000..1252686
--- /dev/null
+++ b/lib/hashfunc.h
@@ -0,0 +1,60 @@
+/******************************************************************************
+ * djb2_hash
+ * `````````
+ * HISTORY
+ * This algorithm (k=33) was first reported by Dan Bernstein many years
+ * ago in comp.lang.c. Another version of this algorithm (now favored by
+ * bernstein) uses XOR:
+ *
+ * hash(i) = hash(i - 1) * 33 ^ str[i];
+ *
+ * The magic of number 33 (why it works better than many other constants,
+ * prime or not) has never been adequately explained.
+ *
+ ******************************************************************************/
+static inline unsigned djb2_hash(const char *str)
+{
+ unsigned hash;
+ int c;
+
+ hash = 5381;
+
+ while ((c = (unsigned char)*str++))
+ hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+
+ return (unsigned) hash;
+}
+
+/******************************************************************************
+ * sdbm_hash
+ * `````````
+ * HISTORY
+ * This algorithm was created for sdbm (a public-domain reimplementation
+ * of ndbm) database library. It was found to do well in scrambling bits,
+ * causing better distribution of the keys and fewer splits. it also
+ * happens to be a good general hashing function with good distribution.
+ *
+ * The actual function is
+ *
+ * hash(i) = hash(i - 1) * 65599 + str[i];
+ *
+ * What is included below is the faster version used in gawk. [there is
+ * even a faster, duff-device version] the magic constant 65599 was picked
+ * out of thin air while experimenting with different constants, and turns
+ * out to be a prime. this is one of the algorithms used in berkeley db
+ * (see sleepycat) and elsewhere.
+ *
+ ******************************************************************************/
+static inline unsigned sdbm_hash(const char *str)
+{
+ unsigned hash;
+ int c;
+
+ hash = 0;
+
+ while ((c = (unsigned char)*str++))
+ hash = c + (hash << 6) + (hash << 16) - hash;
+
+ return (unsigned) hash;
+}
+
diff --git a/lib/list.h b/lib/list.h
new file mode 100644
index 0000000..6a8aaa2
--- /dev/null
+++ b/lib/list.h
@@ -0,0 +1,497 @@
+/*
+ * Augmented version of the circular linked list header authored by
+ * Rusty Russell <rusty@rustcorp.com.au>, who implemented this approach
+ * in the Linux kernel. See http://ccodearchive.net/info/list.html.
+ *
+ * The original is licensed under LGPLv2.1+
+ */
+
+#ifndef CCAN_LIST_H
+#define CCAN_LIST_H
+#include <stdbool.h>
+#include <assert.h>
+#include "util.h"
+
+
+/**
+ * struct list_node - an entry in a doubly-linked list
+ * @next: next entry (self if empty)
+ * @prev: previous entry (self if empty)
+ *
+ * This is used as an entry in a linked list.
+ * Example:
+ * struct child {
+ * const char *name;
+ * // Linked list of all us children.
+ * struct list_node list;
+ * };
+ */
+struct list_node
+{
+ struct list_node *next, *prev;
+};
+
+/**
+ * struct list_head - the head of a doubly-linked list
+ * @h: the list_head (containing next and prev pointers)
+ *
+ * This is used as the head of a linked list.
+ * Example:
+ * struct parent {
+ * const char *name;
+ * struct list_head children;
+ * unsigned int num_children;
+ * };
+ */
+struct list_head
+{
+ struct list_node n;
+};
+
+/**
+ * list_check - check head of a list for consistency
+ * @h: the list_head
+ * @abortstr: the location to print on aborting, or NULL.
+ *
+ * Because list_nodes have redundant information, consistency checking between
+ * the back and forward links can be done. This is useful as a debugging check.
+ * If @abortstr is non-NULL, that will be printed in a diagnostic if the list
+ * is inconsistent, and the function will abort.
+ *
+ * Returns the list head if the list is consistent, NULL if not (it
+ * can never return NULL if @abortstr is set).
+ *
+ * See also: list_check_node()
+ *
+ * Example:
+ * static void dump_parent(struct parent *p)
+ * {
+ * struct child *c;
+ *
+ * printf("%s (%u children):\n", p->name, p->num_children);
+ * list_check(&p->children, "bad child list");
+ * list_for_each(&p->children, c, list)
+ * printf(" -> %s\n", c->name);
+ * }
+ */
+struct list_head *list_check(const struct list_head *h, const char *abortstr);
+
+/**
+ * list_check_node - check node of a list for consistency
+ * @n: the list_node
+ * @abortstr: the location to print on aborting, or NULL.
+ *
+ * Check consistency of the list node is in (it must be in one).
+ *
+ * See also: list_check()
+ *
+ * Example:
+ * static void dump_child(const struct child *c)
+ * {
+ * list_check_node(&c->list, "bad child list");
+ * printf("%s\n", c->name);
+ * }
+ */
+struct list_node *list_check_node(const struct list_node *n,
+ const char *abortstr);
+
+#ifdef CCAN_LIST_DEBUG
+#define list_debug(h) list_check((h), __func__)
+#define list_debug_node(n) list_check_node((n), __func__)
+#else
+#define list_debug(h) (h)
+#define list_debug_node(n) (n)
+#endif
+
+/**
+ * LIST_HEAD_INIT - initializer for an empty list_head
+ * @name: the name of the list.
+ *
+ * Explicit initializer for an empty list.
+ *
+ * See also:
+ * LIST_HEAD, list_head_init()
+ *
+ * Example:
+ * static struct list_head my_list = LIST_HEAD_INIT(my_list);
+ */
+#define LIST_HEAD_INIT(name) { { &name.n, &name.n } }
+
+/**
+ * LIST_HEAD - define and initialize an empty list_head
+ * @name: the name of the list.
+ *
+ * The LIST_HEAD macro defines a list_head and initializes it to an empty
+ * list. It can be prepended by "static" to define a static list_head.
+ *
+ * See also:
+ * LIST_HEAD_INIT, list_head_init()
+ *
+ * Example:
+ * static LIST_HEAD(my_global_list);
+ */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/**
+ * list_head_init - initialize a list_head
+ * @h: the list_head to set to the empty list
+ *
+ * Example:
+ * ...
+ * struct parent *parent = malloc(sizeof(*parent));
+ *
+ * list_head_init(&parent->children);
+ * parent->num_children = 0;
+ */
+static inline void list_head_init(struct list_head *h)
+{
+ h->n.next = h->n.prev = &h->n;
+}
+
+/**
+ * list_add - add an entry at the start of a linked list.
+ * @h: the list_head to add the node to
+ * @n: the list_node to add to the list.
+ *
+ * The list_node does not need to be initialized; it will be overwritten.
+ * Example:
+ * struct child *child = malloc(sizeof(*child));
+ *
+ * child->name = "marvin";
+ * list_add(&parent->children, &child->list);
+ * parent->num_children++;
+ */
+static inline void list_add(struct list_head *h, struct list_node *n)
+{
+ n->next = h->n.next;
+ n->prev = &h->n;
+ h->n.next->prev = n;
+ h->n.next = n;
+ (void)list_debug(h);
+}
+
+/**
+ * list_add_tail - add an entry at the end of a linked list.
+ * @h: the list_head to add the node to
+ * @n: the list_node to add to the list.
+ *
+ * The list_node does not need to be initialized; it will be overwritten.
+ * Example:
+ * list_add_tail(&parent->children, &child->list);
+ * parent->num_children++;
+ */
+static inline void list_add_tail(struct list_head *h, struct list_node *n)
+{
+ n->next = &h->n;
+ n->prev = h->n.prev;
+ h->n.prev->next = n;
+ h->n.prev = n;
+ (void)list_debug(h);
+}
+
+/**
+ * list_empty - is a list empty?
+ * @h: the list_head
+ *
+ * If the list is empty, returns true.
+ *
+ * Example:
+ * assert(list_empty(&parent->children) == (parent->num_children == 0));
+ */
+static inline bool list_empty(const struct list_head *h)
+{
+ (void)list_debug(h);
+ return h->n.next == &h->n;
+}
+
+
+
+
+
+/**
+ * list_del - delete an entry from an (unknown) linked list.
+ * @n: the list_node to delete from the list.
+ *
+ * Note that this leaves @n in an undefined state; it can be added to
+ * another list, but not deleted again.
+ *
+ * See also:
+ * list_del_from()
+ *
+ * Example:
+ * list_del(&child->list);
+ * parent->num_children--;
+ */
+static inline void list_del(struct list_node *n)
+{
+ (void)list_debug_node(n);
+ n->next->prev = n->prev;
+ n->prev->next = n->next;
+#ifdef CCAN_LIST_DEBUG
+ /* Catch use-after-del. */
+ n->next = n->prev = NULL;
+#endif
+}
+
+/**
+ * list_del_from - delete an entry from a known linked list.
+ * @h: the list_head the node is in.
+ * @n: the list_node to delete from the list.
+ *
+ * This explicitly indicates which list a node is expected to be in,
+ * which is better documentation and can catch more bugs.
+ *
+ * See also: list_del()
+ *
+ * Example:
+ * list_del_from(&parent->children, &child->list);
+ * parent->num_children--;
+ */
+static inline void list_del_from(struct list_head *h, struct list_node *n)
+{
+#ifdef CCAN_LIST_DEBUG
+ {
+ /* Thorough check: make sure it was in list! */
+ struct list_node *i;
+ for (i = h->n.next; i != n; i = i->next)
+ assert(i != &h->n);
+ }
+#endif /* CCAN_LIST_DEBUG */
+
+ /* Quick test that catches a surprising number of bugs. */
+ assert(!list_empty(h));
+ list_del(n);
+}
+
+/**
+ * list_entry - convert a list_node back into the structure containing it.
+ * @n: the list_node
+ * @type: the type of the entry
+ * @member: the list_node member of the type
+ *
+ * Example:
+ * // First list entry is children.next; convert back to child.
+ * child = list_entry(parent->children.n.next, struct child, list);
+ *
+ * See Also:
+ * list_top(), list_for_each()
+ */
+#define list_entry(n, type, member) container_of(n, type, member)
+
+/**
+ * list_top - get the first entry in a list
+ * @h: the list_head
+ * @type: the type of the entry
+ * @member: the list_node member of the type
+ *
+ * If the list is empty, returns NULL.
+ *
+ * Example:
+ * struct child *first;
+ * first = list_top(&parent->children, struct child, list);
+ */
+#define list_top(h, type, member) \
+ ((type *)list_top_((h), list_off_(type, member)))
+
+static inline const void *list_top_(const struct list_head *h, size_t off)
+{
+ if (list_empty(h))
+ return NULL;
+ return (const char *)h->n.next - off;
+}
+
+/**
+ * list_tail - get the last entry in a list
+ * @h: the list_head
+ * @type: the type of the entry
+ * @member: the list_node member of the type
+ *
+ * If the list is empty, returns NULL.
+ *
+ * Example:
+ * struct child *last;
+ * last = list_tail(&parent->children, struct child, list);
+ */
+#define list_tail(h, type, member) \
+ ((type *)list_tail_((h), list_off_(type, member)))
+
+static inline const void *list_tail_(const struct list_head *h, size_t off)
+{
+ if (list_empty(h))
+ return NULL;
+ return (const char *)h->n.prev - off;
+}
+
+/**
+ * list_for_each - iterate through a list.
+ * @h: the list_head (warning: evaluated multiple times!)
+ * @i: the structure containing the list_node
+ * @member: the list_node member of the structure
+ *
+ * This is a convenient wrapper to iterate @i over the entire list. It's
+ * a for loop, so you can break and continue as normal.
+ *
+ * Example:
+ * list_for_each(&parent->children, child, list)
+ * printf("Name: %s\n", child->name);
+ */
+#define list_for_each(h, i, member) \
+ list_for_each_off(h, i, list_off_var_(i, member))
+
+/**
+ * list_for_each_rev - iterate through a list backwards.
+ * @h: the list_head
+ * @i: the structure containing the list_node
+ * @member: the list_node member of the structure
+ *
+ * This is a convenient wrapper to iterate @i over the entire list. It's
+ * a for loop, so you can break and continue as normal.
+ *
+ * Example:
+ * list_for_each_rev(&parent->children, child, list)
+ * printf("Name: %s\n", child->name);
+ */
+#define list_for_each_rev(h, i, member) \
+ for (i = container_of_var(list_debug(h)->n.prev, i, member); \
+ &i->member != &(h)->n; \
+ i = container_of_var(i->member.prev, i, member))
+
+/**
+ * list_for_each_safe - iterate through a list, maybe during deletion
+ * @h: the list_head
+ * @i: the structure containing the list_node
+ * @nxt: the structure containing the list_node
+ * @member: the list_node member of the structure
+ *
+ * This is a convenient wrapper to iterate @i over the entire list. It's
+ * a for loop, so you can break and continue as normal. The extra variable
+ * @nxt is used to hold the next element, so you can delete @i from the list.
+ *
+ * Example:
+ * struct child *next;
+ * list_for_each_safe(&parent->children, child, next, list) {
+ * list_del(&child->list);
+ * parent->num_children--;
+ * }
+ */
+#define list_for_each_safe(h, i, nxt, member) \
+ list_for_each_safe_off(h, i, nxt, list_off_var_(i, member))
+
+
+/**
+ * list_count -- how many nodes are in a list?
+ * @n: will contain the number of items
+ * @h: the list_head
+ * @i: the structure containing the list_node
+ * @member: the list_node member of the structure
+ *
+ * Used similar to the list_for_each macro, but modifies an integer value n,
+ * leaving it equal to the number of nodes in the list. Note that n must be
+ * initialized to 0.
+ *
+ * Added 05-29-2012
+ */
+#define list_count(h, i, member, n) \
+ n=0; list_for_each_off(h, i, list_off_var_(i, member)) { n++; }
+
+/**
+ * list_for_each_off - iterate through a list of memory regions.
+ * @h: the list_head
+ * @i: the pointer to a memory region wich contains list node data.
+ * @off: offset(relative to @i) at which list node data resides.
+ *
+ * This is a low-level wrapper to iterate @i over the entire list, used to
+ * implement all oher, more high-level, for-each constructs. It's a for loop,
+ * so you can break and continue as normal.
+ *
+ * WARNING! Being the low-level macro that it is, this wrapper doesn't know
+ * nor care about the type of @i. The only assumtion made is that @i points
+ * to a chunk of memory that at some @offset, relative to @i, contains a
+ * properly filled `struct node_list' which in turn contains pointers to
+ * memory chunks and it's turtles all the way down. Whith all that in mind
+ * remember that given the wrong pointer/offset couple this macro will
+ * happilly churn all you memory untill SEGFAULT stops it, in other words
+ * caveat emptor.
+ *
+ * It is worth mentioning that one of legitimate use-cases for that wrapper
+ * is operation on opaque types with known offset for `struct list_node'
+ * member(preferably 0), because it allows you not to disclose the type of
+ * @i.
+ *
+ * Example:
+ * list_for_each_off(&parent->children, child,
+ * offsetof(struct child, list))
+ * printf("Name: %s\n", child->name);
+ */
+#define list_for_each_off(h, i, off) \
+ for (i = list_node_to_off_(list_debug(h)->n.next, (off)); \
+ list_node_from_off_((void *)i, (off)) != &(h)->n; \
+ i = list_node_to_off_(list_node_from_off_((void *)i, (off))->next, \
+ (off)))
+
+/**
+ * list_for_each_safe_off - iterate through a list of memory regions, maybe
+ * during deletion
+ * @h: the list_head
+ * @i: the pointer to a memory region wich contains list node data.
+ * @nxt: the structure containing the list_node
+ * @off: offset(relative to @i) at which list node data resides.
+ *
+ * For details see `list_for_each_off' and `list_for_each_safe'
+ * descriptions.
+ *
+ * Example:
+ * list_for_each_safe_off(&parent->children, child,
+ * next, offsetof(struct child, list))
+ * printf("Name: %s\n", child->name);
+ */
+#define list_for_each_safe_off(h, i, nxt, off) \
+ for (i = list_node_to_off_(list_debug(h)->n.next, (off)), \
+ nxt = list_node_to_off_(list_node_from_off_(i, (off))->next, \
+ (off)); \
+ list_node_from_off_(i, (off)) != &(h)->n; \
+ i = nxt, \
+ nxt = list_node_to_off_(list_node_from_off_(i, (off))->next, \
+ (off)))
+
+
+/* Other -off variants. */
+#define list_entry_off(n, type, off) \
+ ((type *)list_node_from_off_((n), (off)))
+
+#define list_head_off(h, type, off) \
+ ((type *)list_head_off((h), (off)))
+
+#define list_tail_off(h, type, off) \
+ ((type *)list_tail_((h), (off)))
+
+#define list_add_off(h, n, off) \
+ list_add((h), list_node_from_off_((n), (off)))
+
+#define list_del_off(n, off) \
+ list_del(list_node_from_off_((n), (off)))
+
+#define list_del_from_off(h, n, off) \
+ list_del_from(h, list_node_from_off_((n), (off)))
+
+/* Offset helper functions so we only single-evaluate. */
+static inline void *list_node_to_off_(struct list_node *node, size_t off)
+{
+ return (void *)((char *)node - off);
+}
+static inline struct list_node *list_node_from_off_(void *ptr, size_t off)
+{
+ return (struct list_node *)((char *)ptr + off);
+}
+
+/* Get the offset of the member, but make sure it's a list_node. */
+#define list_off_(type, member) \
+ (container_off(type, member) + \
+ check_type(((type *)0)->member, struct list_node))
+
+#define list_off_var_(var, member) \
+ (container_off_var(var, member) + \
+ check_type(var->member, struct list_node))
+
+#endif /* CCAN_LIST_H */
diff --git a/lib/set.h b/lib/set.h
index ab8d982..02cd11a 100644
--- a/lib/set.h
+++ b/lib/set.h
@@ -3,7 +3,7 @@
/* One cell in the bitmap */
-unsigned short _SETTYPE;
+typedef unsigned short _SETTYPE;
#define _BITS_IN_WORD (16)
diff --git a/lib/textutils.c b/lib/textutils.c
index aa5e795..d413f5c 100644
--- a/lib/textutils.c
+++ b/lib/textutils.c
@@ -754,3 +754,142 @@ char *get_expr(FILE *fd_input)
}
+
+/**
+ * hex2bin
+ * ```````
+ * Convert the hexadecimal digit to an int.
+ *
+ * @c: hexadecimal digit
+ *
+ * NOTE
+ * @c must be one of 0123456789abcdefABCDEF
+ */
+int hex2bin(int c)
+{
+ return (isdigit(c)) ? ((c)-'0') : ((((toupper(c))-'A')+10) & 0xf);
+}
+
+/**
+ * oct2bin
+ * ```````
+ * Convert the octal digit represented by 'c' to an int.
+ *
+ * @c: octal digit
+ *
+ * NOTE
+ * @c must be one of 01234567
+ */
+int oct2bin(int c)
+{
+ return (((c)-'0') & 0x7);
+}
+
+
+/**
+ * esc
+ * ```
+ * Return the character associated with the escape sequence pointed to
+ * by *s, and modify *s to point past the sequence.
+ *
+ * @s: Pointer to a string holding escape sequence
+ *
+ * HISTORY
+ * From Alan Holub's "Compiler Design in C"
+ *
+ * NOTES
+ * Recognized characters:
+ *
+ * \b backspace
+ * \f formfeed
+ * \n newline
+ * \r carriage return
+ * \s space
+ * \t tab
+ * \e ASCII ESC character ('\033')
+ * \DDD number formed of 1-3 octal digits
+ * \xDDD number formed of 1-3 hex digits (two required)
+ * \^C C = any letter. Control code.
+ */
+int esc(char **s)
+{
+ register int rval;
+
+ if (**s != '\\')
+ rval = *((*s)++);
+ else {
+ ++(*s);
+ switch (toupper(**s))
+ {
+ case '\0':
+ rval = '\\';
+ break;
+ case 'B':
+ rval = '\b';
+ break;
+ case 'F':
+ rval = '\f';
+ break;
+ case 'N':
+ rval = '\n';
+ break;
+ case 'R':
+ rval = 'r';
+ break;
+ case 'S':
+ rval = ' ';
+ break;
+ case 'T':
+ rval = '\t';
+ break;
+ case 'E':
+ rval = '\033';
+ break;
+ case '^':
+ rval = *++(*s);
+ rval = toupper(rval) - '@';
+ break;
+ case 'X':
+ rval = 0;
+ ++(*s);
+ if (IS_HEXDIGIT(**s)) {
+ rval = hex2bin(*(*s)++);
+ }
+ if (IS_HEXDIGIT(**s)) {
+ rval <<= 4;
+ rval |= hex2bin(*(*s)++);
+ }
+ if (IS_HEXDIGIT(**s)) {
+ rval << 4;
+ rval |= hex2bin(*(*s)++);
+ }
+ --(*s);
+ break;
+
+ default:
+ if (!IS_OCTDIGIT(**s))
+ rval = **s;
+ else {
+ ++(*s);
+ rval = oct2bin(*(*s)++);
+ if (IS_OCTDIGIT(**s)) {
+ rval <<= 3;
+ rval |= oct2bin(*(*s)++);
+ }
+ if (IS_OCTDIGIT(**s)) {
+ rval <<= 3;
+ rval |= oct2bin(*(*s)++);
+ }
+ --(*s);
+ }
+ break;
+ }
+ ++(*s);
+ }
+ return rval;
+}
+
+
+
+
+
diff --git a/lib/textutils.h b/lib/textutils.h
index 475597f..24f285a 100644
--- a/lib/textutils.h
+++ b/lib/textutils.h
@@ -55,6 +55,13 @@ int textutils_getline(char **dest, int n, FILE *stream);
#define STREMPTY(s) (STRCMP((s),""))
+#define IS_HEXDIGIT(x) (isdigit(x)||('a'<=(x)&&(x)<='f')||('A'<=(x)&&(x)<='F'))
+#define IS_OCTDIGIT(x) ('0'<=(x) && (x)<='7')
+int hex2bin(int c);
+int oct2bin(int c);
+int esc(char **s);
+
+
/**
* concat
* ``````
diff --git a/lib/util.h b/lib/util.h
index 1cefe51..b3ef621 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -109,7 +109,7 @@ extern int __build_bug_on_failed;
*/
#define VARIABLE(v) { enum v { }; }
-#define ASSIGN(variable, value) \
+#define V_ASSIGN(variable, value) \
VARIABLE(variable) \
{ enum { E = value }; } \
variable = value;
@@ -339,6 +339,16 @@ extern int __build_bug_on_failed;
* END LINUX KERNEL NOVELTIES
******************************************************************************/
/******************************************************************************
+ * Things I took from Alan Holub's Compiler Design in C
+ ******************************************************************************/
+#define TOOHIGH(a, p) ((p) - (a) > (ARRAY_SIZE(a) - 1))
+#define TOOLOW(a, p) ((p) - (a) < 0)
+#define INBOUNDS(a, p) (!(TOOHIGH(a,p) || TOOLOW(a,p)))
+/* Largest int available on a machine */
+#define LARGEST_INT (int)(((unsigned)(~0)) >> 1)
+
+
+/******************************************************************************
* BEGIN MISCELLANEOUS MONSTROSITIES
******************************************************************************/
diff --git a/macro.c b/macro.c
index 2f7eb37..16a9248 100644
--- a/macro.c
+++ b/macro.c
@@ -2,9 +2,11 @@
#include <string.h>
#include <stdio.h>
+#include "lib/hash.h"
#include "nfa.h"
#include "macro.h"
+
struct htab_t *MACROTABLE; /* Symbol table for macro definitions */
@@ -29,7 +31,7 @@ void new_macro(char *def)
if (first) {
first = 0;
- MACROTABLE = maketab(31, hash_add, strcmp);
+ MACROTABLE = new_htab(31, sdbm_hash, strcmp);
}
/* Isolate name */
@@ -65,10 +67,10 @@ void new_macro(char *def)
*edef = '\0';
/* Add the macro to the symbol table */
- p = (struct macro_t *)newsym(sizeof(struct macro_t));
+ p = (struct macro_t *)new_symbol(sizeof(struct macro_t));
slcpy(p->name, name, MAC_NAME_MAX);
slcpy(p->text, text, MAC_TEXT_MAX);
- addsym(MACROTABLE, p);
+ add_symbol(MACROTABLE, p);
}
diff --git a/nfa.c b/nfa.c
index dda7501..64b4612 100644
--- a/nfa.c
+++ b/nfa.c
@@ -1,17 +1,19 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#include "common/error.h"
+#include <stdbool.h>
+#include "lib/error.h"
#include "nfa.h"
+#include "lex.h"
/* True if stack isn't full or empty. */
-#define STACK_OK() (INBOUNDS(nfa_stack, nfa_stack_ptr))
-#define STACK_USED() ((int)(nfa_stack_ptr-nfa_stack) + 1) // Number of slots used
-#define CLEAR_STACK() (nfa_stack_ptr = nfa_stack - 1) // Reset the stack
-#define PUSH(x) (*++nfa_stack_ptr = (x)) // push x to the stack
-#define POP() (*nfa_stack_ptr--) // pop x from stack
+#define STACK_OK() (INBOUNDS(STACK, STACKP))
+#define STACK_USED() ((int)(STACKP-STACK) + 1) // Number of slots used
+#define CLEAR_STACK() (STACKP = STACK - 1) // Reset the stack
+#define PUSH(x) (*++STACKP = (x)) // push x to the stack
+#define POP() (*STACKP--) // pop x from stack
#define MATCH(t) (cur_tok == (t))
#define SSIZE 32 // State size
@@ -22,6 +24,8 @@ char *beg_input; // Beginning of input string
enum token cur_tok; // Current token
int lexeme; // Value associated with LITERAL
+int LINENO=0;
+
struct nfa_t *STACK[SSIZE]; // Stack used by new()
struct nfa_t **STACKP = &STACK[-1]; // Stack pointer
@@ -43,10 +47,10 @@ struct nfa_t **STACKP = &STACK[-1]; // Stack pointer
* contain the maximum number of states. Subsequent calls simply return
* a pointer to a region of this memory block.
*/
-struct nfa_t *new_nfa(struct pgen_t *pgen)
+struct nfa_t *new_nfa(void)
{
static bool init = false;
- struct nfa_node *new;
+ struct nfa_t *new;
if (!init) {
@@ -85,7 +89,7 @@ void del_nfa(struct nfa_t *doomed)
{
--nfa_nstates;
- memset(doomed, 0, sizeof(NFA));
+ memset(doomed, 0, sizeof(struct nfa_t));
doomed->edge = EMPTY ;
PUSH(doomed);
@@ -168,19 +172,19 @@ char *save(char *str)
* @max_state : the maximum number of states, modified to reflect largest used.
* @start_state: modified to be the start state
*/
-struct nfa_t *thompson(int *max_state, struct nfa_t **start_state)
+struct nfa_t *thompson(FILE *input, int *max_state, struct nfa_t **start_state)
{
CLEAR_STACK();
/* Load first token */
- cur_token = EOS;
+ cur_tok = EOS;
advance();
nfa_nstates = 0;
nfa_next = 0;
- *start_state = machine(); // Manufacture the NFA
- *max_state = nfa_next; // Max state # in NFA
+ *start_state = machine(input); // Manufacture the NFA
+ *max_state = nfa_next; // Max state # in NFA
return nfa_state;
}
@@ -199,7 +203,7 @@ int nfa(FILE *input)
{
struct nfa_t *sstate;
- nfa_state = thompson(&nfa_nstates, &sstate);
+ nfa_state = thompson(input, &nfa_nstates, &sstate);
return (sstate - nfa_state);
}
@@ -234,13 +238,13 @@ void free_nfa(void)
* accepting string if one of the elements of the output state is an
* accepting state.
*/
-SET *e_closure(SET *input, char **accept, int *anchor)
+struct set_t *e_closure(struct set_t *input, char **accept, int *anchor)
{
int stack[NFA_MAX]; // Stack of untested states
int *tos; // Stack pointer
- NFA *p; // NFA state being examined
+ struct nfa_t *p; // NFA state being examined
int i; // State number of "
- int accept_num = LARGEST_INT ;
+ int accept_num = LARGEST_INT;
if (!input)
goto abort;
@@ -315,10 +319,10 @@ SET *e_closure(SET *input, char **accept, int *anchor)
* @inp_set: input set
* @c : transition on this character.
*/
-SET *move(SET *inp_set, int c)
+struct set_t *move(struct set_t *inp_set, int c)
{
- SET *outset = NULL; // Output set
- NFA *p; // Current NFA state
+ struct set_t *outset = NULL; // Output set
+ struct nfa_t *p; // Current NFA state
int i;
for (i = nfa_states; --i >= 0;) {
@@ -327,7 +331,7 @@ SET *move(SET *inp_set, int c)
if (p->edge==c || (p->edge==CCL && TEST(p->bitset, c))) {
if(!outset)
- outset = newset();
+ outset = new_set();
ADD(outset, p->next - nfa_state);
}
@@ -337,20 +341,3 @@ SET *move(SET *inp_set, int c)
}
-
-
-
-#ifdef MAIN
-
-void main(int argc, char **argv)
-{
- struct nfa_t *nfa;
- struct nfa_t *start_state;
- int max_state;
-
- nfa = thompson(getline, &max_state, &start_state);
-}
-
-#endif
-
-
diff --git a/nfa.h b/nfa.h
index 7a95225..1144a85 100644
--- a/nfa.h
+++ b/nfa.h
@@ -1,18 +1,20 @@
#ifndef _NFA_H
#define _NFA_H
+#include "lib/set.h"
+
/*
* An nfa_t is the non-deterministic finite automaton (NFA) produced
* in the Thompson construction.
*/
struct nfa_t {
- int edge; // Edge label: char, CCL, EMPTY, or EPSILON.
- char *bitset; // Set to store character class.
- struct nfa_t *next; // Next state (NULL if no next state).
- struct nfa_t *next2; // Another next state if edge == EPSILON.
- char *accept; // NULL if !accepting state, else the action.
- int anchor; // Says whether pattern is anchored and where.
+ int edge; // Edge label: char, CCL, EMPTY, or EPSILON.
+ struct set_t *bitset; // Set to store character class.
+ struct nfa_t *next; // Next state (NULL if no next state).
+ struct nfa_t *next2; // Another next state if edge == EPSILON.
+ char *accept; // NULL if !accepting state, else the action.
+ int anchor; // Says whether pattern is anchored and where.
};
@@ -43,7 +45,11 @@ int nfa_next; // Index of next element in the array.
#define STR_MAX (10 * 1024)
-struct nfa_t *thompson(int *max_state, struct nfa_t **start_state);
+struct nfa_t *new_nfa(void);
+void del_nfa(struct nfa_t *doomed);
+
+struct nfa_t *thompson(FILE *input, int *max_state, struct nfa_t **start_state);
+
#endif
diff --git a/stack.h b/stack.h
index e69de29..00c5963 100644
--- a/stack.h
+++ b/stack.h
@@ -0,0 +1,64 @@
+#ifndef _STACK_H
+#define _STACK_H
+
+/* stack.h
+ * ```````
+ * Stack maintenance macros.
+ *
+ * Creates downward-growing stacks which should work in all six memory models.
+ */
+
+#define stack_cls /* empty */
+
+
+#define stack_dcl(stack, type, size) \
+ typedef type_t_##stack; \
+ stack_cls t_##stack stack[size] \
+ stack_cls t_##stack (*p_##stack) = stack + (size)
+
+
+#define stack_clear(stack) \
+ ((p_##stack) = (stack + sizeof(stack)/sizeof(*stack)))
+
+#define stack_full(stack) \
+ ((p_##stack) <= stack)
+
+#define stack_empty(stack) \
+ ((p_##stack) >= (stack + sizeof(stack)/sizeof(*stack)))
+
+#define stack_ele(stack) \
+ ((sizeof(stack)/sizeof(*stack)) - (p_##stack - stack))
+
+#define stack_item(stack, offset) \
+ (*(p_##stack + (offset)))
+
+#define stack_p(stack) \
+ p_##stack
+
+#define push_(stack, x) \
+ (*--p_##stack = (x))
+
+#define pop_(stack) \
+ (*p_##stack++)
+
+#define push(stack,x) \
+ ((stack_full(stack)) ? ((t_##stack)(long)(stack_err(1))) \
+ : push_(stack,x))
+
+#define pop(stack) \
+ ((stack_empty(stack) ? ((t_##stack)(long)(stack_err(0))) \
+ : pop_(stack))
+
+#define pipn_(stack,amt) \
+ ((p_##stack += amt)[-amt])
+
+#define popn(stack,amt) \
+ ((stack_ele(stack) < amt) ? ((t_##stack)(long)(stack_err(0))) \
+ : popn_(stack,amt))
+
+#define stack_err(o) \
+ ((o) ? halt(SIGABRT, "Stack overflow\n") \
+ : halt(SIGABRT, "Stack underflow\n"))
+
+
+#endif