diff options
author | Jason Linehan <patientulysses@gmail.com> | 2012-10-01 19:29:38 -0400 |
---|---|---|
committer | Jason Linehan <patientulysses@gmail.com> | 2012-10-01 19:29:38 -0400 |
commit | cc3fe9a51c9223d8f1fd44fd26a5830ba99247ff (patch) | |
tree | 1a398833cb0f3d91c6412be655a8c2cb17cfd75a | |
download | libjdl-dev.tar.gz libjdl-dev.tar.bz2 libjdl-dev.zip |
-rw-r--r-- | .gitignore | 65 | ||||
-rw-r--r-- | abst/bits.h | 412 | ||||
-rw-r--r-- | abst/bloom/bloom.c | 259 | ||||
-rw-r--r-- | abst/bloom/bloom.h | 23 | ||||
-rw-r--r-- | abst/bloom/hashes.h | 159 | ||||
-rw-r--r-- | abst/btree.c | 1688 | ||||
-rw-r--r-- | abst/btree.h | 164 | ||||
-rw-r--r-- | abst/hash.h | 93 | ||||
-rw-r--r-- | abst/list.h | 510 | ||||
-rw-r--r-- | abst/map.h | 320 | ||||
-rw-r--r-- | abst/set.c | 532 | ||||
-rw-r--r-- | abst/set.h | 130 | ||||
-rw-r--r-- | abst/simple_set.h | 2 | ||||
-rw-r--r-- | byte/byte.c | 116 | ||||
-rw-r--r-- | byte/byte.h | 13 | ||||
-rw-r--r-- | byte/fmt.c | 150 | ||||
-rw-r--r-- | byte/fmt.h | 28 | ||||
-rw-r--r-- | byte/scan.c | 180 | ||||
-rw-r--r-- | byte/scan.h | 13 | ||||
-rw-r--r-- | file/dir.c | 331 | ||||
-rw-r--r-- | file/dir.h | 18 | ||||
-rw-r--r-- | file/fdio.c | 472 | ||||
-rw-r--r-- | file/fdio.h | 31 | ||||
-rw-r--r-- | file/file.c | 984 | ||||
-rw-r--r-- | file/file.h | 177 | ||||
-rw-r--r-- | file/iovec.c | 74 | ||||
-rw-r--r-- | file/iovec.h | 12 | ||||
-rw-r--r-- | file/shell.c | 129 | ||||
-rw-r--r-- | file/shell.h | 9 | ||||
-rw-r--r-- | fork/channel.c | 190 | ||||
-rw-r--r-- | fork/channel.h | 38 | ||||
-rw-r--r-- | fork/daemon.c | 173 | ||||
-rw-r--r-- | fork/daemon.h | 28 | ||||
-rw-r--r-- | fork/signals.c | 111 | ||||
-rw-r--r-- | fork/signals.h | 13 | ||||
-rw-r--r-- | hash/sha256.c | 375 | ||||
-rw-r--r-- | hash/sha256.h | 34 | ||||
-rw-r--r-- | inet/checksum.c | 63 | ||||
-rw-r--r-- | inet/checksum.h | 9 | ||||
-rw-r--r-- | inet/ip4.c | 43 | ||||
-rw-r--r-- | inet/ip4.h | 12 | ||||
-rw-r--r-- | inet/ip6.c | 218 | ||||
-rw-r--r-- | inet/ip6.h | 31 | ||||
-rw-r--r-- | inet/ping.c | 264 | ||||
-rw-r--r-- | inet/ping.h | 7 | ||||
-rw-r--r-- | inet/socket.c | 419 | ||||
-rw-r--r-- | inet/socket.h | 136 | ||||
-rw-r--r-- | inet/socketio.c | 31 | ||||
-rw-r--r-- | inet/socketio.h | 27 | ||||
-rw-r--r-- | text/memchr.c | 102 | ||||
-rw-r--r-- | text/memmem.c | 63 | ||||
-rw-r--r-- | text/search.c | 127 | ||||
-rw-r--r-- | text/search.h | 0 | ||||
-rw-r--r-- | text/string.c | 152 | ||||
-rw-r--r-- | text/string.h | 0 | ||||
-rw-r--r-- | text/textutils.c | 1138 | ||||
-rw-r--r-- | text/textutils.h | 94 | ||||
-rw-r--r-- | util/bnfop.c | 218 | ||||
-rw-r--r-- | util/bnfop.h | 51 | ||||
-rw-r--r-- | util/build_assert.h | 48 | ||||
-rw-r--r-- | util/check_type.h | 74 | ||||
-rw-r--r-- | util/container_of.h | 125 | ||||
-rw-r--r-- | util/cpp.h | 46 | ||||
-rw-r--r-- | util/debug.c | 231 | ||||
-rw-r--r-- | util/debug.h | 117 | ||||
-rw-r--r-- | util/time.c | 87 | ||||
-rw-r--r-- | util/time.h | 16 | ||||
-rw-r--r-- | util/util.c | 73 | ||||
-rw-r--r-- | util/util.h | 631 |
69 files changed, 12709 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad10a18 --- /dev/null +++ b/.gitignore @@ -0,0 +1,65 @@ +# Junk left behind by... + +# Autotools +Makefile.in +aclocal.m4 +autom4te.cache +configure +config.status +config.guess +config.h +config.h.in +config.log +config.sub +depcomp +install-sh +libtool +Makefile +Makefile.in +missing +tags +*.log +*.cache +*.m4 +depcomp +install-sh +missing +configure +config.status +*.libs +*.deps +*.dirstamp + +# vim +*.swp + +# Me +*.bak + +# Compiler +*.o + + +# gprof +*gmon.out +*.out +*/*.out + +# objdump +*.s + +# Big files that shouldn't be accidentally committed +*.pdf +*.djvu +*.jpg +*.jpeg +*.Jpg +*.Jpeg +*.JPG +*.JPEG +*.gif +*.Gif +*.GIF +*.png +*.Png +*.PNG diff --git a/abst/bits.h b/abst/bits.h new file mode 100644 index 0000000..03ce131 --- /dev/null +++ b/abst/bits.h @@ -0,0 +1,412 @@ +#ifndef BITHACKS_H +#define BITHACKS_H + +/****************************************************************************** + * Bitstring macros + * ```````````````` + * Macros for working with "bitstrings", that is, arrays of bytes where + * the functional unit of logic is the individual bits. Useful for things + * like radix trees, Bloom filters, bitboards for computer chess, or any + * other succinct data structure or process. + * + * AUTHORS + * Jason Linehan, with lots of help + * + * HISTORY + * These macros were the product of a lot of experimentation and study. + * Although there are few innovative approaches to these macros or their + * equivalents, I consider these to be "my implementation" in that they + * incorporate features, naming conventions, and styles from many + * different sets of macros. Numerous things have also been altered, + * expanded upon, or eliminated, so that they reflect my particular ideas + * of form and function. + * + * The famous header file xtrapbits.h, Copyright 1987, 1988, 1989, 1990, + * 1994 by Digital Equipment Corporation, was a good starting point. 
+ * It was used by the X11 developers to define platform-independent bit + * array manipulation. + * + * Notable contributors included: + * + * Dick Annicchiarico + * Robert Chesler + * Dan Coutu + * Gene Durso + * Marc Evans + * Alan Jamison + * Mark Henry + * Ken Miller + * + * Also, numerous examples floating around comp.lang.c, and many other + * places, I'm sure. + * + * LICENSE + * Public domain. + * + ******************************************************************************/ + +#include <limits.h> +#include <stdint.h> +#include <stdbool.h> + +#define SEGSIZE CHAR_BIT // Number of bits in a word of the bitstring + +#define BITMASK(b) (1 << ((b) % SEGSIZE)) +#define BITSEG(b) ((b) / SEGSIZE) +#define BITSET(a,b) ((a)[BITSEG(b)] |= BITMASK(b)) +#define BITCLR(a,b) ((a)[BITSEG(b)] &= ~BITMASK(b)) +#define BITTOG(a,b) ((a)[BITSEG(b)] ^= BITMASK(b)) +#define BITVAL(a,b) ((a)[BITSEG(b)] & BITMASK(b)) +#define BITFIT(nb) ((nb + SEGSIZE - 1) / SEGSIZE) +#define BIT_IS_SET(a,b) ((BITVAL(a,b)) ? 1 : 0) + +/* +#define NEW_SET(name, size) char name [(BITFIT(size))] +#define SET_ADD(set, val) BITSET(set, val) +#define SET_REM(set, val) BITCLR(set, val) +#define SET_GET(set, val) BITVAL(set, val) +#define set_memberof(set, val) BIT_IS_SET(set, val) +*/ + + +/****************************************************************************** + * Binary constants + * ```````````````` + * Very cool macros that will expand binary literals into the appropriate + * compile time constants. 
+ * + * AUTHORS + * Tom Torfs (basically everything) + * Jason Linehan (added B64) + * + * LICENSE + * donated to the public domain* + * + * USAGE + * + * B8(01010101) -------------------------------> 85 + * B16(10101010,01010101) ---------------------> 43605 + * B32(10000000,11111111,10101010,01010101) ---> 2164238933 + * + ******************************************************************************/ + + +/* Helper macros (not intended for use by the caller) +````````````````````````````````````````````````````````````````````````````` */ + +/* + * Convert a numeric literal into a hexadecimal constant + * (avoids problems with leading zeroes). 8-bit constants + * (max value 0x11111111) always fits in unsigned long. + */ +#define HEX__(X) 0x##X##LU + +/* + * Convert a sequence of eight bit literals into a valid + * compile-time constant literal. + * + * That's a bitwise AND predicating a ternary operation, + * in case you're scratching your head like I was. + */ +#define B8__(X) \ + (((X&0x0000000FLU)?1:0) \ + + ((X&0x000000F0LU)?2:0) \ + + ((X&0x00000F00LU)?4:0) \ + + ((X&0x0000F000LU)?8:0) \ + + ((X&0x000F0000LU)?16:0) \ + + ((X&0x00F00000LU)?32:0) \ + + ((X&0x0F000000LU)?64:0) \ + + ((X&0xF0000000LU)?128:0)) + + +/* User-callable macros +````````````````````````````````````````````````````````````````````````````` */ + +/* for up to 8-bit binary constants */ +#define B8(d) ((unsigned char)B8__(HEX__(d))) + +/* for upto 16-bit binary constants, big endian (MSB first) */ +#define B16(msb, lsb) (((unsigned short)B8(msb)<<8) + B8(lsb)) + +/* for upto 32-bit binary constants, big endian (MSB first) */ +#define B32(msb, b01, b02, lsb) \ + (((unsigned long)B8(msb)<<24) \ + + ((unsigned long)B8(b01)<<16) \ + + ((unsigned long)B8(b02)<<8) \ + + B8(lsb)) + + +/* for upto 64-bit binary constants, big endian (MSB first) */ +#define B64(msb, b01, b02, b03, b04, b05, b06, lsb) \ + (((unsigned long)B8(msb)<<56) \ + (((unsigned long)B8(b01)<<48) \ + (((unsigned 
long)B8(b02)<<40) \ + (((unsigned long)B8(b03)<<32) \ + (((unsigned long)B8(b04)<<24) \ + + ((unsigned long)B8(b05)<<16) \ + + ((unsigned long)B8(b06)<<8) \ + + B8(lsb)) + + + +/****************************************************************************** + * Miscellaneous dark magic + * ```````````````````````` + * Lots of fun and useful stuff. + ******************************************************************************/ + +#define IS_EVEN(x) (((x) & 1) == 0) ? 1 : 0 +#define IS_ODD(x) (!IS_EVEN((x))) ? 1 : 0 + + +/* + * haszero (4 operations) + * `````````````````````` + * (text adapted from Sean Anderson's Bit Twiddling Hacks page) + * + * NOTES + * The fastest method known -- uses hasless(w,1), which is defined above. + * Requires no subsquent verification. + * + * The subexpression (v - 0x01010101UL), evaluates to a high bit set in any + * byte whenever the corresponding byte in v is zero or greater than 0x80. + * The sub-expression ~v & 0x80808080UL evaluates to high bits set in bytes + * where the byte of v doesn't have its high bit set (so the byte was less + * than 0x80). Finally, by ANDing these two sub-expressions the result is + * the high bits set where the bytes in v were zero, since the high bits + * set due to a value greater than 0x80 in the first sub-expression are + * masked off by the second. + * + * AUTHOR + * Juha Järvi suggested hasless(w,1) on April 6, 2005, which he found on + * Paul Hsieh's Assembly Lab; previously it was written in a newsgroup + * post on April 27, 1987 by Alan Mycroft. + * + */ +#define haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL) + + +/* + * hasvalue + * ```````` + * (text adapted from Sean Anderson's Bit Twiddling Hacks page) + * + * To determine if any byte in a word has a specific value, we can XOR + * the "haystack" word with a word that has been filled with the byte + * values ("needles") we are looking for. 
Because XORing a value with
 * itself results in a zero byte, and nonzero otherwise, we can pass
 * the result to the haszero macro defined above, and get our answer.
 *
 * NOTE(review): (~0UL/255 * (n)) replicates n into every byte of an
 * unsigned long, while haszero masks with 32-bit constants, so only the
 * four low-order bytes of x are actually tested -- confirm that is the
 * intent where unsigned long is 64 bits wide.
 *
 * AUTHOR
 * Stephen M Bennet suggested this on December 13, 2009 after reading
 * the entry for haszero.
 */
#define hasvalue(x,n) (haszero((x) ^ (~0UL/255 * (n))))


/*
 * hasless
 * ```````
 * (text adapted from Sean Anderson's Bit Twiddling Hacks page)
 *
 * Determine if a word has a byte less than n
 *
 * NOTES
 * Test if a word x contains an unsigned byte with value < n.
 * Specifically, for n=1, it can be used to find a 0-byte by examining
 * one long at a time, or any byte by XORing x with a mask first.
 * Uses 4 arithmetic/logical operations when n is constant.
 *
 * ~0UL/255 is the word with 0x01 in every byte; multiplying it by a byte
 * value replicates that value into every byte of an unsigned long.
 *
 * CAVEAT
 * Requires x>=0; 0<=n<=128
 *
 * AUTHOR
 * Juha Järvi suggested hasless(w,1) on April 6, 2005, which he found on
 * Paul Hsieh's Assembly Lab; previously it was written in a newsgroup
 * post on April 27, 1987 by Alan Mycroft.
 */
#define hasless(x,n) (((x)-~0UL/255*(n))&~(x)&~0UL/255*128)


/*
 * hasmore
 * ```````
 * (text adapted from Sean Anderson's Bit Twiddling Hacks page)
 *
 * Determine if a word has a byte greater than n
 *
 * Test if a word x contains an unsigned byte with value > n.
 * Uses 3 arithmetic/logical operations when n is constant.
 *
 * The expansion relies on '+' binding tighter than '|': the sum is formed
 * first and then ORed with x.
 *
 * CAVEAT
 * Requires x>=0; 0<=n<=127
 *
 * AUTHOR
 * Juha Järvi submitted hasmore to Bit Twiddling Hacks on April 6, 2005.
 */
#define hasmore(x,n) (((x)+~0UL/255*(127-(n))|(x))&~0UL/255*128)


/*
 * likelyhasbetween
 * ````````````````
 * (text adapted from Sean Anderson's Bit Twiddling Hacks page)
 *
 * Determine if a word has a byte between m and n
 *
 * When m < n, this technique tests if a word x contains an unsigned
 * byte value such that m<value<n. It uses 7 arithmetic/logical operations
 * when n and m are constant.
 *
 * This technique would be suitable for a fast pretest. A variation that
 * takes one more operation (8 total for constant m and n) but provides
 * the exact answer is also given.
 *
 * CAVEAT
 * Bytes that equal n can be reported by likelyhasbetween as false
 * positives, so this should be checked by character if a certain
 * result is needed.
 *
 * Requires x>=0; 0<=m<=127; 0<=n<=128
 *
 * AUTHORS
 * Juha Järvi suggested likelyhasbetween on April 6, 2005.
 * Sean Anderson created hasbetween on April 10, 2005.
 */
#define likelyhasbetween(x,m,n) \
	((((x)-~0UL/255*(n))&~(x)&((x)&~0UL/255*127)+~0UL/255*(127-(m)))&~0UL/255*128)

#define hasbetween(x,m,n) \
	((~0UL/255*(127+(n))-((x)&~0UL/255*127)&~(x)&((x)&~0UL/255*127)+~0UL/255*(127-(m)))&~0UL/255*128)



/*
 * ones32
 * ``````
 * Compute the number of set bits (ones) in a 32-bit integer x
 * @x: unsigned 32-bit integer value
 * Returns: unsigned integer representing the number of '1' bits in x.
 *
 * NOTES
 * The population count of a binary integer value x is the number of one
 * bits in the value. Although many machines have single instructions for
 * this, the single instructions are usually microcoded loops that test a
 * bit per cycle; a log-time algorithm coded in C is often faster. The
 * following code uses a variable-precision SWAR algorithm to perform a
 * tree reduction adding the bits in a 32-bit value:
 *
 * It is worthwhile noting that this SWAR population count algorithm can
 * be improved upon for the case of counting the population of multi-word
 * bit sets. How? The last few steps in the reduction are using only a
 * portion of the SWAR width to produce their results; thus, it would be
 * possible to combine these steps across multiple words being reduced.
 *
 * CAVEAT
 * The AMD Athlon optimization guidelines suggest a very similar algorithm
 * that replaces the last three lines with return((x * 0x01010101) >> 24);. 
+ * For the Athlon (which has a very fast integer multiply), I would have + * expected AMD's code to be faster... but it is actually 6% slower according + * to my benchmarks using a 1.2GHz Athlon (a Thunderbird). Why? Well, it so + * happens that GCC doesn't use a multiply instruction - it writes out the + * equivalent shift and add sequence! + * + * AUTHOR + * Henry Gordon Dietz, The Aggregate Magic Algorithms + * University of Kentucky + * Aggregate.org online technical report (http://aggregate.org/MAGIC/) + */ +static inline unsigned int ones32(register uint32_t x) +{ + /* 32-bit recursive reduction using SWAR... + * but first step is mapping 2-bit values + * into sum of 2 1-bit values in sneaky way + */ + x -= ((x >> 1) & 0x55555555); + x = (((x >> 2) & 0x33333333) + (x & 0x33333333)); + x = (((x >> 4) + x) & 0x0f0f0f0f); + x += (x >> 8); + x += (x >> 16); + return(x & 0x0000003f); +} + + + + +/* + * lzc + * ``` + * Return the number of leading zeroes in a 32-bit word w + * @w: unsigned 32-bit integer value + * Returns: unsigned integer representing the number of leading 0 bits in w + * + * NOTES + * Some machines have had single instructions that count the number of + * leading zero bits in an integer; such an operation can be an artifact + * of having floating point normalization hardware around. Clearly, floor + * of base 2 log of x is (WORDBITS-lzc(x)). 
In any case, this operation + * has found its way into quite a few algorithms, so it is useful to have + * an efficient implementation: + * + * AUTHOR + * Henry Gordon Dietz, The Aggregate Magic Algorithms + * University of Kentucky + * Aggregate.org online technical report (http://aggregate.org/MAGIC/) + */ +static inline unsigned int lzc(uint32_t w) +{ + #define LZC_WORDBITS 32 + + w |= (w >> 1); + w |= (w >> 2); + w |= (w >> 4); + w |= (w >> 8); + w |= (w >> 16); + + return (LZC_WORDBITS - ones32(w)); +} + + + + +/* + * ffz + * ``` + * Determine the offset of the first 0 bit in 32-bit word 'w' + * + * @w: unsigned 32-bit integer value + * Returns: unsigned integer representing the offset of the first 0 bit in 'w' + * + * NOTES + * + * The algorithm goes like this: + * + * 1. Invert the number + * 2. Compute the two's complement of the inverted number + * 3. AND the results of (1) and (2) + * 4. Find the position by computing the binary logarithm of (3) + * e.g. + * For the number 10110111: + * 1. 01001000 `------- first zero + * 2. 10111000 + * 3. 01001000 AND 10111000 = 00001000 + * 4. log2(00001000) = 3 + * `------- clever girl + */ +static inline unsigned int ffz(uint32_t w) +{ + #define FFZ_WORDBITS 32 + unsigned pos = 0; + + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (pos) + : "r" (~(w))); + + return (pos > FFZ_WORDBITS-1) ? FFZ_WORDBITS : (unsigned short)pos; +} + + +#endif diff --git a/abst/bloom/bloom.c b/abst/bloom/bloom.c new file mode 100644 index 0000000..793335d --- /dev/null +++ b/abst/bloom/bloom.c @@ -0,0 +1,259 @@ +/****************************************************************************** + * bloom.c + * ======= + * Bloom filters + * + * HISTORY + * {x, y, z} + * A Bloom filter is a probibalistic : : : + * data structure with several interesting /|\ /|\ /|\ + * properties, such as low memory usage, / | X | X | \ + * asymmetric query confidence, and a very / |/ \|/ \| \ + * speedy O(k) membership test. 
/ | | \ \ + * / /| /|\ |\ \ + * Because a Bloom filter can . . . . . . . . . + * accept any input that can be 00000000001000101010101010100010000000000 + * hashed effectively (such as " " " + * strings), that membership test \ | / + * tends to draw a crowd. TNSTAAFL, but \ | / + * as caveats go, the Bloom filters' are \ | / + * more interesting than incapacitating. \|/ + * : + * Most notably, it can tell you with certainty {w} + * that an item 'i' is *not* a member of set 's', + * but it can only tell you with some finite + * probability whether an item 'i' *is* a member + * of set 's'. + * + * Still, along with the intriguing possibility of using bitwise AND and OR + * to compute the logical union and intersection of two filters, the cheap + * cost of adding elements to the filter set, and the low memory requirements, + * the Bloom filter is a good choice for many applications. + * + * NOTES + * + * Let's look more closely at the probability values. + * + * Assume that a hash function selects each array position with equal + * probability. If m is the number of bits in the array, and k is the number + * of hash functions, then the probability that a certain bit is not set + * to 1 by a certain hash function during the insertion of an element is + * + * 1-(1/m). + * + * The probability that it is not set to 1 by any of the hash functions is + * + * (1-(1/m))^k. + * + * If we have inserted n elements, the probability that a certain bit is + * set 0 is + * + * (1-(1/m))^kn, + * + * Meaning that the probability said bit is set to 1 is therefore + * + * 1-([1-(1/m)]^kn). + * + * Now test membership of an element that is not in the set. Each of the k + * array positions computed by the hash functions is 1 with a probability + * as above. The probability of all of them being 1, which would cause the + * algorithm to erroneously claim that the element is in the set, is often + * given as + * + * (1-[1-(1/m)]^kn)^k ~~ (1 - e^(-kn/m))^k. 
+ * + * This is not strictly correct as it assumes independence for the + * probabilities of each bit being set. However, assuming it is a close + * approximation we have that the probability of false positives descreases + * as m (the number of bits in the array) increases, and increases as n + * (the number of inserted elements) increases. For a given m and n, the + * value of k (the number of hash functions) that minimizes the probability + * is + * + * (m/n)ln(2) ~~ 0.7(m/n), + * + * which gives the false positive probability of + * + * 2^-k ~~ 0.6185^(m/n). + * + * The required number of bits m, given n and a desired false positive + * probability p (and assuming the optimal value of k is used) can be + * computed by substituting the optimal value of k in the probability + * expression above: + * + * p = (1 - e^(-(((m/n)ln(2))*(n/m))))^((m/n)ln(2)), + * + * which simplifies to + * + * ln(p) = -(m/n) * (ln2)^2. + * + * This results in the equation + * + * m = -((n*ln(p)) / ((ln(2))^2)) + * + * The classic filter uses + * + * 1.44*log2(1/eta) + * + * bits of space per inserted key, where eta is the false positive rate of + * the Bloom filter. + * + * AUTHOR + * Jason Linehan (patientulysses@gmail.com) + * + * LICENSE + * Public domain. + * + ******************************************************************************/ + +#include <limits.h> +#include <stdarg.h> +#include <stdbool.h> + +#include "bloom.h" + + +#define SETBIT(a,n) (a[n/CHAR_BIT] |= (1<<(n%CHAR_BIT))) +#define GETBIT(a,n) (a[n/CHAR_BIT] & (1<<(n%CHAR_BIT))) +#define ROUND(size) ((size + CHAR_BIT - 1) / CHAR_BIT) + + +/** + * bloom_new Allocate and return a pointer to a new Bloom filter. + * ````````` + * @size : size of the bit array in the filter + * @nfuncs: the number of hash functions + * Returns: An allocated bloom filter + * + * USAGE + * For best results, make 'size' a power of 2. + */ +struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) 
+{ + struct bloom_t *bloom; + va_list hashes; + int n; + + /* Allocate Bloom filter container */ + if (!(bloom = malloc(sizeof(struct bloom_t)))) + return NULL; + + /* Allocate Bloom array */ + if (!(bloom->a = calloc(ROUND(size), sizeof(char)))) { + free(bloom); + return NULL; + } + + /* Allocate Bloom filter hash function pointers */ + if (!(bloom->hash = (hashfp_t*)malloc(num_hashes*sizeof(hashfp_t)))) { + free(bloom->a); + free(bloom); + return NULL; + } + + /* Assign hash functions to pointers in the Bloom filter */ + va_start(hashes, num_hashes); + + for (n=0; n<num_hashes; n++) + bloom->hash[n] = va_arg(hashes, hashfp_t); + + va_end(hashes); + + /* + * Record the number of hash functions (k) and the number of bytes + * in the Bloom array (m). + */ + bloom->k = num_hashes; + bloom->m = size; + + return bloom; +} + + +/** + * bloom_del Delete a Bloom filter. + * ````````` + * @bloom : The condemned. + * Returns: nothing. + */ +void bloom_del(struct bloom_t *bloom) +{ + free(bloom->a); + free(bloom->hash); + free(bloom); +} + + +/** + * bloom_add Add a string to a Bloom filter. + * ````````` + * @bloom : Bloom filter + * @s : string to add + * Returns: nothing. + * + * CAVEAT + * Once a string has been added to the filter, it cannot be "removed"! + */ +void bloom_add(struct bloom_t *bloom, const char *s) +{ + unsigned int hash; + int n; + + for (n=0; n<bloom->k; n++) { + hash = (unsigned int)bloom->hash[n](s); + SETBIT(bloom->a, (hash % bloom->m)); + } +} + + +/** + * bloom_check Determine if a string is in the Bloom filter. + * ``````````` + * @bloom : Bloom filter + * @s : string to add + * Returns: false if string does not exist in the filter, otherwise true. + * + * NOTES + * + * So this is the freakshow that bored programmers pay a nickel to get a + * peek at, step right up. This is the way the membership test works. + * + * The string 's' is hashed once for each of the 'k' hash functions, as + * though we were planning to add it to the filter. 
Instead of adding it + * however, we examine the bit that we *would* have set, and consider its + * value. + * + * If the bit is 1 (set), the string we are hashing may be in the filter, + * since it would have set this bit when it was originally hashed. However, + * it may also be that another string just happened to produce a hash value + * that would also set this bit. That would be a false positive. This is why + * we have k > 1, so we can minimize the likelihood of false positives + * occuring. + * + * If every bit corresponding to every one of the k hashes of our query + * string is set, we can say with some probability of being correct that + * the string we are holding is indeed "in" the filter. However, we can + * never be sure. + * + * If, however, as we hash our string and peek at the resulting bit in the + * filter, we find the bit is 0 (not set)... well now, that's different. + * In this case, we can say with absolute certainty that the string we are + * holding is *not* in the filter, because if it were, this bit would have + * to be set. + * + * In this way, the Bloom filter can answer NO with absolute surety, but + * can only speak a qualified YES. + */ +bool bloom_check(struct bloom_t *bloom, const char *s) +{ + unsigned int hash; + int n; + + for (n=0; n<bloom->k; n++) { + hash = (unsigned int)bloom->hash[n](s); + if (!(GETBIT(bloom->a, (hash % bloom->m)))) + return false; + } + return true; /* ? 
*/ +} + diff --git a/abst/bloom/bloom.h b/abst/bloom/bloom.h new file mode 100644 index 0000000..5a7d41f --- /dev/null +++ b/abst/bloom/bloom.h @@ -0,0 +1,23 @@ +#ifndef _BLOOM_FILTER_H +#define _BLOOM_FILTER_H + +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include "hashes.h" + +typedef unsigned int (*hashfp_t)(const char *); + +struct bloom_t { + size_t m; + size_t k; + unsigned char *a; + hashfp_t *hash; +}; + +struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...); +void bloom_del (struct bloom_t *bloom); +void bloom_add (struct bloom_t *bloom, const char *s); +bool bloom_check(struct bloom_t *bloom, const char *s); + +#endif diff --git a/abst/bloom/hashes.h b/abst/bloom/hashes.h new file mode 100644 index 0000000..a4a397e --- /dev/null +++ b/abst/bloom/hashes.h @@ -0,0 +1,159 @@ +/****************************************************************************** + * djb2_hash + * ````````` + * HISTORY + * This algorithm (k=33) was first reported by Dan Bernstein many years + * ago in comp.lang.c. Another version of this algorithm (now favored by + * bernstein) uses XOR: + * + * hash(i) = hash(i - 1) * 33 ^ str[i]; + * + * The magic of number 33 (why it works better than many other constants, + * prime or not) has never been adequately explained. + * + ******************************************************************************/ +static inline unsigned long djb2_hash(const char *str) +{ + unsigned long hash; + int c; + + hash = 5381; + + while ((c = (unsigned char)*str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return hash; +} + +/****************************************************************************** + * sdbm_hash + * ````````` + * HISTORY + * This algorithm was created for sdbm (a public-domain reimplementation + * of ndbm) database library. It was found to do well in scrambling bits, + * causing better distribution of the keys and fewer splits. 
it also + * happens to be a good general hashing function with good distribution. + * + * The actual function is + * + * hash(i) = hash(i - 1) * 65599 + str[i]; + * + * What is included below is the faster version used in gawk. [there is + * even a faster, duff-device version] the magic constant 65599 was picked + * out of thin air while experimenting with different constants, and turns + * out to be a prime. this is one of the algorithms used in berkeley db + * (see sleepycat) and elsewhere. + * + ******************************************************************************/ +static inline unsigned long sdbm_hash(const char *str) +{ + unsigned long hash; + int c; + + hash = 0; + + while ((c = (unsigned char)*str++)) + hash = c + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +/****************************************************************************** + * lose lose + * ````````` + * HISTORY + * This hash function appeared in K&R (1st ed) but at least the reader + * was warned: + * + * "This is not the best possible algorithm, but it has the merit + * of extreme simplicity." + * + * This is an understatement. It is a terrible hashing algorithm, and it + * could have been much better without sacrificing its "extreme simplicity." + * [see the second edition!] + * + * Many C programmers use this function without actually testing it, or + * checking something like Knuth's Sorting and Searching, so it stuck. + * It is now found mixed with otherwise respectable code, eg. cnews. sigh. 
+ * [see also: tpop] + * + ******************************************************************************/ +static inline unsigned long kr_hash(const char *str) +{ + unsigned int hash; + unsigned int c; + + hash = 0; + + while ((c = (unsigned char)*str++)) + hash += c; + + return hash; +} + +/****************************************************************************** + * sax_hash + * ```````` + * Shift, Add, XOR + * + ******************************************************************************/ +static inline unsigned int sax_hash(const char *key) +{ + unsigned int h; + + h = 0; + + while(*key) + h^=(h<<5)+(h>>2)+(unsigned char)*key++; + + return h; +} + + +/****************************************************************************** + * dek_hash + * ```````` + * HISTORY + * Proposed by Donald E. Knuth in The Art Of Computer Programming Vol. 3, + * under the topic of "Sorting and Search", Chapter 6.4. + * + ******************************************************************************/ +static inline unsigned int dek_hash(const char *str, unsigned int len) +{ + unsigned int hash; + unsigned int c; + + hash = len; + c = 0; + + while ((c = (unsigned int)*str++)) + hash = ((hash << 5) ^ (hash >> 27)) ^ (c); + + return hash; +} + + +/****************************************************************************** + * fnv_hash + * ```````` + * NOTE + * For a more fully featured and modern version of this hash, see fnv32.c + * + ******************************************************************************/ +static inline unsigned int fnv_hash(const char *str, unsigned int len) +{ + #define FNV_PRIME 0x811C9DC5 + unsigned int hash; + unsigned int c; + + hash = 0; + c = 0; + + while ((c = (unsigned int)*str++)) { + hash *= FNV_PRIME; + hash ^= (c); + } + + return hash; +} + diff --git a/abst/btree.c b/abst/btree.c new file mode 100644 index 0000000..363510a --- /dev/null +++ b/abst/btree.c @@ -0,0 +1,1688 @@ +// btree version 2n +// 26 APR 2010 + +// author: karl 
malbrain, malbrain@yahoo.com + +/* +This work, including the source code, documentation +and related data, is placed into the public domain. + +The orginal author is Karl Malbrain. + +THIS SOFTWARE IS PROVIDED AS-IS WITHOUT WARRANTY +OF ANY KIND, NOT EVEN THE IMPLIED WARRANTY OF +MERCHANTABILITY. THE AUTHOR OF THIS SOFTWARE, +ASSUMES _NO_ RESPONSIBILITY FOR ANY CONSEQUENCE +RESULTING FROM THE USE, MODIFICATION, OR +REDISTRIBUTION OF THIS SOFTWARE. +*/ + +// Please see the project home page for documentation +// http://code.google.com/p/high-concurrency-btree + +#define _FILE_OFFSET_BITS 64 +#define _LARGEFILE64_SOURCE + +#ifdef linux +#define _GNU_SOURCE +#endif + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <errno.h> + +#include <memory.h> +#include <string.h> + +#include "btree.h" + +/* BTree page number constatnts */ +#define ALLOC_page 0 +#define ROOT_page 1 + +/* Number of levels to create in a new BTree */ +#define MIN_lvl 2 + +/* + * OVERVIEW + * ======== + * + * The page is allocated from low and high ends. + * + * The key offsets and row-id's are allocated from + * the bottom, while the text of the key is allocated + * from the top. When the two areas meet, the page is + * split in two. + * + * A key consists of a length byte, two bytes of index + * number (0 - 65534), and up to 253 bytes of key value. + * + * Duplicate keys are discarded. Associated with each key + * is a 48-bit row-id. + * + * The B-tree root is always located at page 1. The first + * leaf page of level zero is always located on page 2. + * + * The B-tree pages are doubly linked, using next and prev + * pointers to facilitate enumerators, and provide for + * concurrency. + * + * When the root page is filled, it is split in two, and + * the tree height is raised by a new root at page one with + * two keys. + * + * Deleted keys are marked with a dead bit until page cleanup. 
+ * The fence key for a node is always present, even after + * deletion and cleanup. + * + * Groups of pages from the B-tree are optionally cached with + * memory mapping. A hash table is used to keep track of the + * cached pages. This behavior is controlled by the cache block + * size parameter to bt_open. + * + * To achieve maximum concurrency, one page is locked at a + * time as the tree is traversed in search of the desired leaf + * node. + * + * The right page numbers are used in cases where the page + * is being split or consolidated. + * + * Page 0 is dedicated to lock for new page extensions, and + * chains empty pages together for re-use. + * + * Parent locks are obtained to prevent re-splitting or deleting + * a node before its fence is posted into its upper level. + * + * Empty nodes are chained together through the ALLOC page, to + * be tracked for re-use. + */ + + + + +/* + * Access the address of a slot, given + * the page address and slot number. + */ +#define slotptr(page, slot) (((BtSlot *)(page+1)) + (slot-1)) + +/* + * Access the address of a key, given + * the page address and slot number. + */ +#define keyptr(page, slot) ((BtKey)((unsigned char*)(page) + slotptr(page, slot)->off)) + + + +void bt_putid(unsigned char *dest, uid id) +{ + int i; + + i = BtId; + + while(i--) { + dest[i] = (unsigned char)id, id >>= 8; + } +} + + +uid bt_getid(unsigned char *src) +{ + uid id = 0; + int i; + + for (i=0; i<BtId; i++) { + id <<= 8, id |= *src++; + } + + return id; +} + + +/** + * bt_lockpage + * ``````````` + * Place a write, read, or parent lock on the requested page number. + * + * @bt : Pointer to the B-tree. + * @page_no: Page number. + * @mode : write, read, or parent lock. + * Return : Error status. + */ +BTERR bt_lockpage(BtDb *bt, uid page_no, BtLock mode) +{ + struct flock lock[1]; + off64_t off; + int flag; + + off = page_no << bt->page_bits; + flag = PROT_READ | (bt->mode == BT_ro ? 
0 : PROT_WRITE); + + if (mode == BtLockRead || mode == BtLockWrite) { + off += sizeof(*bt->page); // use second segment + } + + if (mode == BtLockParent) { + off += 2*sizeof(*bt->page); // use third segment + } + + memset(lock, 0, sizeof(lock)); + + lock->l_start = off; + lock->l_type = (mode==BtLockDelete||mode==BtLockWrite||mode==BtLockParent) ? F_WRLCK : F_RDLCK; + lock->l_len = sizeof(*bt->page); + lock->l_whence = 0; + + if (fcntl(bt->idx, F_SETLKW, lock) == -1) { + return bt->err = BTERR_lock; + } + + return 0; +} + + +/** + * bt_unlockpage + * ````````````` + * Remove a write, read, or parent lock from the requested page number. + * + * @bt : Pointer to the B-tree. + * @page_no: Page number. + * @mode : write, read, or parent lock. + * Return : Error status. + */ +BTERR bt_unlockpage(BtDb *bt, uid page_no, BtLock mode) +{ + struct flock lock[1]; + off64_t off; + + off = page_no << bt->page_bits; + + if (mode == BtLockRead || mode == BtLockWrite) { + off += sizeof(*bt->page); // use second segment + } + + if (mode == BtLockParent) { + off += 2*sizeof(*bt->page); // use third segment + } + + memset(lock, 0, sizeof(lock)); + + lock->l_start = off; + lock->l_type = F_UNLCK; + lock->l_len = sizeof(*bt->page); + lock->l_whence = 0; + + if (fcntl(bt->idx, F_SETLK, lock) == -1) { + return bt->err = BTERR_lock; + } + + return bt->err = 0; +} + + +/** + * bt_close + * ```````` + * Close the tree and release memory stored inside. + * + * @bt : Pointer to the B-tree. + * Return: Nothing. + */ +void bt_close(BtDb *bt) +{ + BtHash *hash; + + // release mapped pages + if ((hash = bt->lrufirst)) { + do { + munmap(hash->page, (bt->hashmask+1) << bt->page_bits); + } while ((hash = hash->lrunext)); + } + + if (bt->mem) { + free(bt->mem); + } + + close(bt->idx); + free(bt); +} + + +/** + * bt_open + * ``````` + * Open/create a new B-tree. + * + * @name : Filename of the B-tree. + * @mode : Mode flag to open with. + * @bits : Bits in page size (e.g. 16). 
+ * @nodemax: Size of mapped page cache (e.g. 8192), or 0 for no mapping. + * Return : Pointer to a B-tree. + */ +BtDb *bt_open(char *name, uint mode, uint bits, uint nodemax) +{ + BtLock lockmode; + uint lvl; + uint attr; + uint cacheblk; + BtPage alloc; + off64_t size; + uint amt[1]; + BtKey key; + BtDb* bt; + + lockmode = BtLockWrite; + + bt = malloc(sizeof(BtDb)+nodemax*sizeof(BtHash)); + + memset(bt, 0, sizeof(BtDb)); + + switch (mode & 0x7fff) + { + case BT_fl: + case BT_rw: + bt->idx = open((char*)name, O_RDWR | O_CREAT, 0666); + break; + + case BT_ro: + default: + bt->idx = open((char*)name, O_RDONLY); + lockmode = BtLockRead; + break; + } + if (bt->idx == -1) { + return free(bt), NULL; + } + + if (nodemax) { + cacheblk = 4096; // page size for unix + } else { + cacheblk = 0; + } + + // determine sanity of page size + if (bits > BT_maxbits) { + bits = BT_maxbits; + } else if (bits < BT_minbits) { + bits = BT_minbits; + } + + if (bt_lockpage(bt, ALLOC_page, lockmode)) { + return bt_close (bt), NULL; + } + + *amt = 0; + + // read minimum page size to get root info + + if ((size = lseek(bt->idx, 0L, 2))) { + alloc = malloc(BT_minpage); + pread(bt->idx, alloc, BT_minpage, 0); + bits = alloc->bits; + free(alloc); + } else if(mode == BT_ro) { + return bt_close(bt), NULL; + } + + bt->page_size = 1 << bits; + bt->page_bits = bits; + + bt->nodemax = nodemax; + bt->mode = mode; + + // setup cache mapping + if (cacheblk) { + if (cacheblk < bt->page_size) { + cacheblk = bt->page_size; + } + bt->hashmask = (cacheblk >> bits) - 1; + bt->mapped_io = 1; + } + + bt->mem = malloc(5 *bt->page_size); + + bt->frame = (BtPage)bt->mem; + bt->cursor = (BtPage)(bt->mem + bt->page_size); + bt->page = (BtPage)(bt->mem + 2 * bt->page_size); + bt->alloc = (BtPage)(bt->mem + 3 * bt->page_size); + bt->temp = (BtPage)(bt->mem + 4 * bt->page_size); + + if (size || *amt) { + if (bt_unlockpage(bt, ALLOC_page, lockmode)) { + return bt_close (bt), NULL; + } + return bt; + } + + // initializes 
an empty b-tree with root page and page of leaves + memset(bt->alloc, 0, bt->page_size); + bt_putid(bt->alloc->right, MIN_lvl+1); + bt->alloc->bits = bt->page_bits; + + if (write (bt->idx, bt->alloc, bt->page_size) < bt->page_size) { + return bt_close(bt), NULL; + } + + memset(bt->frame, 0, bt->page_size); + bt->frame->bits = bt->page_bits; + + for (lvl=MIN_lvl; lvl--;) { + slotptr(bt->frame, 1)->off = bt->page_size-3; + /* next (lower) page number */ + bt_putid(slotptr(bt->frame, 1)->id, lvl ? (MIN_lvl-lvl+1) : 0); + key = keyptr(bt->frame, 1); + key->len = 2; // create stopper key + key->key[0] = 0xff; + key->key[1] = 0xff; + bt->frame->min = bt->page_size - 3; + bt->frame->lvl = lvl; + bt->frame->cnt = 1; + bt->frame->act = 1; + + if (write(bt->idx, bt->frame, bt->page_size) < bt->page_size) { + return bt_close (bt), NULL; + } + } + + // create empty page area by writing last page of first + // cache area (other pages are zeroed by O/S) + + if (bt->mapped_io && bt->hashmask > 2) { + memset(bt->frame, 0, bt->page_size); + pwrite(bt->idx, bt->frame, bt->page_size, bt->hashmask << bt->page_bits); + } + + if (bt_unlockpage(bt, ALLOC_page, lockmode)) { + return bt_close (bt), NULL; + } + + return bt; +} + +// compare two keys, returning > 0, = 0, or < 0 +// as the comparison value +/** + * keycmp + * `````` + * Compare two keys. + * + * @key1: Key to be compared. + * @key2: Key to be compared. + * @len2: Length of the second key. + * Return: > 0 if (key1 > key2), 0 if (key1 == key2), < 0 if (key1 < key2). + */ +int keycmp (BtKey key1, unsigned char *key2, uint len2) +{ + uint len1; + int ans; + + len1 = key1->len; + + if ((ans = memcmp(key1->key, key2, len1 > len2 ? len2 : len1))) { + return ans; + } + + if (len1 > len2) { + return 1; + } + if (len1 < len2) { + return -1; + } + + return 0; +} + + +/** + * bt_update + * ````````` + * Update current page by writing file contents/flushing mapped area to disk. + * + * @bt : Pointer to the B-tree. 
+ * @page : Page to be updated. + * @page_no: Page number of @page. + * Return : Error status. + */ +BTERR bt_update (BtDb *bt, BtPage page, uid page_no) +{ + off64_t off; + + off = page_no << bt->page_bits; + + if (!bt->mapped_io) { + if (pwrite(bt->idx, page, bt->page_size, off)!=bt->page_size) { + return bt->err = BTERR_wrt; + } + } + return 0; +} + +/** + * bt_findhash + * ``````````` + * Retreive a page in the cache. + * + * @bt : Pointer to the B-tree. + * @page_no: Page number to search for. + * Return : hash. + */ +BtHash *bt_findhash(BtDb *bt, uid page_no) +{ + BtHash *hash; + uint idx; + + /* Compute cache block, first page, and hash index. */ + page_no &= ~bt->hashmask; + idx = (uint)(page_no * BT_hashprime % BT_hashsize); + + if (bt->cache[idx]) { + hash = bt->nodes + bt->cache[idx]; + } else { + return NULL; + } + + do { + if (hash->page_no == page_no) { + break; + } + + } while ((hash = hash->hashnext)); + + return hash; +} + + +/** + * bt_linkhash + * ``````````` + * Add page cache entry to hash index. + * + * @bt : Pointer to B-tree. + * @node : Node to be cached. + * @page_no: Page number. + * Return : Nothing. + */ +void bt_linkhash(BtDb *bt, BtHash *node, uid page_no) +{ + uint idx; + BtHash *hash; + + idx = (uint)((page_no & ~bt->hashmask) * BT_hashprime % BT_hashsize); + + if (bt->cache[idx]) { + node->hashnext = hash = bt->nodes + bt->cache[idx]; + hash->hashprev = node; + } + + node->hashprev = NULL; + bt->cache[idx] = (ushort)(node - bt->nodes); +} + + +/** + * bt_unlinkhash + * ````````````` + * Remove a cache entry from the hash table. + * + * @bt : Pointer to a B-tree. + * @node : Node to remove from the table. + * Return: Nothing. + */ +void bt_unlinkhash(BtDb *bt, BtHash *node) +{ + uint idx; + BtHash *hash; + + idx = (uint)((node->page_no & ~bt->hashmask) * BT_hashprime % BT_hashsize); + + /* Unlink node. 
*/ + if ((hash = node->hashprev)) { + hash->hashnext = node->hashnext; + } else if ((hash = node->hashnext)) { + bt->cache[idx] = (ushort)(hash - bt->nodes); + } else { + bt->cache[idx] = 0; + } + + if ((hash = node->hashnext)) { + hash->hashprev = node->hashprev; + } +} + + +/** + * bt_linklru + * `````````` + * Add cache page to lru chain and map pages. + * + * @bt : Pointer to a B-tree. + * @hash : Cache page. + * @page_no: Page number to add. + * Return : New cache page. + */ +BtPage bt_linklru(BtDb *bt, BtHash *hash, uid page_no) +{ + int flag; + off64_t off; + off64_t limit; + BtHash *node; + + off = (page_no & ~bt->hashmask) << bt->page_bits; + limit = off + ((bt->hashmask+1) << bt->page_bits); + + memset(hash, 0, sizeof(BtHash)); + hash->page_no = (page_no & ~bt->hashmask); + bt_linkhash(bt, hash, page_no); + + if ((node = hash->lrunext = bt->lrufirst)) { + node->lruprev = hash; + } else { + bt->lrulast = hash; + } + + bt->lrufirst = hash; + + flag = PROT_READ | (bt->mode == BT_ro ? 0 : PROT_WRITE); + hash->page = (BtPage)mmap(0, (bt->hashmask+1) << bt->page_bits, flag, MAP_SHARED, bt->idx, off); + + if (((int)hash->page == -1)) { + return bt->err = BTERR_map, (BtPage)NULL; + } + + return (BtPage)((char*)hash->page + ((uint)(page_no & bt->hashmask) << bt->page_bits)); +} + + +/** + * bt_hashpage + * ``````````` + * Find or place requested page in page-cache. + * + * @bt : Pointer to a B-tree. + * @page_no: Page number to request. + * Return : Memory address where page @page_no is located. + */ +BtPage bt_hashpage(BtDb *bt, uid page_no) +{ + BtHash *hash; + BtHash *node; + BtHash *next; + BtPage page; + + /* Find page in cache and move to top of lru list. */ + if ((hash = bt_findhash(bt, page_no))) { + page = (BtPage)((char*)hash->page + ((uint)(page_no & bt->hashmask) << bt->page_bits)); + + /* Swap node in lru list. 
*/ + if ((node = hash->lruprev)) { + if ((next = node->lrunext = hash->lrunext)) { + next->lruprev = node; + } else { + bt->lrulast = node; + } + + if ((next = hash->lrunext = bt->lrufirst)) { + next->lruprev = hash; + } else { + return bt->err = BTERR_hash, (BtPage)NULL; + } + + hash->lruprev = NULL; + bt->lrufirst = hash; + } + return page; + } + + /* Map pages and add to cache entry. */ + if (bt->nodecnt < bt->nodemax) { + hash = bt->nodes + ++bt->nodecnt; + return bt_linklru(bt, hash, page_no); + } + + /* + * Hash table is already full, replace + * last lru entry from the cache. + */ + if ((hash = bt->lrulast)) { + /* Unlink from lru list. */ + if ((node = bt->lrulast = hash->lruprev)) { + node->lrunext = NULL; + } else { + return bt->err = BTERR_hash, (BtPage)NULL; + } + + munmap(hash->page, (bt->hashmask+1) << bt->page_bits); + + /* Unlink from hash table. */ + bt_unlinkhash(bt, hash); + + /* Map and add to cache. */ + return bt_linklru(bt, hash, page_no); + } + return bt->err = BTERR_hash, (BtPage)NULL; +} + + +/** + * bt_mappage + * `````````` + * Map a B-tree page onto the current page. + * + * @bt : Pointer to a B-tree. + * @page : Current page. + * @page_no: Page to map onto current. + * Return : Error status. + */ +BTERR bt_mappage (BtDb *bt, BtPage *page, uid page_no) +{ + off64_t off; + + off = page_no << bt->page_bits; + + if (bt->mapped_io) { + bt->err = 0; + *page = bt_hashpage(bt, page_no); + return bt->err; + } + + if (pread(bt->idx, *page, bt->page_size, off) < bt->page_size) { + return bt->err = BTERR_map; + } + + return 0; +} + + +/** + * bt_freepage + * ``````````` + * Deallocate a deleted page, place on allocated page free chain. + * + * @bt : Pointer to a B-tree. + * @page_no: Page to map onto current. + * Return : Error status. + */ +BTERR bt_freepage(BtDb *bt, uid page_no) +{ + /* Obtain delete lock on deleted node. */ + if (bt_lockpage(bt, page_no, BtLockDelete)) { + return bt->err; + } + + /* Obtain write lock on deleted node. 
*/ + if (bt_lockpage(bt, page_no, BtLockWrite)) { + return bt->err; + } + + if (bt_mappage (bt, &bt->temp, page_no)) { + return bt->err; + } + + /* Lock allocation page. */ + if (bt_lockpage(bt, ALLOC_page, BtLockWrite)) { + return bt->err; + } + + if (bt_mappage (bt, &bt->alloc, ALLOC_page)) { + return bt->err; + } + + /* Store chain in second right. */ + bt_putid(bt->temp->right, bt_getid(bt->alloc[1].right)); + bt_putid(bt->alloc[1].right, page_no); + + if (bt_update(bt, bt->alloc, ALLOC_page)) { + return bt->err; + } + if (bt_update(bt, bt->temp, page_no)) { + return bt->err; + } + + /* Unlock page zero. */ + if (bt_unlockpage(bt, ALLOC_page, BtLockWrite)) { + return bt->err; + } + + /* Remove write lock on deleted node. */ + if (bt_unlockpage(bt, page_no, BtLockWrite)) { + return bt->err; + } + + /* Remove delete lock on deleted node. */ + if (bt_unlockpage(bt, page_no, BtLockDelete)) { + return bt->err; + } + + return 0; +} + + +// allocate a new page and write page into it +/** + * bt_newpage + * `````````` + * Allocate a new page and write the page into it. + * + * @bt : Pointer to a B-tree. + * @page : Page to be written into new page. + * Return: Page number of the newly-allocated page. + */ +uid bt_newpage(BtDb *bt, BtPage page) +{ + uid new_page; + char *pmap; + int reuse; + + /* Lock page zero. */ + if (bt_lockpage(bt, ALLOC_page, BtLockWrite)) { + return 0; + } + + if (bt_mappage(bt, &bt->alloc, ALLOC_page)) { + return 0; + } + + /* + * Use empty chain first, else + * allocate an empty page. + */ + if ((new_page = bt_getid(bt->alloc[1].right))) { + if (bt_mappage(bt, &bt->temp, new_page)) { + return 0; // don't unlock on error + } + bt_putid(bt->alloc[1].right, bt_getid(bt->temp->right)); + reuse = 1; + } else { + new_page = bt_getid(bt->alloc->right); + bt_putid(bt->alloc->right, new_page+1); + reuse = 0; + } + + if (bt_update(bt, bt->alloc, ALLOC_page)) { + return 0; /* don't unlock on error */ + } + + /* Unlock page zero. 
*/ + if (bt_unlockpage(bt, ALLOC_page, BtLockWrite)) { + return 0; + } + + if (!bt->mapped_io) { + if (bt_update(bt, page, new_page)) { + return 0; /* don't unlock on error */ + } + + return new_page; + } + + if (pwrite(bt->idx, page, bt->page_size, new_page << bt->page_bits) < bt->page_size) { + return bt->err = BTERR_wrt, 0; + } + + /* + * If writing the first page of a hash block, + * zero the last page in the block. + */ + if (!reuse && bt->hashmask > 0 && (new_page & bt->hashmask) == 0) { + /* Use temporary buffer to write zeros. */ + memset(bt->temp, 0, bt->page_size); + if (pwrite(bt->idx, bt->temp, bt->page_size, (new_page|bt->hashmask)<<bt->page_bits) < bt->page_size) { + return bt->err = BTERR_wrt, 0; + } + } + + return new_page; +} + + +/** + * bt_findslot + * ``````````` + * Find slot in page for given key at a given level. + * + * @bt : Pointer to a B-tree. + * @key: Key value to be inserted. + * @len: Length of the key. + * Return: code. + */ +int bt_findslot (BtDb *bt, unsigned char *key, uint len) +{ + uint diff; + uint higher; + uint slot; + uint low; + uint good; + + higher = bt->page->cnt; + low = 1; + good = 0; + + /* Make stopper key an infinite fence value. */ + if (bt_getid (bt->page->right)) { + higher++; + } else { + good++; + } + + /* + * 'low' is the next candidate, 'higher' has + * already been tested as >= the given key; + * the loop ends when their values match. + */ + while ((diff = higher - low)) { + slot = low + (diff >> 1); + + if (keycmp(keyptr(bt->page, slot), key, len) < 0) { + low = slot + 1; + } else { + higher = slot, good++; + } + } + + /* Return zero if key is on the right link page. */ + return good ? higher : 0; +} + + +/** + * bt_loadpage + * ``````````` + * Find and load a page at given level for a given key. + * + * @bt : Pointer to a B-tree. + * @key: Key to lookup page with. + * @len: Length of @key. + * @lvl: Page level. + * @lock: Lock request. + * Return: code. 
+ * + * NOTE + * Leaves page read or write locked as requested. + */ +int bt_loadpage (BtDb *bt, unsigned char *key, uint len, uint lvl, uint lock) +{ + uid page_no; + uid prevpage; + uint drill; + uint slot; + uint mode; + uint prevmode; + + page_no = ROOT_page; + prevpage = 0; + drill = 0xff; + + /* Start at the root of the B-tree and drill down. */ + do { + /* Determine lock mode of drill level. */ + mode = (lock == BtLockWrite) && (drill == lvl) ? BtLockWrite : BtLockRead; + + bt->page_no = page_no; + + /* Obtain access lock using lock chaining. */ + if (page_no > ROOT_page) { + if (bt_lockpage(bt, bt->page_no, BtLockAccess)) { + return 0; + } + } + + if (prevpage) { + if (bt_unlockpage(bt, prevpage, prevmode)) { + return 0; + } + } + + /* Obtain read lock using lock chaining. */ + if (bt_lockpage(bt, bt->page_no, mode)) { + return 0; + } + + if (page_no > ROOT_page) { + if (bt_unlockpage(bt, bt->page_no, BtLockAccess)) { + return 0; + } + } + + /* Map/obtain page contents. */ + if (bt_mappage (bt, &bt->page, page_no)) { + return 0; + } + + + /* + * Re-read and re-lock root after determining + * actual level of root. + */ + if (bt->page->lvl != drill) { + if (bt->page_no != ROOT_page) { + return bt->err = BTERR_struct, 0; + } + + drill = bt->page->lvl; + + if (lock == BtLockWrite && drill == lvl) { + if (bt_unlockpage(bt, page_no, mode)) { + return 0; + } + } else { + continue; + } + } + + /* + * Find the key on a page at this level, and + * descend to the requested level. + */ + if (!bt->page->kill && (slot = bt_findslot (bt, key, len))) { + if (drill == lvl) { + return slot; + } + + while (slotptr(bt->page, slot)->dead) { + if (slot++ < bt->page->cnt) { + continue; + } else { + page_no = bt_getid(bt->page->right); + goto slideright; + } + } + + page_no = bt_getid(slotptr(bt->page, slot)->id); + drill--; + + /* + * Or slide right into the next page + * (slide left from deleted page). 
+ */ + } else { + page_no = bt_getid(bt->page->right); + } + + /* + * Continue down/right using overlapping locks + * to protect pages being killed or split. + */ +slideright: + prevpage = bt->page_no; + prevmode = mode; + + } while (page_no); + + /* Return error on end of right chain. */ + bt->err = BTERR_struct; + return 0; // return error +} + + +/** + * bt_deletekey + * ```````````` + * Find and delete key on page by marking delete flag bit. + * When page becomes empty, delete it. + * + * @bt : Pointer to a B-tree. + * @key : Key to delete. + * @len : Length of @key. + * @lvl : Page level. + * Return: Error status. + */ +BTERR bt_deletekey(BtDb *bt, unsigned char *key, uint len, uint lvl) +{ + unsigned char lowerkey[256]; + unsigned char higherkey[256]; + + uint slot; + uint tod; + uint dirty; + uid page_no; + uid right; + BtKey ptr; + + dirty = 0; + + if ((slot = bt_loadpage (bt, key, len, lvl, BtLockWrite))) { + ptr = keyptr(bt->page, slot); + } else { + return bt->err; + } + + /* If key is found delete it, otherwise ignore request */ + if (!keycmp (ptr, key, len)) { + if (slotptr(bt->page, slot)->dead == 0) { + dirty = slotptr(bt->page,slot)->dead = 1, bt->page->act--; + } + } + + /* return if page is not empty, or it has no right sibling */ + right = bt_getid(bt->page->right); + page_no = bt->page_no; + + if (!right || bt->page->act) { + if (dirty && bt_update(bt, bt->page, page_no)) { + return bt->err; + } + } else { + return bt_unlockpage(bt, page_no, BtLockWrite); + } + + /* obtain Parent lock over write lock */ + if (bt_lockpage(bt, page_no, BtLockParent)) { + return bt->err; + } + + /* cache copy of key to delete */ + ptr = keyptr(bt->page, bt->page->cnt); + memcpy(lowerkey, ptr, ptr->len + 1); + + /* lock and map right page */ + if (bt_lockpage(bt, right, BtLockWrite)) { + return bt->err; + } + + if (bt_mappage (bt, &bt->temp, right)) { + return bt->err; + } + + /* pull contents of next page into current empty page */ + memcpy(bt->page, bt->temp, 
bt->page_size); + + /* cache copy of key to update */ + ptr = keyptr(bt->temp, bt->temp->cnt); + memcpy(higherkey, ptr, ptr->len + 1); + + /* + * Mark right page as deleted and point it to left page + * until we can post updates at higher level. + */ + bt_putid(bt->temp->right, page_no); + bt->temp->kill = 1; + bt->temp->cnt = 0; + + if (bt_update(bt, bt->page, page_no)) { + return bt->err; + } + + if (bt_update(bt, bt->temp, right)) { + return bt->err; + } + + if (bt_unlockpage(bt, right, BtLockWrite)) { + return bt->err; + } + if (bt_unlockpage(bt, page_no, BtLockWrite)) { + return bt->err; + } + + /* delete old lower key to consolidated node */ + if (bt_deletekey(bt, lowerkey + 1, *lowerkey, lvl + 1)) { + return bt->err; + } + + /* redirect higher key directly to consolidated node */ + tod = (uint)time(NULL); + + if (bt_insertkey(bt, higherkey+1, *higherkey, lvl + 1, page_no, tod)) { + return bt->err; + } + + /* + * Obtain write lock and add right block + * to the free chain. + */ + if (bt_freepage (bt, right)) { + return bt->err; + } + + /* remove ParentModify lock */ + if (bt_unlockpage(bt, page_no, BtLockParent)) { + return bt->err; + } + + return 0; +} + + +/** + * bt_findkey + * `````````` + * Find key in leaf level and return row-id. + * + * @bt : Pointer to a B-tree. + * @key : Key to search for. + * @len : Length of @key. + * Return: Row id of match. + */ +uint bt_findkey(BtDb *bt, unsigned char *key, uint len) +{ + uint slot; + BtKey ptr; + + if ((slot = bt_loadpage (bt, key, len, 0, BtLockRead))) { + ptr = keyptr(bt->page, slot); + } else { + return 0; + } + + /* If key exists, return id. */ + if (slotptr(bt->page,slot)->dead || memcmp(ptr->key, key, len)) { + slot = 0; + } + + memcpy(bt->cursor, bt->page, bt->page_size); + + if (bt_unlockpage(bt, bt->page_no, BtLockRead)) { + return 0; + } + + return slot; +} + + +/** + * bt_cleanpage + * ```````````` + * Remove any keys flagged for deletion in a B-tree. + * + * @bt : Pointer to B-tree. 
+ * Return: Nothing. + */ +void bt_cleanpage(BtDb *bt) +{ + uint nxt; + BtPage page; + uint cnt; + uint idx; + uint max; + BtKey key; + + nxt = bt->page_size; + page = bt->page; + cnt = 0; + idx = 0; + max = page->cnt; + + memcpy(bt->frame, page, bt->page_size); + + /* Skip page info and set rest of page to zero. */ + memset(page+1, 0, bt->page_size - sizeof(*page)); + page->act = 0; + + /* Try cleaning up the page first. */ + while (cnt++ < max) { + /* Always leave the fence key in the list. */ + if (cnt < max && slotptr(bt->frame,cnt)->dead) { + continue; + } + + /* Copy the key. */ + key = keyptr(bt->frame, cnt); + nxt -= key->len + 1; + memcpy((unsigned char *)page + nxt, key, key->len + 1); + + /* Copy the slot. */ + memcpy(slotptr(page, ++idx)->id, slotptr(bt->frame, cnt)->id, BtId); + + if (!(slotptr(page, idx)->dead = slotptr(bt->frame, cnt)->dead)) { + page->act++; + } + slotptr(page, idx)->tod = slotptr(bt->frame, cnt)->tod; + slotptr(page, idx)->off = nxt; + } + page->min = nxt; + page->cnt = idx; +} + + +/** + * bt_splitroot + * ```````````` + * Split the root node and raise the height of the B-tree. + * + * @bt : Pointer to a B-tree. + * @newkey : Key of new root node. + * @oldkey : Key of old root node. + * @page_no2: Page number of @newkey ? + * Return : Error status. + */ +BTERR bt_splitroot(BtDb *bt, unsigned char *newkey, unsigned char *oldkey, uid page_no2) +{ + uint nxt; + BtPage root; + uid new_page; + + nxt = bt->page_size; + root = bt->page; + + /* + * Obtain an empty page to use, and copy the current + * root node contents into it. + */ + if (!(new_page = bt_newpage(bt, root))) { + return bt->err; + } + + /* + * Preserve the page info at the bottom, + * and set the rest of the bytes to zero. 
+ */ + memset(root+1, 0, bt->page_size - sizeof(*root)); + + /* insert first key on newroot page */ + nxt -= *newkey + 1; + memcpy((unsigned char *)root + nxt, newkey, *newkey + 1); + bt_putid(slotptr(root, 1)->id, new_page); + slotptr(root, 1)->off = nxt; + + /* + * Insert second key on newroot page, + * and increase the root height. + */ + nxt -= *oldkey + 1; + memcpy((unsigned char *)root + nxt, oldkey, *oldkey + 1); + bt_putid(slotptr(root, 2)->id, page_no2); + slotptr(root, 2)->off = nxt; + + bt_putid(root->right, 0); + root->min = nxt; /* reset lowest used offset and key count */ + root->cnt = 2; + root->act = 2; + root->lvl++; + + /* Update and release root (bt->page) */ + if (bt_update(bt, root, bt->page_no)) { + return bt->err; + } + + return bt_unlockpage(bt, bt->page_no, BtLockWrite); +} + + +/** + * bt_splitpage + * ```````````` + * Split an already-locked full node. Return it unlocked. + * + * @bt : Pointer to B-tree. + * @len: + * Return: Error status. + */ +BTERR bt_splitpage(BtDb *bt, uint len) +{ + uint cnt; + uint idx; + uint max; + uint nxt; + unsigned char oldkey[256]; + unsigned char lowerkey[256]; + uid page_no; + uid right; + BtPage page; + uint lvl; + uid new_page; + BtKey key; + uint tod; + + cnt = 0; + idx = 0; + nxt = bt->page_size; + page_no = bt->page_no, right; + page = bt->page; + lvl = page->lvl; + + /* Perform cleanup. */ + bt_cleanpage(bt); + + /* Return if there is enough space now. */ + if (page->min >= (page->cnt + 1) * sizeof(BtSlot) + sizeof(*page) + len + 1) { + if (bt_update(bt, page, page_no)) { + return bt->err; + } + return bt_unlockpage(bt, page_no, BtLockWrite); + } + + /* + * Split higher half of keys to bt->frame. + * The last key (fence key) might be dead. 
+ */ + tod = (uint)time(NULL); + + memset(bt->frame, 0, bt->page_size); + max = (int)page->cnt; + cnt = max / 2; + idx = 0; + + while (cnt++ < max) { + key = keyptr(page, cnt); + nxt -= key->len + 1; + memcpy((unsigned char *)bt->frame + nxt, key, key->len + 1); + memcpy(slotptr(bt->frame,++idx)->id, slotptr(page,cnt)->id, BtId); + + if (!(slotptr(bt->frame, idx)->dead = slotptr(page, cnt)->dead)) { + bt->frame->act++; + } + slotptr(bt->frame, idx)->tod = slotptr(page, cnt)->tod; + slotptr(bt->frame, idx)->off = nxt; + } + + /* remember existing fence key for new page to the right */ + memcpy(oldkey, key, key->len + 1); + + bt->frame->bits = bt->page_bits; + bt->frame->min = nxt; + bt->frame->cnt = idx; + bt->frame->lvl = lvl; + + /* Link right node. */ + if (page_no > ROOT_page) { + right = bt_getid (page->right); + bt_putid(bt->frame->right, right); + } + + /* Get new free page and write frame into it. */ + if (!(new_page = bt_newpage(bt, bt->frame))) { + return bt->err; + } + + /* Update lower keys to continue in old page. */ + memcpy(bt->frame, page, bt->page_size); + memset(page+1, 0, bt->page_size - sizeof(*page)); + nxt = bt->page_size; + page->act = 0; + cnt = 0; + idx = 0; + + /* + * Assemble the pages of smaller keys + * (they're all active keys). + */ + while (cnt++ < max / 2) { + key = keyptr(bt->frame, cnt); + nxt -= key->len + 1; + memcpy((unsigned char *)page + nxt, key, key->len + 1); + memcpy(slotptr(page,++idx)->id, slotptr(bt->frame,cnt)->id, BtId); + slotptr(page, idx)->tod = slotptr(bt->frame, cnt)->tod; + slotptr(page, idx)->off = nxt; + page->act++; + } + + /* Remember fence key for old page. */ + memcpy(lowerkey, key, key->len + 1); + bt_putid(page->right, new_page); + page->min = nxt; + page->cnt = idx; + + /* If current page is the root page, split it. */ + if (page_no == ROOT_page) { + return bt_splitroot(bt, lowerkey, oldkey, new_page); + } + + /* Update left (containing) node. 
*/ + if (bt_update(bt, page, page_no)) { + return bt->err; + } + + /* + * Obtain Parent/Write locks for + * the left and right node pages. + */ + if (bt_lockpage (bt, new_page, BtLockParent)) { + return bt->err; + } + + if (bt_lockpage (bt, page_no, BtLockParent)) { + return bt->err; + } + + /* Release write lock on left page. */ + if (bt_unlockpage (bt, page_no, BtLockWrite)) { + return bt->err; + } + + /* Insert new fence for re-formulated left block. */ + if (bt_insertkey(bt, lowerkey+1, *lowerkey, lvl + 1, page_no, tod)) { + return bt->err; + } + + /* Fix old fence for newly-allocated right block page. */ + if (bt_insertkey(bt, oldkey+1, *oldkey, lvl + 1, new_page, tod)) { + return bt->err; + } + + /* Release Parent/Write locks. */ + if (bt_unlockpage (bt, new_page, BtLockParent)) { + return bt->err; + } + if (bt_unlockpage (bt, page_no, BtLockParent)) { + return bt->err; + } + + return 0; +} + + +/** + * bt_insertkey + * ```````````` + * Insert new key into the B-tree at the requested level. + * + * @bt : Pointer to B-tree. + * @key : Key to insert. + * @len : Length of @key. + * @lvl : Level to insert at. + * @id : Lock id. + * @tod : ? + * Return: Error status. + * + * NOTES + * Level zero pages are leaf pages, and are unlocked on exit. + * Interior nodes remain locked after exit. + */ +BTERR bt_insertkey (BtDb *bt, unsigned char *key, uint len, uint lvl, uid id, uint tod) +{ + uint slot; + uint idx; + BtPage page; + BtKey ptr; + + while (1) { + if ((slot = bt_loadpage(bt, key, len, lvl, BtLockWrite))) { + ptr = keyptr(bt->page, slot); + } else { + if (!bt->err) { + bt->err = BTERR_ovflw; + } + return bt->err; + } + + /* If key already exists, update id and return. 
*/ + page = bt->page; + + if (!keycmp (ptr, key, len)) { + slotptr(page, slot)->dead = 0; + slotptr(page, slot)->tod = tod; + + bt_putid(slotptr(page,slot)->id, id); + + if (bt_update(bt, bt->page, bt->page_no)) { + return bt->err; + } + return bt_unlockpage(bt, bt->page_no, BtLockWrite); + } + + /* Check if page has enough space to contain the new key. */ + if (page->min >= (page->cnt + 1) * sizeof(BtSlot) + sizeof(*page) + len + 1) { + break; + } + if (bt_splitpage (bt, len)) { + return bt->err; + } + } + + /* calculate next available slot and copy key into page */ + page->min -= len + 1; /* reset lowest used offset */ + ((unsigned char *)page)[page->min] = len; + memcpy((unsigned char *)page + page->min +1, key, len); + + for (idx = slot; idx < page->cnt; idx++) { + if (slotptr(page, idx)->dead) { + break; + } + } + + /* + * Now insert key into array before slot, + * preserving the fence slot. + */ + if (idx == page->cnt) { + idx++, page->cnt++; + } + + page->act++; + + while (idx > slot) { + *slotptr(page, idx) = *slotptr(page, idx -1), idx--; + } + + bt_putid(slotptr(page,slot)->id, id); + slotptr(page, slot)->off = page->min; + slotptr(page, slot)->tod = tod; + slotptr(page, slot)->dead = 0; + + if (bt_update(bt, bt->page, bt->page_no)) { + return bt->err; + } + + return bt_unlockpage(bt, bt->page_no, BtLockWrite); +} + + +/** + * bt_startkey + * ``````````` + * Cache page of keys into cursor and return starting slot for given key. + * + * @bt : Pointer to a B-tree. + * @key : Key to find starting slot for. + * @len : Length of @key. + * Return: Starting slot of @key. 
+ */ +uint bt_startkey(BtDb *bt, unsigned char *key, uint len) +{ + uint slot; + + /* cache page for retrieval */ + if ((slot = bt_loadpage (bt, key, len, 0, BtLockRead))) { + memcpy(bt->cursor, bt->page, bt->page_size); + } + + bt->cursor_page = bt->page_no; + + if (bt_unlockpage(bt, bt->page_no, BtLockRead)) { + return 0; + } + + return slot; +} + + +/** + * bt_nextkey + * `````````` + * Return next slot for cursor page, or slide cursor right into next page. + * + * @bt : Pointer to a B-tree. + * @slot : Current slot. + * Return: Next slot. + */ +uint bt_nextkey(BtDb *bt, uint slot) +{ + off64_t right; + + do { + right = bt_getid(bt->cursor->right); + while (slot++ < bt->cursor->cnt) { + if (slotptr(bt->cursor,slot)->dead) { + continue; + } else if (right || (slot < bt->cursor->cnt)) { + return slot; + } else { + break; + } + } + + if (!right) { + break; + } + + bt->cursor_page = right; + + if (bt_lockpage(bt, right,BtLockRead)) { + return 0; + } + if (bt_mappage (bt, &bt->page, right)) { + break; + } + + memcpy(bt->cursor, bt->page, bt->page_size); + + if (bt_unlockpage(bt, right, BtLockRead)) { + return 0; + } + + slot = 0; + + } while (1); + + return bt->err = 0; +} + + +/** + * bt_key + * `````` + * Get a key, given a slot number. + * + * @bt : Pointer to a B-tree. + * @slot : Slot number. + * Return: Pointer to a key. + */ +BtKey bt_key(BtDb *bt, uint slot) +{ + return keyptr(bt->cursor, slot); +} + + +/** + * bt_uid + * `````` + * Get a uid, given a slot number. + * + * @bt : Pointer to a B-tree. + * @slot : Slot number. + * Return: uid value. + */ +uid bt_uid(BtDb *bt, uint slot) +{ + return bt_getid(slotptr(bt->cursor,slot)->id); +} + + +/** + * bt_tod + * `````` + * Get a tod, given a slot number. + * + * @bt : Pointer to a B-tree. + * @slot : Slot number. + * Return: tod value. 
+ */ +uint bt_tod(BtDb *bt, uint slot) +{ + return slotptr(bt->cursor,slot)->tod; +} + diff --git a/abst/btree.h b/abst/btree.h new file mode 100644 index 0000000..726d8e2 --- /dev/null +++ b/abst/btree.h @@ -0,0 +1,164 @@ +#ifndef _B_TREE_H +#define _B_TREE_H + + +typedef unsigned long long uid; +//typedef unsigned long long off64_t; +typedef unsigned short ushort; +typedef unsigned int uint; + + +#define BT_ro 0x6f72 /* Read-only */ +#define BT_rw 0x7772 /* Read-write */ +#define BT_fl 0x6c66 /* File lock */ + +#define BT_maxbits 24 /* maximum page size (bits) */ +#define BT_minbits 9 /* minimum page size (bits) */ +#define BT_minpage (1 << BT_minbits) /* minimum page size (bits) */ + +#define BT_hashsize 512 /* size of hash index for page cache (bits) */ +#define BT_hashprime 8191 /* prime number for hash seed */ + +/* + * There are five lock types for each node, in two independent sets: + * + * 1{1} AccessIntent : Sharable. Going to read the node. + * Incompatible with NodeDelete. + * + * 2{1} NodeDelete : Exclusive. About to release the node. + * Incompatible with AccessIntent. + * + * 3{2} ReadLock : Sharable. Read the node. + * Incompatible with WriteLock. + * + * 4{2} WriteLock : Exclusive. Modify the node. + * Incompatible with ReadLock and other WriteLocks. + * + * 5{3} ParentModify : Exclusive. Change the parent's keys. + * Incompatible with another ParentModify. + */ +typedef enum { + BtLockAccess, + BtLockDelete, + BtLockRead, + BtLockWrite, + BtLockParent +} BtLock; + + +/* Page length and key pointers. */ +#define BtId 6 + +/* + * Page key slots. + * + * If BT_maxbits is <= 15, you can save 4 bytes for each key + * stored by making the first two uints into ushorts. You can + * also save 4 bytes by removing the tod field from the key. + * + * Keys are marked dead, but remain on the page until cleanup + * is called. The fence key (highest key) for the page is + * always present, even after cleanup. 
+ */ + +typedef struct { + uint off:BT_maxbits; /* page offset for key start */ + uint dead:1; /* set for deleted key */ + uint tod; /* time-stamp for key */ + unsigned char id[BtId]; /* id associated with key */ +} BtSlot; + + +/* + * The key structure occupiese space at the upper end of each + * page. It consists of a length byte followed by the value + * bytes. + */ +typedef struct { + unsigned char len; + unsigned char key[1]; +} *BtKey; + + +/* + * The first part of an index page. It is immediately + * followed by the BtSlot array of keys. + */ +typedef struct { + uint cnt; /* count of keys in page */ + uint act; /* count of active keys */ + uint min; /* next key offset */ + unsigned char bits; /* page size in bits */ + unsigned char lvl:7; /* level of page */ + unsigned char kill:1; /* page is being deleted */ + unsigned char right[BtId]; /* page number to right */ +} *BtPage; + + +/* The memory mapping hash table entry. */ +typedef struct { + BtPage page; /* mapped page pointer */ + uid page_no; /* mapped page number */ + void *lruprev; /* least recently used previous cache block */ + void *lrunext; /* lru next cache block */ + void *hashprev; /* previous cache block for the same hash idx */ + void *hashnext; /* next cache block for the same hash idx */ +} BtHash; + + +/* + * The object structure for B-tree access. 
+ */ +typedef struct _BtDb { + uint page_size; /* each page size */ + uint page_bits; /* each page size in bits */ + uid page_no; /* current page number */ + uid cursor_page; /* current cursor page number */ + int err; + uint mode; /* read-write mode */ + uint mapped_io; /* use memory mapping */ + BtPage temp; /* temporary frame buffer (memory mapped/file IO) */ + BtPage alloc; /* frame buffer for alloc page (page 0) */ + BtPage cursor; /* cached frame for start/next (never mapped) */ + BtPage frame; /* spare frame for the page split (never mapped) */ + BtPage page; /* current page */ + int idx; + unsigned char *mem; /* frame, cursor, page memory buffer */ + int nodecnt; /* highest page cache node in use */ + int nodemax; /* highest page cache node allocated */ + int hashmask; /* number of hash headers in cache - 1 */ + BtHash *lrufirst; /* lru list head */ + BtHash *lrulast; /* lru list tail */ + ushort cache[BT_hashsize]; /* hash index for cache */ + BtHash nodes[1]; /* page cache follows */ +} BtDb; + + +/* Error status codes */ +typedef enum { + BTERR_ok = 0, + BTERR_struct, + BTERR_ovflw, + BTERR_lock, + BTERR_map, + BTERR_wrt, + BTERR_hash +} BTERR; + + +/* B-Tree functions. */ +extern void bt_close (BtDb *bt); +extern BtDb *bt_open (char *name, uint mode, uint bits, uint cacheblk); +extern BTERR bt_insertkey (BtDb *bt, unsigned char *key, uint len, uint lvl, uid id, uint tod); +extern BTERR bt_deletekey (BtDb *bt, unsigned char *key, uint len, uint lvl); +extern uint bt_findkey (BtDb *bt, unsigned char *key, uint len); +extern uint bt_startkey (BtDb *bt, unsigned char *key, uint len); +extern uint bt_nextkey (BtDb *bt, uint slot); + +/* Helper functions. 
*/ +extern BtKey bt_key (BtDb *bt, uint slot); +extern uid bt_uid (BtDb *bt, uint slot); +extern uint bt_tod (BtDb *bt, uint slot); + + +#endif diff --git a/abst/hash.h b/abst/hash.h new file mode 100644 index 0000000..9a93e20 --- /dev/null +++ b/abst/hash.h @@ -0,0 +1,93 @@ + +#define hash_const 5381 + +#define hash_1(str, h) (((h << 5) + h) + str[0]) +#define hash_2(str, h) (((h << 5) + h) + str[1]) +#define hash_3(str, h) (((h << 5) + h) + str[2]) +//#define hash_4(str, h) (((h << 5) + h) + str[3]) + +#define compile_hash(str) (hash_3(str, (hash_2(str, hash_1(str, hash_const))))) + +#define _opt(str) (compile_hash(str)) + +/* +#define COLOR_TABLE \ +X(red, "red") \ +X(green, "green") \ +X(blue, "blue") +*/ + + + + + + + + + + + + + +/****************************************************************************** + * djb2_hash + * ````````` + * HISTORY + * This algorithm (k=33) was first reported by Dan Bernstein many years + * ago in comp.lang.c. Another version of this algorithm (now favored by + * bernstein) uses XOR: + * + * hash(i) = hash(i - 1) * 33 ^ str[i]; + * + * The magic of number 33 (why it works better than many other constants, + * prime or not) has never been adequately explained. + * + ******************************************************************************/ +static inline unsigned djb2_hash(const char *str) +{ + unsigned hash; + int c; + + hash = 5381; + + while ((c = (unsigned char)*str++)) { + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + } + + return (unsigned) hash; +} + +/****************************************************************************** + * sdbm_hash + * ````````` + * HISTORY + * This algorithm was created for sdbm (a public-domain reimplementation + * of ndbm) database library. It was found to do well in scrambling bits, + * causing better distribution of the keys and fewer splits. it also + * happens to be a good general hashing function with good distribution. 
+ * + * The actual function is + * + * hash(i) = hash(i - 1) * 65599 + str[i]; + * + * What is included below is the faster version used in gawk. [there is + * even a faster, duff-device version] the magic constant 65599 was picked + * out of thin air while experimenting with different constants, and turns + * out to be a prime. this is one of the algorithms used in berkeley db + * (see sleepycat) and elsewhere. + * + ******************************************************************************/ +static inline unsigned sdbm_hash(const char *str) +{ + unsigned hash; + int c; + + hash = 0; + + while ((c = (unsigned char)*str++)) { + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return (unsigned) hash; +} + diff --git a/abst/list.h b/abst/list.h new file mode 100644 index 0000000..f14ae93 --- /dev/null +++ b/abst/list.h @@ -0,0 +1,510 @@ + +/* + * Augmented version of the circular linked list header authored by + * Rusty Russell <rusty@rustcorp.com.au>, who implemented this approach + * in the Linux kernel. See http://ccodearchive.net/info/list.html. + * + * The original is licensed under LGPLv2.1+ + */ + +#ifndef CCAN_LIST_H +#define CCAN_LIST_H +#include <stdbool.h> +#include <assert.h> +#include "../util/container_of.h" + + +//#define pop(l,t) list_top(l, t, node) +//#define tail(l,t) list_tail(l, t, node) +//#define push(l,m) list_add(l, (struct list_node *)m) +//#define push_tail(l,m) list_add_tail(l, (struct list_node *)m) + + +#define list(type, members) \ + type { \ + struct list_node node; \ + members \ + } + + +/** + * struct list_node + * ```````````````` + * An entry in a doubly-linked list + * + * @next: next entry (self if empty) + * @prev: previous entry (self if empty) + * + * e.g. 
+ * struct child { + * const char *name; + * struct list_node list; + * }; + */ +struct list_node { + struct list_node *next; + struct list_node *prev; +}; + + +/** + * struct list_t + * ````````````` + * The head of a doubly-linked list + * + * @h: the list_t (containing next and prev pointers) + * + * e.g. + * struct parent { + * const char *name; + * struct list_t children; + * unsigned int num_children; + * }; + */ +struct list_t +{ + struct list_node n; +}; + + +/** + * list_check + * `````````` + * Check head of a list for consistency + * + * @h : the list_t + * @abortstr: the location to print on aborting, or NULL. + * + * NOTES + * Because list_nodes have redundant information, consistency + * checking between the back and forward links can be done. + * This is useful as a debugging check. If @abortstr is non-NULL, + * that will be printed in a diagnostic if the list is inconsistent, + * and the function will abort. + * + * Returns the list head if the list is consistent, NULL if not + * (it can never return NULL if @abortstr is set). + * + * See also: list_check_node() + */ +struct list_t *list_check(const struct list_t *h, const char *abortstr); + + +/** + * list_check_node + * ``````````````` + * Check node of a list for consistency. + * + * @n : the list_node + * @abortstr: the location to print on aborting, or NULL. + * + * Check consistency of the list node is in (it must be in one). + * + * See also: list_check() + */ +struct list_node *list_check_node(const struct list_node *n, const char *abortstr); + +#ifdef CCAN_LIST_DEBUG +#define list_debug(h) list_check((h), __func__) +#define list_debug_node(n) list_check_node((n), __func__) +#else +#define list_debug(h) (h) +#define list_debug_node(n) (n) +#endif + + +/** + * LIST_INIT + * ````````` + * Explicit initializer for an empty list_t + * + * @name: the name of the list. + * + * e.g. 
+ * static struct list_t my_list = list_t_INIT(my_list); + */ +#define LIST_INIT(name) { { &name.n, &name.n } } + + +/** + * LIST_INIT + * ````````` + * Define and initialize an empty list_t + * + * @name: the name of the list. + * + * USAGE + * The list_t macro defines a list_t and initializes it to + * an empty list. It can be prepended by "static" to define a + * static list_t. + * + * e.g. + * static list_t(my_global_list); + */ +#define LINKED_LIST(name) \ + struct list_t name = LIST_INIT(name) + +/** + * list_init + * ````````` + * Initialize a list_t + * + * @h: the list_t to set to the empty list + */ +static inline void list_init(struct list_t *h) +{ + h->n.next = h->n.prev = &h->n; +} + +/** + * list_add + * ```````` + * Add an entry at the start of a linked list. + * + * @h: the list_t to add the node to + * @n: the list_node to add to the list. + * + * NOTE + * The list_node does not need to be initialized; it will be overwritten. + */ +static inline void list_add(struct list_t *h, struct list_node *n) +{ + n->next = h->n.next; + n->prev = &h->n; + h->n.next->prev = n; + h->n.next = n; + (void)list_debug(h); +} + +/** + * list_add_tail + * ````````````` + * Add an entry at the end of a linked list. + * + * @h: the list_t to add the node to + * @n: the list_node to add to the list. + * + * NOTE + * The list_node does not need to be initialized; it will be overwritten. + */ +static inline void list_add_tail(struct list_t *h, struct list_node *n) +{ + n->next = &h->n; + n->prev = h->n.prev; + h->n.prev->next = n; + h->n.prev = n; + (void)list_debug(h); +} + +/** + * list_empty + * `````````` + * Is a list empty? + * + * @h : the list_t + * Return: true if list is empty, else false. + */ +static inline bool list_empty(const struct list_t *h) +{ + (void)list_debug(h); + return h->n.next == &h->n; +} + + +/** + * list_del + * ```````` + * Delete an entry from an (unknown) linked list. + * + * @n: the list_node to delete from the list. 
+ * + * NOTE + * This leaves @n in an undefined state; it can be added to + * another list, but not deleted again. + */ +static inline void list_del(struct list_node *n) +{ + (void)list_debug_node(n); + n->next->prev = n->prev; + n->prev->next = n->next; +#ifdef CCAN_LIST_DEBUG + /* Catch use-after-del. */ + n->next = n->prev = NULL; +#endif +} + +/** + * list_del_from + * ````````````` + * Delete an entry from a known linked list. + * + * @h: the list_t the node is in. + * @n: the list_node to delete from the list. + * + * NOTE + * This explicitly indicates which list a node is expected to be in, + * which is better documentation and can catch more bugs. + */ +static inline void list_del_from(struct list_t *h, struct list_node *n) +{ +#ifdef CCAN_LIST_DEBUG + { + /* Thorough check: make sure it was in list! */ + struct list_node *i; + for (i = h->n.next; i != n; i = i->next) + assert(i != &h->n); + } +#endif /* CCAN_LIST_DEBUG */ + + /* Quick test that catches a surprising number of bugs. */ + assert(!list_empty(h)); + list_del(n); +} + +/** + * list_entry + * `````````` + * Convert a list_node back into the structure containing it. + * + * @n : the list_node + * @type : the type of the entry + * @member: the list_node member of the type + */ +#define list_entry(n, type, member) container_of(n, type, member) + +/** + * list_top + * ```````` + * Get the first entry in a list + * + * @h : the list_t + * @type : the type of the entry + * @member: the list_node member of the type + * + * If the list is empty, returns NULL. 
+ */ +#define list_top(h, type, member) \ + ((type *)list_top_((h), list_off_(type, member))) + +static inline const void *list_top_(const struct list_t *h, size_t off) +{ + if (list_empty(h)) + return NULL; + return (const char *)h->n.next - off; +} + +/** + * list_tail + * ````````` + * Get the last entry in a list + * + * @h : the list_t + * @type : the type of the entry + * @member: the list_node member of the type + * + * If the list is empty, returns NULL. + */ +#define list_tail(h, type, member) \ + ((type *)list_tail_((h), list_off_(type, member))) + +static inline const void *list_tail_(const struct list_t *h, size_t off) +{ + if (list_empty(h)) + return NULL; + return (const char *)h->n.prev - off; +} + +/** + * list_for_each + * ````````````` + * Iterate through a list. + * + * @h : the list_t (warning: evaluated multiple times!) + * @i : the structure containing the list_node + * @member: the list_node member of the structure + * + * USAGE + * This is a convenient wrapper to iterate @i over the + * entire list. It's a for loop, so you can break and + * continue as normal. + * + * e.g. + * list_for_each(&parent->children, child, list) + * printf("Name: %s\n", child->name); + */ +#define list_for_each(h, i, member) \ + list_for_each_off(h, i, list_off_var_(i, member)) + +/** + * list_for_each_rev + * ````````````````` + * Iterate through a list backwards. + * + * @h : the list_t + * @i : the structure containing the list_node + * @member: the list_node member of the structure + * + * USAGE + * This is a convenient wrapper to iterate @i over the + * entire list. It's a for loop, so you can break and + * continue as normal. 
+ */ +#define list_for_each_rev(h, i, member) \ + for (i = container_of_var(list_debug(h)->n.prev, i, member); \ + &i->member != &(h)->n; \ + i = container_of_var(i->member.prev, i, member)) + +/** + * list_for_each_safe + * `````````````````` + * Iterate through a list, maybe during deletion + * + * @h : the list_t + * @i : the structure containing the list_node + * @nxt : the structure containing the list_node + * @member: the list_node member of the structure + * + * USAGE + * This is a convenient wrapper to iterate @i over the + * entire list. It's a for loop, so you can break and + * continue as normal. + * + * The extra variable @nxt is used to hold the next + * element, so you can delete @i from the list. + * + * e.g. + * struct child *next; + * list_for_each_safe(&parent->children, child, next, list) { + * list_del(&child->list); + * parent->num_children--; + * } + */ +#define list_for_each_safe(h, i, nxt, member) \ + list_for_each_safe_off(h, i, nxt, list_off_var_(i, member)) + + +/** + * list_count + * `````````` + * How many nodes are in a list? + * + * @n : will contain the number of items + * @h : the list_t + * @i : the structure containing the list_node + * @member: the list_node member of the structure + * + * USAGE + * Used similar to the list_for_each macro, but modifies + * an integer value n, leaving it equal to the number of + * nodes in the list. Note that n must be initialized to 0. + * + * Added 05-29-2012 + */ +#define list_count(h, i, member, n) \ + n=0; list_for_each_off(h, i, list_off_var_(i, member)) { n++; } + +/** + * list_for_each_off + * ````````````````` + * Iterate through a list of memory regions. + * + * @h : the list_t + * @i : the pointer to a memory region wich contains list node data. + * @off: offset(relative to @i) at which list node data resides. + * + * USAGE + * This is a low-level wrapper to iterate @i over the entire list, used to + * implement all oher, more high-level, for-each constructs. 
It's a for loop, + * so you can break and continue as normal. + * + * CAVEAT + * Being the low-level macro that it is, this wrapper doesn't know + * nor care about the type of @i. The only assumtion made is that @i points + * to a chunk of memory that at some @offset, relative to @i, contains a + * properly filled `struct node_list' which in turn contains pointers to + * memory chunks and it's turtles all the way down. Whith all that in mind + * remember that given the wrong pointer/offset couple this macro will + * happilly churn all you memory untill SEGFAULT stops it, in other words + * caveat emptor. + * + * It is worth mentioning that one of legitimate use-cases for that wrapper + * is operation on opaque types with known offset for `struct list_node' + * member(preferably 0), because it allows you not to disclose the type of + * @i. + * + * Example: + * list_for_each_off(&parent->children, child, + * offsetof(struct child, list)) + * printf("Name: %s\n", child->name); + */ +#define list_for_each_off(h, i, off) \ + for (i = list_node_to_off_(list_debug(h)->n.next, (off)); \ + list_node_from_off_((void *)i, (off)) != &(h)->n; \ + i = list_node_to_off_(list_node_from_off_((void *)i, (off))->next, \ + (off))) + +/** + * list_for_each_safe_off - iterate through a list of memory regions, maybe + * during deletion + * @h: the list_t + * @i: the pointer to a memory region wich contains list node data. + * @nxt: the structure containing the list_node + * @off: offset(relative to @i) at which list node data resides. + * + * For details see `list_for_each_off' and `list_for_each_safe' + * descriptions. 
+ * + * Example: + * list_for_each_safe_off(&parent->children, child, + * next, offsetof(struct child, list)) + * printf("Name: %s\n", child->name); + */ +#define list_for_each_safe_off(h, i, nxt, off) \ + for (i = list_node_to_off_(list_debug(h)->n.next, (off)), \ + nxt = list_node_to_off_(list_node_from_off_(i, (off))->next, \ + (off)); \ + list_node_from_off_(i, (off)) != &(h)->n; \ + i = nxt, \ + nxt = list_node_to_off_(list_node_from_off_(i, (off))->next, \ + (off))) + + +/* Other -off variants. */ +#define list_entry_off(n, type, off) \ + ((type *)list_node_from_off_((n), (off))) + +#define list_t_off(h, type, off) \ + ((type *)list_t_off((h), (off))) + +#define list_tail_off(h, type, off) \ + ((type *)list_tail_((h), (off))) + +#define list_add_off(h, n, off) \ + list_add((h), list_node_from_off_((n), (off))) + +#define list_del_off(n, off) \ + list_del(list_node_from_off_((n), (off))) + +#define list_del_from_off(h, n, off) \ + list_del_from(h, list_node_from_off_((n), (off))) + +/* Offset helper functions so we only single-evaluate. */ +static inline void *list_node_to_off_(struct list_node *node, size_t off) +{ + return (void *)((char *)node - off); +} +static inline struct list_node *list_node_from_off_(void *ptr, size_t off) +{ + return (struct list_node *)((char *)ptr + off); +} + +/* Get the offset of the member, but make sure it's a list_node. 
*/ +#define list_off_(type, member) \ + (container_off(type, member) + \ + check_type(((type *)0)->member, struct list_node)) + +#define list_off_var_(var, member) \ + (container_off_var(var, member) + \ + check_type(var->member, struct list_node)) + +#endif /* CCAN_LIST_H */ diff --git a/abst/map.h b/abst/map.h new file mode 100644 index 0000000..18cfc13 --- /dev/null +++ b/abst/map.h @@ -0,0 +1,320 @@ +/* + * map.h -- Hash tables and hashing routines + * + * Copyright (C) 2012 Jason Linehan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __MAPTABLE_ROUTINES +#define __MAPTABLE_ROUTINES +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <assert.h> +#include <stdint.h> +#include "debug.h" + +static inline unsigned djb2_hash(const char *str); +static inline unsigned sdbm_hash(const char *str); + +/****************************************************************************** + * MAP TABLES + * + * These inline functions and datatypes implement a small database object + * called a hashtable. It stores RECORDS, which are key, value pairings. + * This pairing is a form of mapping a key to a value. Values can then + * be retreived by their keys. 
+ * + * The records which the database accepts must be in the form: + * + * struct { + * char key[SIZE]; + * <type> <some_name>; + * }; + * + * The only requirement is that the first part of the struct's memory + * be an array of char -- a string. This serves as the key field, and + * when passed to strcmp or other functions, the address at the start + * of the array will behave just like the address of a string. + * + * Along for the ride, however, will be the value that you wish to + * store in the database. It is for this reason that new_symbol() + * behaves more like malloc() than anything else. + * + * This is convenient because you can define your own record types + * to store in the table. It is a bit of a hack but damnit, it's a + * good one. + * + ******************************************************************************/ +#define _HASH_FUNC sdbm_hash +#define _CMP(a,b) (strcmp((const char *)(a), (const char *)(b))) +#define _HASH(a) (_HASH_FUNC((const char *)(a))) + + +/****************************************************************************** + * DATA STRUCTURES + ******************************************************************************/ + +/* + * A node or record in the table. + */ +struct bucket { + struct bucket *next; + struct bucket **prev; +}; + + + +/* + * The "object" containing the table. + */ +struct map_t { + size_t size; + int symcount; + struct bucket *table[1]; // First element of hash table +}; + + +/****************************************************************************** + * SYMBOL CREATION/DESTRUCTION + ******************************************************************************/ + + +/** + * new_symbol + * `````````` + * Allocate space for a new symbol in the hash table. + * + * @size : Size of the symbol. + * Return: Pointer to the allocated area. 
+ */ +static inline void *new_symbol(size_t size) +{ + struct bucket *sym; + + if (!(sym = calloc(size + sizeof(struct bucket), 1))) + halt(SIGABRT, "Can't get memory for hash bucket.\n"); + + return (void *)(sym+1); +} + + +/** + * free_symbol + * ``````````` + * Free the memory space allocated for symbol. + * + * @sym : Symbol to be freed. + * Return: Nothing. + */ +static inline void free_symbol(void *sym) +{ + free((struct bucket *)sym - 1); +} + + + +/****************************************************************************** + * MAPTABLE CREATION/DESTRUCTION + ******************************************************************************/ + + +/** + * new_map + * ``````` + * Create a new map object. + * + * @max_sym: The maximum number of symbols the map table can hold. + */ +static inline struct map_t *new_map(int max) +{ + struct map_t *new; + + if (!max) + max = 127; + + new = calloc(1, (max*sizeof(struct bucket *)) + sizeof(struct map_t)); + + if (!new) + halt(SIGABRT, "Insufficient memory for symbol table.\n"); + + new->symcount = 0; + new->size = max; + + return new; +} + + +/****************************************************************************** + * ADD/REMOVE SYMBOLS FROM THE MAP TABLE + ******************************************************************************/ + + +static inline void *add_symbol(struct map_t *map, void *my_sym) +{ + struct bucket **p; + struct bucket *tmp; + struct bucket *sym; + + sym = (struct bucket *)my_sym; + + p = &(map->table)[_HASH(sym--) % map->size]; + + tmp = *p; + *p = sym; + sym->prev = p; + sym->next = tmp; + + if (tmp) + tmp->prev = &sym->next; + + map->symcount++; + + return (void *)(sym + 1); +} + + +static inline void del_symbol(struct map_t *map, void *my_sym) +{ + struct bucket *sym; + sym = (struct bucket *)my_sym; + + if (map && sym) { + --map->symcount; + --sym; + + if ((*(sym->prev) = sym->next)) + sym->next->prev = sym->prev; + } +} + + 
+/****************************************************************************** + * FIND, POP SYMBOL RECORDS FROM THE MAP TABLE + ******************************************************************************/ + + +static inline void *get_symbol(struct map_t *map, void *sym) +{ + struct bucket *p; + + if (!map) + return NULL; + + p = (map->table)[_HASH(sym) % map->size]; + + while (p && (_CMP(sym, p+1))) + p = p->next; + + return (void *)(p ? p+1 : NULL); +} + + +/** + * next_symbol + * ``````````` + * Return the next node in the current chain with the same key as + * the last node found. + */ +static inline void *next_symbol(struct map_t *map, void *i_last) +{ + struct bucket *last = (struct bucket *)i_last; + + for (--last; last->next; last = last->next) { + if (_CMP(last+1, last->next+1) == 0) // match + return (char *)(last->next + 1); + } + return NULL; +} + + + +/****************************************************************************** + * HASH FUNCTIONS + * + * Two little hash functions to get started. There are better ones, but + * these are small, reasonably fast, and easy to understand. Ease of use + * wins out here, and portability. + * + ******************************************************************************/ + + +/****************************************************************************** + * djb2_hash + * ````````` + * HISTORY + * This algorithm (k=33) was first reported by Dan Bernstein many years + * ago in comp.lang.c. Another version of this algorithm (now favored by + * bernstein) uses XOR: + * + * hash(i) = hash(i - 1) * 33 ^ str[i]; + * + * The magic of number 33 (why it works better than many other constants, + * prime or not) has never been adequately explained. 
+ * + ******************************************************************************/ +static inline unsigned djb2_hash(const char *str) +{ + unsigned hash; + int c; + + hash = 5381; + + while ((c = (unsigned char)*str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return (unsigned) hash; +} + +/****************************************************************************** + * sdbm_hash + * ````````` + * HISTORY + * This algorithm was created for sdbm (a public-domain reimplementation + * of ndbm) database library. It was found to do well in scrambling bits, + * causing better distribution of the keys and fewer splits. it also + * happens to be a good general hashing function with good distribution. + * + * The actual function is + * + * hash(i) = hash(i - 1) * 65599 + str[i]; + * + * What is included below is the faster version used in gawk. [there is + * even a faster, duff-device version] the magic constant 65599 was picked + * out of thin air while experimenting with different constants, and turns + * out to be a prime. this is one of the algorithms used in berkeley db + * (see sleepycat) and elsewhere. + * + ******************************************************************************/ +static inline unsigned sdbm_hash(const char *str) +{ + unsigned hash; + int c; + + hash = 0; + + while ((c = (unsigned char)*str++)) + hash = c + (hash << 6) + (hash << 16) - hash; + + return (unsigned) hash; +} + + + + +#endif + diff --git a/abst/set.c b/abst/set.c new file mode 100644 index 0000000..8571eaf --- /dev/null +++ b/abst/set.c @@ -0,0 +1,532 @@ + +int _add_set(struct set_t *set, int bit); +void _set_op(int op, struct set_t *dest, struct set_t *src); +int _set_test(struct set_t *set1, struct set_t *set2); + + +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <ctype.h> +#include <signal.h> +#include <string.h> +#include "error.h" + +#include "set.h" + +/** + * new_set + * ``````` + * Create a new set_t. 
+ * + * Returns: Pointer to an allocated default set. + */ +struct set_t *new_set(void) +{ + struct set_t *new; + + if (!(new = calloc(1, sizeof(struct set_t)))) { + halt(SIGABRT, "No memory to create SET.\n") + } + + new->map = new->defmap; + new->nwords = _DEFWORDS; + new->nbits = _DEFBITS; + + return new; +} + + +/** + * del_set + * ``````` + * Destroy a set created with new_set(). + * + * @set : pointer to a SET. + * Returns: Nothing. + */ +void del_set(struct set_t *set) +{ + if (set->map != set->defmap) { + free(set->map); + } + + free(set); +} + + +/** + * dup_set + * ``````` + * Create a new set that has the same members as the input set. + * + * @set : Input set (to be duplicated). + * Returns: Pointer to a new set with members identical to @set. + */ +struct set_t *dup_set(struct set_t *set) +{ + struct set_t *new; + + if (!(new = calloc(1, sizeof(struct set_t)))) { + halt(SIGABRT, "No memory to create SET.\n"); + return NULL; // "Shouldn't happen." + } + + new->compl = set->compl; + new->nwords = set->nwords; + new->nbits = set->nbits; + + if (set->map == set->defmap) { + new->map = new->defmap; + memcpy(new->defmap, set->defmap, _DEFWORDS * sizeof(_SETTYPE)); + } else { + new->map = (_SETTYPE *)malloc(set->nwords * sizeof(_SETTYPE)); + if (!new->map) { + halt(SIGABRT, "No memory to duplicate SET.\n"); + } + memcpy(new->map, set->map, set->nwords * sizeof(_SETTYPE)); + } + return new; +} + + +/** + * _add_set + * ```````` + * Called by the ADD() macro when the set isn't big enough. + * + * @set: Pointer to a set. + * @bit: Bit to be added to set. + */ +int _add_set(struct set_t *set, int bit) +{ + grow_set(_ROUND(bit), set); + return _GBIT(set, bit, |=); +} + + +/** + * grow_set + * ```````` + * @set : Pointer to set to be enlarged. + * @need: Number of words needed (target). + * + * NOTE + * This routine calls malloc() and is rather slow. Its use should be + * limited, and avoided if possible. 
+ */ +void grow_set(struct set_t *set, int need) +{ + _SETTYPE *new; + + if (!set || need <= set->nwords) { + return; + } + + if (!(new = (_SETTYPE *) malloc(need * sizeof(_SETTYPE)))) { + halt(SIGABRT, "No memory to expand SET.\n"); + } + + memcpy(new, set->map, set->nwords * sizeof(_SETTYPE)); + memcpy(new + set->nwords, 0, (need - set->nwords) * sizeof(_SETTYPE)); + + if (set->map != set->defmap) { + free(set->map); + } + + set->map = new; + set->nwords = (unsigned char)need; + set->nbits = need * _BITS_IN_WORD; +} + + +/** + * num_set + * ``````` + * Get the number of elements (non-zero bits) in a set. NULL sets are empty. + * + * @set : Pointer to the set. + * Return: number of set bits in @set. + */ +int num_set(struct set_t *set) +{ + /* + * Neat macro that will expand to a lookup table indexed by a + * number in the range 0-255, with tab[i] == the number of set + * bits in i. + * + * Hallvard Furuseth suggested this approach on Sean Anderson's + * Bit Twiddling Hacks website on July 14, 2009. + */ + static const unsigned char nbits[256] = + { + #define B2(n) n, n+1, n+1, n+2 + #define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) + #define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) + B6(0), B6(1), B6(1), B6(2) + }; + + unsigned int count = 0; + unsigned char *p; + int i; + + if (!set) + return 0; + + p = (unsigned char *)set->map; + + for (i=_BYTES_IN_ARRAY(set->nwords); --i >= 0;) { + count += nbits[*p++]; + } + + return count; +} + + +/** + * _set_test + * ````````` + * Compares two sets. Returns as follows: + * + * _SET_EQUIV Sets are equivalent. + * _SET_INTER Sets intersect but aren't equivalent. + * _SET_DISJ Sets are disjoint. + * + * NOTE + * The smaller set is made larger if the two sets are different sizes. 
+ */ +int _set_test(struct set_t *set1, struct set_t *set2) +{ + _SETTYPE *p1; + _SETTYPE *p2; + int rval; + int i; + + rval = _SET_EQUIV; + i = max(set1->nwords, set2->nwords); + + grow_set(i, set1); + grow_set(i, set2); + + p1 = set1->map; + p2 = set2->map; + + for (; --i >= 0; p1++, p2++) { + if (*p1 != *p2) { + return *p1 - *p2; + } + } + + /* + * You get here if the sets are not equivalent. + * If the sets intersect, you can return immediately, + * but have to keep going in the case of disjoint + * sets because they might actually intersect + * at some yet-unseen byte. + */ + if ((j = set1->nwords - i) > 0) // set1 is larger + while (--j >= 0) { + if (*p1++) { + return 1; + } + } + } else if ((j = set2->nwords - i) > 0) // set2 is larger + while (--j >= 0) { + if (*p2++) { + return -1; + } + } + } + return 0; // they are equivalent. +} + + +/** + * set_cmp + * ``````` + * Yet another comparison function. Works like strcmp(). + * + * @set1 : Pointer to a set to be compared. + * @set2 : Pointer to another set. + * Returns: 0 if set1==set2, < 0 if set1<set2, > 0 if set1>set2. + */ +void set_cmp(struct set_t *set1, struct set_t *set2) +{ + _SETTYPE *p1; + _SETTYPE *p2; + int j; + int i; + + i = j = min(set1->nwords, set2->nwords); + + for (p1 = set1->map, p2 = set2->map; --j >= 0; p1++, p2++) { + if (*p1 != *p2) { + return *p1 - *p2; + } + } + + /* + * You get here only if all words in both sets are the same. + * Check the tail end of the larger array for all zeroes. + */ + if ((j = set1->nwords - i) > 0) // set1 is larger + while (--j >= 0) { + if (*p1++) { + return 1; + } + } + } else if ((j = set2->nwords - i) > 0) // set2 is larger + while (--j >= 0) { + if (*p2++) { + return -1; + } + } + } + return 0; // they are equivalent. +} + + +/** + * sethash + * ``````` + * Hash a set by summing together the words in the bitmap. + * + * @set : Pointer to a set. + * Return: hashed value. 
+ */ +unsigned sethash(struct set_t *set) +{ + _SETTYPE *p; + unsigned total; + int j; + + total = 0; + j = set->nwords; + p = set->map; + + while (--j >= 0) { + total += *p++; + } + + return total; +} + +/** + * is_subset + * ````````` + * Attempt to determine if 'sub' is a subset of 'set'. + * + * @set : Pointer to a set. + * @sub : Pointer to a possible subset of @set. + * Return: 1 if @sub is a subset of @set, otherwise 0. + * + * NOTE + * If @sub is larger than @set, the extra bytes must be all zeroes. + */ +int is_subset(struct set_t *set, struct set_t *sub) +{ + _SETTYPE *subsetp; + _SETTYPE *setp; + int common; // Number of bytes in potential subset. + int tail; // Number of implied 0 bytes in subset. + + if (sub->nwords > set->nwords) { + common = set->nwords; + tail = sub->nwords - common; + } else { + common = sub->nwords; + tail = 0; + } + + subsetp = sub->map; + setp = set->map; + + for (; --common >= 0; subsetp++, setp++) { + if ((*subsetp & *setp) != *subsetp) { + return 0; + } + } + + while (--tail >= 0) { + if (*subsetp++) { + return 0; + } + } + + return 1; +} + + +/** + * _set_op + * ``````` + * Performs binary operations depending on op. + * + * @op: One of _UNION, _INTERSECT, _DIFFERENCE, or _ASSIGN. + * @dest : Destination set. + * @src : Source set. + * Returns: nothing. + */ +void _set_op(int op, struct set_t *dest, struct set_t *src) +{ + _SETTYPE *d; // Destination map. + _SETTYPE *s; // Source map. + unsigned ssize; // Number of words in source set. + int tail // Dest is this many words bigger than source. + + ssize = src->nwords; + + /* Make sure destination is big enough. 
*/ + if ((unsigned)dest->nwords < ssize) { + grow_set(ssize, dest); + } + + tail = dest->nwords - ssize; + d = dest->map; + s = src->map; + + switch (op) { + case _UNION: + while (--ssize >= 0) { + *d++ |= *s++; + } + break; + case _INTERSECT: + while (--ssize >= 0) { + *d++ &= *s++; + } + while (--tail >= 0) { + *d++ = 0; + } + break; + case _DIFFERENCE: + while (--ssize >= 0) { + *d++ ^= *s++; + } + break; + case _ASSIGN: + while (--ssize >= 0) { + *d++ = *s++; + } + while (--tail >= 0) { + *d++ = 0; + } + break; + } +} + +/** + * invert_set + * `````````` + * Physically invert the bits in the set. Compare with COMPLIMENT(). + * + * @set : Pointer to a set. + * Return: Nothing. + */ +void invert_set(struct set_t *set) +{ + _SETTYPE *p; + _SETTYPE *end; + + for (p = set->map, end = p + set->nwords; p < end; p++) { + *p = ~*p; + } +} + + +/** + * trunc_set + * ````````` + * Clear a set and truncate it to the default size. Compare with CLEAR(). + * + * @set : Pointer to a set. + * Return: Nothing. + */ +void trunc_set(struct set_t *set) +{ + if (set->map != set->defmap) { + free(set->map); + set->map = set->defmap; + } + set->nwords = _DEFWORDS; + set->nbits = _DEFBITS; + memset(set->defmap, 0, sizeof(set->defmap)); +} + + +/** + * next_member + * ``````````` + * Access all members of a set sequentially. + * + * @set : Pointer to a set. + * Return: value of each bit. + * + * USAGE + * Like strtok() + * set == NULL resets. + * set changed from last call resets and returns first element. + * otherwise the next element is returned, or else -1 if none. 
+ */ +int next_member(struct set_t *set) +{ + static struct set_t *oset = 0; + static int current = 0; + _SETTYPE *map; + + if (!set) { + return ((int)(oset = NULL)); + } + + if (oset != set) { + oset = set; + current = 0; + + for (map = set->map; *map == 0 && current < set->nbits; ++map) { + current += _BITS_IN_WORD; + } + } + + /* + * The increment must be put in the test because if TEST() + * evaluates true, the increment on the right of the for would + * never be executed. + */ + while (current++ < set->nbits) { + if (TEST(set, current-1)) { + return (current - 1); + } + } + return (-1); +} + + +/** + * print_set + * ````````` + * Print a set in human-readable form. + * + * @set: pointer to a set + */ +void print_set(struct set_t *set) +{ + int i; + + if (!set) + printf("Null set.\n"); + + else { + next_member(NULL); + while ((i = next_member(set)) >= 0) { + did_something++; + printf("%d", i); + } + next_member(NULL); + + if (!did_something) { + printf("Empty set.\n"); + } + } +} + + diff --git a/abst/set.h b/abst/set.h new file mode 100644 index 0000000..c4ecca1 --- /dev/null +++ b/abst/set.h @@ -0,0 +1,130 @@ +#ifndef _SET_H +#define _SET_H + + +/* One cell in the bitmap */ +typedef unsigned short _SETTYPE; + + +#define _BITS_IN_WORD (16) +#define _BYTES_IN_ARRAY(x) (x << 1) +#define _DEFWORDS 8 +#define _DEFBITS (_DEFWORDS * _BITS_IN_WORD) + + +/* + * Evaluates to the array element that holds the bit x. + * Performs a simple integer divide by 16, which is + * implemented as a right shift of 4. + */ +#define _DIV_WSIZE(x) ((unsigned)(x) >> 4) + + +/* + * Evaluates to the position of the bit within the word, + * i.e. the offset in bits from the least-significant bit. + * Performs a modulus 16 using a bitwise AND. + */ +#define _MOD_WSIZE(x) ((x) & 0x0f) + + +/* + * Used to expand the size of the array. An array grows in + * _DEFWORDS-sized chunks. + * + * >>3 is equivalent to integer division by 8. + * <<3 is equivalent to integer multiplication by 8. 
/* ---- abst/set.h ---- */
#ifndef _SET_H
#define _SET_H

#include <string.h>     /* memset(), used by CLEAR() and FILL() */


/* One cell in the bitmap */
typedef unsigned short _SETTYPE;


#define _BITS_IN_WORD      (16)
#define _BYTES_IN_ARRAY(x) ((x) << 1)
#define _DEFWORDS          8
#define _DEFBITS           (_DEFWORDS * _BITS_IN_WORD)


/*
 * Evaluates to the array element that holds the bit x.
 * Performs a simple integer divide by 16, which is
 * implemented as a right shift of 4.
 */
#define _DIV_WSIZE(x) ((unsigned)(x) >> 4)


/*
 * Evaluates to the position of the bit within the word,
 * i.e. the offset in bits from the least-significant bit.
 * Performs a modulus 16 using a bitwise AND.
 */
#define _MOD_WSIZE(x) ((x) & 0x0f)


/*
 * Used to expand the size of the array. An array grows in
 * _DEFWORDS-sized chunks.
 *
 * >>3 is equivalent to integer division by 8.
 * <<3 is equivalent to integer multiplication by 8.
 *
 * Imagine we start with the default array of 8 chunks, and
 * wish to add the number 200 to the set. The array must be
 * expanded to do this, and after the expansion, the array
 * should have 16 elements in it:
 *
 *      (((_DIV_WSIZE(200) + 8) >> 3) << 3)
 *
 *      (((((unsigned)(200) >> 4) + 8) >> 3) << 3)
 *      (((12 + 8) >> 3) << 3)
 *      ((20 >> 3) << 3)
 *      (2 << 3)
 *      16
 */
#define _ROUND(bit) (((_DIV_WSIZE(bit) + 8) >> 3) << 3)


/*
 * The defmap array is the default bitmap. Initially, map is
 * set to point at defmap. When the array grows, however, instead
 * of calling realloc() to change the size of defmap (and thus _set_),
 * map is simply pointed at the newly malloc'd array.
 *
 * The reason for this is that realloc() will copy the entire memory
 * array to the newly allocated one, whereas only the map needs to be
 * copied. You can thus save time at the expense of some memory if you
 * do it yourself.
 */
struct set_t {
        unsigned char nwords;       // Number of words in the map
        unsigned char compl;        // Is a negative true set if true
        unsigned nbits;             // Number of bits in map
        _SETTYPE *map;              // Pointer to the map
        _SETTYPE defmap[_DEFWORDS]; // The map itself
};


/*
 * Op arguments passed to _set_op
 */
#define _UNION      0   // x is in s1 or s2
#define _INTERSECT  1   // x is in s1 and s2
#define _DIFFERENCE 2   // (x in s1) && (x not in s2)
#define _ASSIGN     4   // s1 = s2

#define UNION(d,s)      _set_op(_UNION, d, s)
#define INTERSECT(d,s)  _set_op(_INTERSECT, d, s)
#define DIFFERENCE(d,s) _set_op(_DIFFERENCE, d, s)
#define ASSIGN(d,s)     _set_op(_ASSIGN, d, s)

#define CLEAR(s)      memset((s)->map,  0, (s)->nwords * sizeof(_SETTYPE))
#define FILL(s)       memset((s)->map, ~0, (s)->nwords * sizeof(_SETTYPE))
#define COMPLIMENT(s) ((s)->compl = ~(s)->compl)
#define INVERT(s)     invert_set(s)

/* Value returned from _set_test */
#define _SET_EQUIV 0
#define _SET_DISJ  1
#define _SET_INTER 2

#define IS_DISJOINT(s1,s2)     (_set_test(s1,s2) == _SET_DISJ)
#define IS_INTERSECTING(s1,s2) (_set_test(s1,s2) == _SET_INTER)
#define IS_EQUIVALENT(s1,s2)   (set_cmp((s1),(s2)) == 0)
#define IS_EMPTY(s)            (num_set(s) == 0)

/*
 * CAVEAT
 * Heavy duty side effects ahead, be aware: the macros below evaluate
 * their arguments more than once.
 */

#define _GETBIT(s,x,op) (((s)->map)[_DIV_WSIZE(x)] op (1 << _MOD_WSIZE(x)))

#define ADD(s,x)    (((x) >= (s)->nbits) ? _add_set(s,x) : _GETBIT(s,x,|=))
#define REMOVE(s,x) (((x) >= (s)->nbits) ? 0             : _GETBIT(s,x,&=~))
/* Non-zero if bit x is physically set in the map (ignores compl). */
#define MEMBER(s,x) (((x) >= (s)->nbits) ? 0             : _GETBIT(s,x,&))
/* Membership with the complement flag taken into account. */
#define TEST(s,x)   ((MEMBER(s,x)) ? !(s)->compl : (s)->compl )

struct set_t *new_set(void);
void          del_set(struct set_t *set);
struct set_t *dup_set(struct set_t *set);
void          grow_set(struct set_t *set, int need);

/* Workers behind ADD(), UNION() and friends, and IS_DISJOINT() etc. */
int  _add_set (struct set_t *set, int bit);
void _set_op  (int op, struct set_t *dest, struct set_t *src);
int  _set_test(struct set_t *set1, struct set_t *set2);

/* Names under which set.c defines the remaining operations. */
int      num_set    (struct set_t *set);
int      set_cmp    (struct set_t *set1, struct set_t *set2);
unsigned sethash    (struct set_t *set);
void     invert_set (struct set_t *set);
void     trunc_set  (struct set_t *set);
int      next_member(struct set_t *set);
void     print_set  (struct set_t *set);
int      is_subset  (struct set_t *set, struct set_t *sub);

/*
 * NOTE(review): the declarations below were already in this header, but
 * set.c defines no functions with these names. They are kept so existing
 * includers still see them; confirm whether they can be removed.
 */
int      set_num   (struct set_t *set);
unsigned set_hash  (struct set_t *set);
void     set_invert(struct set_t *set);
void     set_trunc (struct set_t *set);
int      set_next  (struct set_t *set);
void     set_print (struct set_t *set);


#endif
/* ---- byte/byte.h ---- */
#ifndef BYTE_H
#define BYTE_H

unsigned int byte_chr(const char *s, register unsigned int n, int c);
unsigned int byte_rchr(const char *s, register unsigned int n, int c);
void byte_copy(register void *to, register unsigned int n, register const void *from);
void byte_copyr(register void *to, register unsigned int n, register const void *from);
int  byte_diff(register const void *, register unsigned int, register const void *);
void byte_zero(register void *, register unsigned int);

#define byte_equal(s,n,t) (!byte_diff((s),(n),(t)))

#endif


/* ---- byte/byte.c ---- */

/**
 * byte_chr
 * ````````
 * Find the first occurrence of a byte in a buffer.
 *
 * @s    : Buffer to search.
 * @n    : Number of bytes of @s to examine.
 * @c    : Byte to look for (converted to char).
 * Return: Offset of the first match, or @n if there is none.
 */
unsigned int byte_chr(const char *s, register unsigned int n, int c)
{
        const char wanted = (char)c;
        const char *t = s;

        while (n != 0 && *t != wanted) {
                ++t;
                --n;
        }

        return (unsigned int)(t - s);
}


/**
 * byte_copy
 * `````````
 * Copy @n bytes from @From to @To, lowest address first.
 */
void byte_copy(register void *To, register unsigned int n, register const void *From)
{
        char *to = To;
        const char *from = From;

        while (n != 0) {
                *to++ = *from++;
                --n;
        }
}


/**
 * byte_copyr
 * ``````````
 * Copy @n bytes from @From to @To, highest address first. Copying in
 * this direction is what makes a move to a higher, overlapping address
 * come out intact.
 */
void byte_copyr(register void *To, register unsigned int n, register const void *From)
{
        char *to = (char *)To + n;
        const char *from = (const char *)From + n;

        while (n != 0) {
                *--to = *--from;
                --n;
        }
}


/**
 * byte_diff
 * `````````
 * Compare two buffers of @n bytes.
 *
 * Return: 0 if they are identical, otherwise the difference between the
 *         first pair of differing bytes, both taken as unsigned char.
 */
int byte_diff(register const void *S, register unsigned int n, register const void *T)
{
        const unsigned char *s = S;
        const unsigned char *t = T;

        for (; n != 0; ++s, ++t, --n) {
                if (*s != *t) {
                        return (int)*s - (int)*t;
                }
        }

        return 0;
}


/**
 * byte_rchr
 * `````````
 * Find the last occurrence of a byte in a buffer.
 *
 * @s    : Buffer to search.
 * @n    : Number of bytes of @s to examine.
 * @c    : Byte to look for (converted to char).
 * Return: Offset of the last match, or @n if there is none.
 */
unsigned int byte_rchr(const char *s, register unsigned int n, int c)
{
        const char wanted = (char)c;
        const char *t = s;
        const char *last = NULL;        /* most recent match, if any */

        for (; n != 0; ++t, --n) {
                if (*t == wanted) {
                        last = t;
                }
        }

        /* t now points one past the examined range, i.e. at offset n. */
        if (!last) {
                last = t;
        }

        return (unsigned int)(last - s);
}


/**
 * byte_zero
 * `````````
 * Set @n bytes starting at @S to zero.
 */
void byte_zero(register void *S, register unsigned int n)
{
        char *s = S;

        while (n != 0) {
                *s++ = 0;
                --n;
        }
}
/* ---- byte/fmt.h ---- */
#ifndef FMT_H
#define FMT_H
#include <stdint.h>

#define FMT_ULONG 40            /* enough space to hold 2^128 - 1 in decimal, plus \0 */
#define FMT_LEN ((char *) 0)    /* convenient abbreviation */

extern unsigned int fmt_uint(char *,unsigned int);
extern unsigned int fmt_uint0(char *,unsigned int,unsigned int);
extern unsigned int fmt_xint(char *,unsigned int);
extern unsigned int fmt_nbbint(char *,unsigned int,unsigned int,unsigned int,unsigned int);
extern unsigned int fmt_ushort(char *,unsigned short);
extern unsigned int fmt_xshort(char *,unsigned short);
extern unsigned int fmt_nbbshort(char *,unsigned int,unsigned int,unsigned int,unsigned short);
extern unsigned int fmt_ulong(char *,unsigned long);
extern unsigned int fmt_xlong(char *,unsigned long);
extern unsigned int fmt_nbblong(char *,unsigned int,unsigned int,unsigned int,unsigned long);

extern unsigned int fmt_plusminus(char *,int);
extern unsigned int fmt_minus(char *,int);
extern unsigned int fmt_0x(char *,int);

extern unsigned int fmt_str(char *,const char *);
extern unsigned int fmt_strn(char *,const char *,unsigned int);

extern void fmt_u32(unsigned char *buf, uint32_t value);

#endif


/* ---- byte/fmt.c ----
 *
 * The fmt_* writers below share one convention: they return the number
 * of characters the value needs, write that many characters to the
 * destination when it is non-NULL, and never append a terminating NUL.
 * Passing FMT_LEN (a null pointer) therefore just measures.
 */

/*
 * Write a 32-bit big-endian long value to a buffer.
 */
void fmt_u32(unsigned char *buf, uint32_t value)
{
        int shift = 24;
        int i;

        for (i = 0; i < 4; i++, shift -= 8) {
                buf[i] = (unsigned char)(value >> shift);
        }
}


/*
 * Format @u as lowercase hexadecimal.
 */
unsigned int fmt_xlong(register char *s, register unsigned long u)
{
        static const char digits[] = "0123456789abcdef";
        unsigned int len = 1;
        unsigned long q;

        for (q = u; q > 15; q /= 16) {
                ++len;
        }

        if (s) {
                s += len;
                do {
                        *--s = digits[u & 15];
                        u /= 16;
                } while (u);    /* handles u == 0 */
        }

        return len;
}


/*
 * Format @u as decimal.
 */
unsigned int fmt_ulong(register char *s, register unsigned long u)
{
        unsigned int len = 1;
        unsigned long q;

        for (q = u; q > 9; q /= 10) {
                ++len;
        }

        if (s) {
                s += len;
                do {
                        *--s = (char)('0' + (u % 10));
                        u /= 10;
                } while (u);    /* handles u == 0 */
        }

        return len;
}


/*
 * Format @u as decimal, left-padded with '0' to at least @n characters.
 */
unsigned int fmt_uint0(char *s, unsigned int u, unsigned int n)
{
        unsigned int len = fmt_uint(FMT_LEN, u);

        /*
         * The length is advanced whether or not there is a destination,
         * so a measuring call (s == NULL) terminates as well.
         */
        while (len < n) {
                if (s) {
                        *s++ = '0';
                }
                ++len;
        }

        if (s) {
                fmt_uint(s, u);
        }

        return len;
}


/*
 * Format @u as decimal (thin wrapper over fmt_ulong()).
 */
unsigned int fmt_uint(register char *s, register unsigned int u)
{
        return fmt_ulong(s, (unsigned long)u);
}


/*
 * Copy at most @n characters of the string @t, stopping at its NUL.
 */
unsigned int fmt_strn(register char *s, register const char *t, register unsigned int n)
{
        unsigned int len = 0;

        while (len < n && t[len] != '\0') {
                if (s) {
                        s[len] = t[len];
                }
                len++;
        }

        return len;
}


/*
 * Copy the string @t up to, but not including, its NUL.
 */
unsigned int fmt_str(register char *s, register const char *t)
{
        unsigned int len = 0;

        while (t[len] != '\0') {
                if (s) {
                        s[len] = t[len];
                }
                len++;
        }

        return len;
}
/* ---- byte/scan.h ---- */
#ifndef _SCAN_H
#define _SCAN_H

char tohex(char num);
long int fromhex(unsigned char c);

unsigned int scan_0x       (register const char *s, register unsigned int *u);
unsigned int scan_8long    (register const char *s, register unsigned long *u);
unsigned int scan_plusminus(register const char *s, register int *sign);
unsigned int scan_long     (register const char *s, register long *i);
unsigned int scan_ulong    (register const char *s, register unsigned long *u);

#endif


/* ---- byte/scan.c ---- */

/**
 * tohex
 * `````
 * Convert a numeric value to an ASCII hexadecimal character.
 *
 * @num  : Numeric value to be converted (0..15).
 * Return: Symbolic hexadecimal value (ASCII, lowercase), or -1 if @num
 *         is outside 0..15.
 */
char tohex(char num)
{
        /* A negative value must not fall into the decimal-digit branch. */
        if (num < 0 || num > 15) {
                return -1;
        }

        return (char)((num < 10) ? (num + '0') : (num - 10 + 'a'));
}



/**
 * fromhex
 * ```````
 * Convert an ASCII hexadecimal character to its numeric value.
 *
 * @hex  : Hexadecimal character (either case).
 * Return: Numeric value, or -1 if @hex is not a hexadecimal digit.
 */
long int fromhex(unsigned char hex)
{
        if (hex >= '0' && hex <= '9') {
                return hex - '0';
        }
        if (hex >= 'A' && hex <= 'F') {
                return hex - 'A' + 10;
        }
        if (hex >= 'a' && hex <= 'f') {
                return hex - 'a' + 10;
        }

        return -1;
}



/**
 * scan_0x
 * ```````
 * Scan a string for a hexadecimal value. Scanning stops at the first
 * character that is not a hexadecimal digit; no "0x" prefix is skipped.
 *
 * @s    : String to scan.
 * @u    : Destination of hexadecimal value (0 if no digit was found).
 * Return: Position from beginning of @s.
 */
unsigned int scan_0x(register const char *s, register unsigned int *u)
{
        unsigned int pos = 0;
        unsigned long result = 0;
        long int digit;

        for (;;) {
                digit = fromhex((unsigned char)s[pos]);
                if (digit < 0) {
                        break;
                }
                result = (result << 4) + (unsigned long)digit;
                ++pos;
        }

        *u = (unsigned int)result;

        return pos;
}



/**
 * scan_8long
 * ``````````
 * Scan an octal number from a string.
 *
 * @s    : String to scan.
 * @u    : Destination of the value (0 if no digit was found).
 * Return: Position from beginning of @s.
 */
unsigned int scan_8long(register const char *s, register unsigned long *u)
{
        unsigned int pos = 0;
        unsigned long result = 0;
        unsigned long digit;

        /* Anything below '0' wraps to a large value and fails the test. */
        for (;;) {
                digit = (unsigned long)(unsigned char)(s[pos] - '0');
                if (digit >= 8) {
                        break;
                }
                result = result * 8 + digit;
                ++pos;
        }

        *u = result;

        return pos;
}



/**
 * scan_plusminus
 * ``````````````
 * Scan a sign character from a string.
 *
 * @s    : String to scan.
 * @sign : Receives -1 for a leading '-', otherwise 1.
 * Return: 1 if a '+' or '-' was consumed, otherwise 0.
 */
unsigned int scan_plusminus(register const char *s, register int *sign)
{
        *sign = (*s == '-') ? -1 : 1;

        return (*s == '+' || *s == '-') ? 1 : 0;
}



/**
 * scan_long
 * `````````
 * Scan a long integer (optional sign, then decimal digits) from a string.
 *
 * @s    : String to scan.
 * @i    : Destination of long.
 * Return: Position from beginning of @s.
 */
unsigned int scan_long(register const char *s, register long *i)
{
        int sign;
        unsigned long u;
        unsigned int len;

        len  = scan_plusminus(s, &sign);
        len += scan_ulong(s + len, &u);

        *i = (sign < 0) ? -(long)u : (long)u;

        return len;
}



/**
 * scan_ulong
 * ``````````
 * Scan an unsigned long integer (decimal digits) from a string.
 *
 * @s    : String to scan.
 * @u    : Destination of unsigned long (0 if no digit was found).
 * Return: Position from beginning of @s.
 */
unsigned int scan_ulong(register const char *s, register unsigned long *u)
{
        unsigned int pos = 0;
        unsigned long result = 0;
        unsigned long digit;

        for (;;) {
                digit = (unsigned long)(unsigned char)(s[pos] - '0');
                if (digit >= 10) {
                        break;
                }
                result = result * 10 + digit;
                ++pos;
        }

        *u = result;

        return pos;
}


/* ---- file/dir.c ---- */
#define USE_ERRNO_H

#include <stdlib.h>
#include <stdio.h>
<sys/types.h> +#include <sys/stat.h> +#include <dirent.h> + +#include "../text/textutils.h" +#include "../util/util.h" +#include "../util/debug.h" +#include "../abst/bits.h" +#include "../abst/bloom/bloom.h" +#include "dir.h" +#include "file.h" + + +/****************************************************************************** + * DIRECTORY ENTRY GENERATORS + * + * Internally, this module uses the functions _diterate() and diterate() + * to provide a uniform iteration interface for all other functions listed + * in this source file. + * + * A generator is a special function that controls the iteration behavior + * of a loop. It generates a sequence of values, but instead of delivering + * the values all at once, it yields them one at a time. + * + * The most familiar example of generator behavior is strtok(), which allows + * for continutation. In practice, strtok() accepts a valid pointer to signal + * a fresh invocation, and subsequent "continuation" calls are indicated by + * passing a NULL pointer in place of the valid one. + * + * There is a good example from the strtok_r manpage that constructs this + * convention as a for loop, Adapted to our purposes, it would look like: + * + * struct dirent *entry; + * DIR *dir; + * + * for (entry = _diterate(dir)); + * entry != NULL; + * entry = _diterate(NULL)) + * { + * ...do something with entry + * } + * + * This still looks more like a hack than a generator. Wikipedia sayeth: + * + * "In short, a generator *looks* like a function but *behaves* + * like an iterator." + * + * The for loop construct is some halfway horror between these two ideals. + * To nudge it toward readability, we move the canonical form to an + * implementation function, _diterate(), and wrap this with diterate(). + * + * diterate() keeps track of the continuation status of the generator, + * so the caller can forgo the initialization step. This allows for a while + * loop, with the termination condition triggered implicitly. 
+ * + * Thus sugar-coated, the generator assumes a brevity exemplified by the + * filecount() function: + * + * int filecount(DIR *dir, int filter) + * { + * struct dirent *entry; + * int count = 0; + * + * while ((entry = diterate(dir, filter))) { + * count++; + * } + * + * return count; + * } + * + * CAVEAT + * These functions are not re-entrant safe! + * + ******************************************************************************/ + +/** + * _diterate + * ````````` + * Provides iteration over directory contents to diterate() + * + * @dir : an open directory stream + * @filter: a set of predicates (see note on file predicates) + * Return : a struct dirent pointer set to the current item + * + * USAGE + * Invocation is similar to strtok(), see note above. + * + * NOTES + * This is an internal function which is wrapped by diterate(). + * + * CAVEAT + * The call to stat() has been a rich source of bugs. It expects a + * file path as its first argument, and many heads were scratched + * before realizing that the name of the directory entry would only + * produce a valid path if the current working directory was the same + * as the path the DIR stream was opened at. + * + * cwd--+-somedir + * +-subdir--+-file0 + * +-file1 + * +-file2 + * + * If we are executing in cwd, and open the DIR stream with + * + * DIR *dir = opendir(subdir), + * + * and we retreive the entries of the directory stream with + * + * char *entry = readdir(dir); + * + * we're going to get values that look like + * + * entry == "file0" + * entry == "file1" + * entry == "file2". + * + * When we give these paths to stat(), we're in trouble because stat() + * naturally assumes that all relative paths are rooted in the current + * working directory, 'cwd'. + * + * In fact, these paths are relative to the root of the DIR stream, + * 'subdir'. 
/**
 * _diterate
 * `````````
 * Provides iteration over directory contents to diterate()
 *
 * @dir   : an open directory stream, or NULL to continue with the stream
 *          given on an earlier call
 * @filter: a set of predicates (see note on file predicates)
 * Return : a struct dirent pointer set to the current item, or NULL when
 *          the stream is exhausted (the stream is rewound first) or when
 *          no stream has been supplied yet
 *
 * USAGE
 * Invocation is similar to strtok().
 *
 * NOTES
 * This is an internal function which is wrapped by diterate().
 * Entries whose name begins with '.' are always skipped.
 * NOTE(review): an earlier comment here tied that to an F_HID flag, but
 * no such test exists in this function; confirm the intended behavior.
 *
 * CAVEAT
 * stat() is given the bare entry name, which is resolved relative to the
 * current working directory, not relative to the directory the stream
 * was opened on. Unless the two coincide, stat() fails and the entry is
 * skipped. diterate() works around this by changing into the stream's
 * directory around each call.
 */
struct dirent *_diterate(DIR *dir, int filter)
{
        static DIR *_dir;       /* stream remembered between calls */
        struct dirent *entry;
        struct stat dstat;

        if (dir != NULL) {
                _dir = dir;
        }

        /* A continuation call before any stream was ever supplied. */
        if (_dir == NULL) {
                return NULL;
        }

        while ((entry = readdir(_dir)) != NULL) {

                /* Hidden entries are never yielded; skip before stat(). */
                if (entry->d_name[0] == '.')
                        continue;

                /* If we cannot stat a file, we move on. */
                if (stat(entry->d_name, &dstat) == -1)
                        continue;

                /* If filetype is not included in the filter, move on */
                if (!(hasvalue(filter, F_TYPE(dstat.st_mode))))
                        continue;

                return entry;
        }

        /* Finished scanning directory */
        rewinddir(_dir);
        return NULL;
}
+ */ +struct dirent *diterate(DIR *dir, int filter) +{ + /* Used to resolve relative paths */ + static struct cwd_t cwd; + static bool running = false; + struct dirent *entry; + DIR *_dir = NULL; + + if (!running) { + _dir = dir; + running = true; + cwd_setjump(&cwd, getdirpath(dir)); + } + + /* Jump to directory root */ + cwd_jump(&cwd); + + entry = _diterate(_dir, filter); + + /* Return to working directory */ + cwd_jump(&cwd); + + if (entry == NULL) + running = false; + + return entry; +} + + +/****************************************************************************** + * NON-GENERATIVE + * + * These functions use the directory entry generators, but are not intended + * to be used as generators themselves. They perform a one-and-done traversal + * and report a single result with no continuation. + * + ******************************************************************************/ + + +/** + * filecount + * ````````` + * Count the number of files in a directory + * + * @dir : open directory stream + * @filter: a set of predicates (see note on file predicates) + * Return : the number of files in 'dir' which passed the filter + * + * NOTE + * This is a one-and-done traversal of the directory entries, with + * a definite sum that is returned to the caller. + */ +int filecount(DIR *dir, int filter) +{ + struct dirent *entry; + int count = 0; + + while ((entry = diterate(dir, filter))) { + count++; + } + + return count; +} + + +/****************************************************************************** + * GENERATIVE + * + * These are the public generator functions availible for retreiving + * information about the set of files (entries) in a directory stream. + * They use the directory entry generators outlined above, yielding + * data about each entry to the caller in a serial fashion. 
+ * + ******************************************************************************/ + + +/** + * getfile + * ``````` + * Yield the filenames of each entry in a directory stream. + * + * @dir : open directory stream + * @filter: a set of predicates (see note on file predicates) + * Return : the filename of the current entry in the iteration + */ +const char *getfile(DIR *dir, int filter) +{ + struct dirent *entry; + + while ((entry = diterate(dir, filter))) { + return entry->d_name; + } + + return NULL; +} + + +/** + * getdiff + * ``````` + * Yield the filename of each entry in a directory stream exactly once. + * + * @dir : open directory stream + * @filter: a set of predicates (see note on file predicates) + * Return : the filename of the current unique entry in the iteration + * + * TODO + * A mechanism to re-set the Bloom filter. + * + * NOTES + * getdiff() remembers the filenames it has already yielded, even across + * multiple continuations. This is not the same as maintaining state + * between calls, like all generators, but between multiple traversals + * of the directory entirely. + * + * USAGE + * getdiff() will only yield those filenames which have changed between + * continuations, so that you can repeatedly scan a directory without + * re-listing the same entries every time. + */ +const char *getdiff(DIR *dir, int filter) +{ + static struct bloom_t *bloom; + struct dirent *entry; + + /* Construct the Bloom filter */ + if (!bloom) + bloom = bloom_new(250000, 3, fnv_hash, sdbm_hash, djb2_hash); + + while ((entry = diterate(dir, filter))) { + /* + * If the filename is in the Bloom filter, continue. + */ + if (bloom_check(bloom, entry->d_name)) + continue; + + /* + * Otherwise, it is probably a new entry. 
+ * Add it to the Bloom filter and yield + * it to the caller + */ + bloom_add(bloom, entry->d_name); + + return entry->d_name; + } + return NULL; /* signals end of iteration run */ +} + + diff --git a/file/dir.h b/file/dir.h new file mode 100644 index 0000000..a61396b --- /dev/null +++ b/file/dir.h @@ -0,0 +1,18 @@ +#ifndef _DIR_LISTING_H +#define _DIR_LISTING_H + +#include <sys/types.h> +#include <dirent.h> + +/* Procedures +``````````````````````````````````````````````````````````````````````````````*/ +int filecount(DIR *dir, int options); + + +/* Generators +``````````````````````````````````````````````````````````````````````````````*/ +const char *getfile(DIR *dir, int options); +const char *getdiff(DIR *dir, int filter); + + +#endif diff --git a/file/fdio.c b/file/fdio.c new file mode 100644 index 0000000..93a9529 --- /dev/null +++ b/file/fdio.c @@ -0,0 +1,472 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdbool.h> +#include <sys/time.h> /* for struct timeval */ + +#include "../util/debug.h" +#include "../text/textutils.h" +#include "fdio.h" +#include "file.h" + + +/****************************************************************************** + * SAFE LOW-LEVEL I/O + * + * Read, write, and readline functions for file descriptors. + ******************************************************************************/ + +/** + * fd_read + * ``````` + * Read data from a file descriptor in chunks of specified size. + * + * @fd : File descriptor of the socket. + * @dst : Destination of the data being read off the socket. + * @nbytes: Maximum number of bytes to read. + * Return : Number of bytes actually read. 
/**
 * fd_read
 * ```````
 * Read from a file descriptor until @nbytes have arrived or EOF is reached.
 *
 * @fd    : File descriptor to read from.
 * @dst   : Destination buffer; must have room for @nbytes.
 * @nbytes: Maximum number of bytes to read.
 * Return : Number of bytes actually read (short only at EOF), or
 *          (size_t)-1 if read() reports an error.
 *
 * NOTES
 * read() returns a signed ssize_t. The result must be kept in a signed
 * variable, otherwise the error test can never fire and a failed read
 * corrupts both the remaining-byte counter and the write cursor.
 */
size_t fd_read(int fd, void *dst, size_t nbytes)
{
        char  *cursor = dst;            /* byte pointer: no void* arithmetic */
        size_t nleft  = nbytes;

        while (nleft > 0) {
                ssize_t nread = read(fd, cursor, nleft);

                if (nread < 0) {
                        return (size_t)-1;      /* error */
                }
                if (nread == 0) {
                        break;                  /* EOF */
                }

                nleft  -= (size_t)nread;
                cursor += nread;
        }
        return nbytes - nleft;
}


/**
 * fd_readloop
 * ```````````
 * Similar to fd_read, except it accepts an additional @loop parameter.
 *
 * @fd    : File descriptor to read from.
 * @dst   : Destination buffer; must have room for @nbytes.
 * @nbytes: Maximum number of bytes to read.
 * @loop  : integer address, used to determine EOF emission.
 * Return : Number of bytes actually read, (size_t)EOF once @loop has been
 *          set by a previous call, or (size_t)-1 if read() fails.
 *
 * USAGE
 * When calling fd_read() in a while loop, any behavior in the loop body
 * will not be executed if the last read is less than the maximum number
 * of bytes (@nbytes). Thus, the last partial chunk of data will not be
 * acted upon.
 *
 * This function sets the value of @loop, instead of returning EOF, so
 * that the next time it is invoked, EOF will be emitted based on the
 * value of @loop.
 */
size_t fd_readloop(int fd, void *dst, size_t nbytes, volatile int *loop)
{
        char  *cursor = dst;
        size_t nleft  = nbytes;

        /* A previous call already hit end-of-file: report it now. */
        if (*loop == 1) {
                return EOF;
        }

        while (nleft > 0) {
                ssize_t nread = read(fd, cursor, nleft);

                if (nread < 0) {
                        return (size_t)-1;      /* error */
                }
                if (nread == 0) {
                        *loop = 1;              /* EOF: remember it ...      */
                        break;                  /* ... and hand back the tail */
                }

                nleft  -= (size_t)nread;
                cursor += nread;
        }
        return nbytes - nleft;
}
/**
 * fd_write
 * ````````
 * Write @nbytes to a file descriptor, looping over short writes.
 *
 * @fd    : File descriptor to write to.
 * @src   : Source of the data being written.
 * @nbytes: Number of bytes to write.
 * Return : Number of bytes actually written, or (size_t)-1 if write()
 *          reports an error.
 *
 * NOTES
 * write() returns a signed ssize_t; the result is kept signed so that the
 * error test can actually fire.
 */
size_t fd_write(int fd, void *src, size_t nbytes)
{
        const char *cursor = src;       /* byte pointer: no void* arithmetic */
        size_t      nleft  = nbytes;

        while (nleft > 0) {
                ssize_t nwritten = write(fd, cursor, nleft);

                if (nwritten < 0) {
                        return (size_t)-1;      /* error */
                }
                if (nwritten == 0) {
                        break;                  /* nothing accepted */
                }

                nleft  -= (size_t)nwritten;
                cursor += nwritten;
        }
        return nbytes - nleft;
}


/**
 * fd_read_tok
 * ```````````
 * Read data from a file descriptor, one token-delimited chunk at a time.
 *
 * @fd    : File descriptor to read from.
 * @dst   : Destination buffer of @nbytes bytes; always NUL-terminated
 *          when @nbytes > 0.
 * @nbytes: Size of @dst, including room for the terminating NUL.
 * @tok   : Delimiter byte; it is stored in @dst when encountered.
 * Return : 0 on EOF, (size_t)-1 on error, else the number of bytes stored
 *          in @dst (not counting the NUL).
 *
 * NOTES
 * The count returned is the true number of bytes stored. The classic
 * "for (n = 1; ...) return n" form reports one byte too many when the
 * chunk is ended by EOF or by a full buffer.
 */
size_t fd_read_tok(int fd, char *dst, size_t nbytes, int tok)
{
        size_t count = 0;

        if (nbytes == 0) {
                return 0;               /* no room even for the NUL */
        }

        /* Keep one byte in reserve for the terminator. */
        while (count + 1 < nbytes) {
                char    c;
                ssize_t rc = read(fd, &c, 1);

                if (rc < 0) {
                        dst[count] = '\0';
                        return (size_t)-1;      /* error */
                }
                if (rc == 0) {
                        break;                  /* EOF */
                }

                dst[count++] = c;

                /* Compare as bytes so a signed char cannot miss the token. */
                if ((unsigned char)c == (unsigned char)tok) {
                        break;
                }
        }

        dst[count] = '\0';
        return count;
}


/**
 * fd_readline
 * ```````````
 * Read data from a file descriptor, one (\n-terminated) line at a time.
 *
 * @fd    : File descriptor to read from.
 * @dst   : Destination buffer of @nbytes bytes.
 * @nbytes: Size of @dst, including room for the terminating NUL.
 * Return : 0 on EOF, (size_t)-1 on error, else number of bytes stored.
 */
size_t fd_readline(int fd, char *dst, size_t nbytes)
{
        return fd_read_tok(fd, dst, nbytes, '\n');
}
/**
 * fd_copy
 * ```````
 * Copy the contents of one file descriptor into another, line by line.
 *
 * @fd_src : source descriptor
 * @fd_dst : destination descriptor
 * Return  : number of bytes copied
 *
 * CAVEAT
 * Each line is measured with strlen(), so a NUL byte in the input cuts
 * the line short at that point.
 */
size_t fd_copy(int fd_src, int fd_dst)
{
        #define MAXLINE size_kb(1)

        char   send[MAXLINE];
        size_t bytes = 0;
        size_t len;

        while ((len = fd_readline(fd_src, send, MAXLINE)) > 0) {
                size_t size;

                /*
                 * A read error comes back as (size_t)-1, which is "> 0";
                 * it must not be mistaken for a line of data.
                 */
                if (len == (size_t)-1) {
                        panic("Could not read from file descriptor.\n");
                        break;  /* only reached if panic() returns */
                }

                size = strlen(send);
                if (fd_write(fd_dst, send, size) != size) {
                        panic("Could not write to file descriptor.\n");
                }
                bytes += size;
        }

        return bytes;
}


/**
 * fd_dump
 * ```````
 * Read a file descriptor, line by line, into a buffer.
 *
 * @fd_src  : source file descriptor
 * @buf_dst : destination buffer; lines are appended with slcat()
 * @maxbytes: upper bound on the number of bytes appended
 * Return   : Number of bytes appended (never > @maxbytes), or (size_t)-1
 *            if reading fails.
 *
 * NOTE(review): slcat() is defined elsewhere; this assumes it appends to
 * the string already in @buf_dst, so the caller presumably has to pass an
 * initialised (e.g. empty) string -- confirm against textutils.
 */
size_t fd_dump(int fd_src, char *buf_dst, size_t maxbytes)
{
        char   line[MAXLINE];
        size_t total = 0;
        size_t len;

        /* Nothing may be stored; also keeps buf_dst[maxbytes - 1] in range. */
        if (maxbytes == 0) {
                return 0;
        }

        while ((len = fd_readline(fd_src, line, MAXLINE)) > 0) {

                if (len == (size_t)-1) {
                        return (size_t)-1;      /* error */
                }

                /* Truncate len so that total never exceeds maxbytes */
                if (len >= maxbytes - total) {
                        len = maxbytes - total;
                }

                total += len;

                /* Concatenate len bytes into the buffer */
                slcat(buf_dst, line, len);

                /* max size reached */
                if (total >= maxbytes) {
                        buf_dst[maxbytes - 1] = '\n';   /* newline-terminate */
                        break;
                }
        }

        return total;
}


/**
 * fd_spool
 * ````````
 * Drain a stdio stream into a file descriptor, one byte at a time.
 *
 * @fd    : destination file descriptor
 * @stream: source stream, read until EOF
 * Return : number of bytes written
 */
size_t fd_spool(int fd, FILE *stream)
{
        size_t bytes = 0;
        int    c;

        while ((c = fgetc(stream)) != EOF) {
                /*
                 * Write from a real byte: the first byte of the int 'c'
                 * is only the character on little-endian machines.
                 */
                unsigned char byte = (unsigned char)c;

                if (fd_write(fd, &byte, 1) != 1) {
                        panic("Write error on file descriptor.\n");
                }
                bytes += 1;
        }
        return bytes;
}
/**
 * fd_poll
 * ```````
 * Poll a descriptor for the allotted time, and report if it's ready.
 *
 * @fd      : file descriptor
 * @seconds : number of seconds to listen
 * @useconds: number of microseconds to listen
 * Return   : true if descriptor is ready for reading, else false
 *            (including when select() fails or @fd cannot be polled).
 *
 * USAGE
 * The underlying behavior here uses select(), and is a convenient
 * way to use it in its most common function.
 */
bool fd_poll(int fd, long int seconds, long int useconds)
{
        struct timeval tv;
        fd_set descriptors;

        /* FD_SET() is undefined for descriptors outside [0, FD_SETSIZE). */
        if (fd < 0 || fd >= FD_SETSIZE) {
                return false;
        }

        FD_ZERO(&descriptors);
        FD_SET(fd, &descriptors);

        tv.tv_sec  = seconds;
        tv.tv_usec = useconds;

        /*
         * An error (-1) must not leak through the bool return type:
         * converted to bool it would read as "ready".
         */
        if (select(fd + 1, &descriptors, NULL, NULL, &tv) <= 0) {
                return false;
        }

        return FD_ISSET(fd, &descriptors) != 0;
}


/**
 * isready
 * ```````
 * Non-blocking check whether a descriptor is ready for reading.
 *
 * @fd    : file descriptor
 * Return : 1 if ready, 0 if not, -1 if select() fails or @fd cannot be
 *          polled.
 */
int isready(int fd)
{
        struct timeval tv;
        fd_set fds;

        if (fd < 0 || fd >= FD_SETSIZE) {
                return -1;
        }

        FD_ZERO(&fds);
        FD_SET(fd, &fds);

        tv.tv_sec  = 0;
        tv.tv_usec = 0;

        if (select(fd + 1, &fds, NULL, NULL, &tv) < 0) {
                return -1;
        }

        return FD_ISSET(fd, &fds) ? 1 : 0;
}


/**
 * fd_print
 * ````````
 * Write the contents of a file descriptor to a text stream.
 *
 * @fd_src: source descriptor
 * @fs_dst: stream to print on
 * Return : (size_t)-1 on read error, else number of bytes printed.
 *
 * NOTES
 * Bytes are taken while fd_poll() reports the descriptor readable within
 * 500 microseconds; the loop ends on the first timeout, EOF or error.
 * The byte is fetched with read() directly so that a failure is seen as
 * a negative value.
 */
size_t fd_print(int fd_src, FILE *fs_dst)
{
        size_t bytes  = 0;
        bool   failed = false;

        while (fd_poll(fd_src, 0, 500)) {
                char    c;
                ssize_t got = read(fd_src, &c, 1);

                if (got != 1) {
                        failed = (got < 0);     /* 0 is plain EOF */
                        break;
                }

                fputc(c, fs_dst);
                bytes++;
        }

        fflush(fs_dst);

        return failed ? (size_t)-1 : bytes;
}


/**
 * fd_getc
 * ```````
 * Read one byte from a file descriptor.
 *
 * @fd    : file descriptor
 * Return : the byte as an unsigned char converted to int, or EOF on
 *          end-of-file or error (same convention as fgetc()).
 */
int fd_getc(int fd)
{
        unsigned char byte;

        if (read(fd, &byte, 1) != 1) {
                return EOF;
        }

        return byte;
}


/**
 * fd_putc
 * ```````
 * Write one byte to a file descriptor.
 *
 * @c     : byte to write (converted to unsigned char)
 * @fd    : file descriptor
 * Return : 1 on success, EOF on failure.
 */
int fd_putc(int c, int fd)
{
        unsigned char byte = (unsigned char)c;

        if (write(fd, &byte, 1) != 1) {
                return EOF;
        }

        return 1;
}


/**
 * fd_readb
 * ````````
 * Read up to @nb blocks of @bs bytes each, in the manner of fread().
 *
 * @dst   : destination buffer of at least @bs * @nb bytes
 * @bs    : block size in bytes
 * @nb    : number of blocks wanted
 * @fd    : file descriptor
 * Return : number of complete blocks read. Bytes of a trailing partial
 *          block are stored in @dst but not counted.
 */
size_t fd_readb(void *dst, size_t bs, size_t nb, register int fd)
{
        unsigned char *out = dst;
        size_t nread;

        if (bs == 0) {
                return 0;
        }

        for (nread = 0; nread < nb; nread++) {
                for (size_t i = 0; i < bs; i++) {
                        int c = fd_getc(fd);

                        if (c == EOF) {
                                return nread;
                        }
                        *out++ = (unsigned char)c;
                }
        }

        return nread;
}


/**
 * fd_writeb
 * `````````
 * Write up to @nb blocks of @bs bytes each, in the manner of fwrite().
 *
 * @src   : source buffer of at least @bs * @nb bytes
 * @bs    : block size in bytes
 * @nb    : number of blocks to write
 * @fd    : file descriptor
 * Return : number of complete blocks written.
 */
size_t fd_writeb(const void *src, size_t bs, size_t nb, register int fd)
{
        const unsigned char *in = src;
        size_t nwritten;

        if (bs == 0) {
                return 0;
        }

        for (nwritten = 0; nwritten < nb; nwritten++) {
                for (size_t i = 0; i < bs; i++) {
                        if (fd_putc(*in, fd) == EOF) {
                                return nwritten;
                        }
                        in++;
                }
        }

        return nwritten;
}


/***************************************************************************
 * SEEKING
 ***************************************************************************/
#define SEEK_BEG 0xb1abbb

/**
 * fd_setpos
 * `````````
 * Reposition the offset of a file descriptor.
 *
 * @fd    : file descriptor
 * @ofs   : offset, interpreted according to @whence
 * @whence: SEEK_SET, SEEK_CUR or SEEK_END, passed straight to lseek()
 * Return : the resulting offset narrowed to int, or -1 on error.
 */
int fd_setpos(int fd, int ofs, int whence)
{
        return (int)lseek(fd, (off_t)ofs, whence);
}


/**
 * fd_getpos
 * `````````
 * Report the current offset of a file descriptor.
 *
 * @fd    : file descriptor
 * Return : the current offset narrowed to int, or -1 on error.
 */
int fd_getpos(int fd)
{
        return (int)lseek(fd, 0, SEEK_CUR);
}
fd_readloop(int fd, void *dst, size_t nbytes, volatile int *loop); + +size_t fd_readline(int fd, char *dst, size_t nbytes); +size_t fd_read_tok(int fd, char *dst, size_t nbytes, int tok); + +size_t fd_dump(int fd_src, char *buf_dst, size_t maxbytes); +size_t fd_print(int fd_src, FILE *fs_dst); + +size_t fd_copy(int fd_src, int fd_dst); + +bool fd_poll(int fd, long int seconds, long int microseconds); +int isready(int fd); +size_t fd_spool(int fd, FILE *stream); + +int fd_setpos(int fd, int ofs, int whence); +int fd_getpos(int fd); + +#endif diff --git a/file/file.c b/file/file.c new file mode 100644 index 0000000..796c9de --- /dev/null +++ b/file/file.c @@ -0,0 +1,984 @@ +/****************************************************************************** + * SAFE GENERAL FILE OPERATIONS + * + * Small functions that wrap common file operations with exception handling + * so that we can flexibly arrange for contingencies if we want to. For the + * moment, they simply abort execution, following the principle of "failing + * badly." + * + * The behavior could always be altered with minimal effort, now that we've + * funneled the calls into this interface. + * + ******************************************************************************/ +#define USE_ERRNO_H + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdarg.h> + +#include <unistd.h> +#include <dirent.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <pwd.h> +#include <grp.h> + +#include <libgen.h> + +#include "file.h" +#include "../util/debug.h" +#include "../util/util.h" +#include "../text/textutils.h" + + + + +/****************************************************************************** + * DESCRIPTORS + * Operations on raw file descriptors (unbuffered). 
/**
 * fd_create
 * `````````
 * Create (or open, if it already exists) a file and return a descriptor.
 *
 * @path : path of the file to be created
 * @oflag: open(2) flags to apply in addition to O_CREAT
 * @perms: permissions to create the file with
 * Return: a file descriptor (integer); panics if open() fails.
 */
int fd_create(const char *path, int oflag, int perms)
{
        int fd = open(path, O_CREAT | oflag, perms);

        if (fd == -1) {
                panic("Could not create %s\n", path);
        }

        return fd;
}


/**
 * unlink_path
 * ```````````
 * Unlink a path from the filesystem
 *
 * @path : path of file to be removed
 * Return: nothing; panics if unlink() fails.
 *
 * NOTE
 * This is used to remove any filesystem entry
 * which is NOT a directory.
 */
void unlink_path(const char *path)
{
        if (unlink(path) == -1) {
                panic("Could not unlink %s\n", path);
        }
}


/**
 * fd_open
 * ```````
 * Open a file and return a file descriptor.
 *
 * @path : path to the desired file
 * @oflag: open(2) flags
 * @...  : permissions (int); required only when @oflag asks for a file
 *         to be created
 * Return: a file descriptor (integer); panics if open() fails.
 *
 * NOTES
 * The variadic argument is only fetched when a mode is actually expected.
 * Calling va_arg() for an argument the caller never passed is undefined
 * behavior.
 */
int fd_open(const char *path, int oflag, ...)
{
        bool wants_mode = (oflag & O_CREAT) != 0;
        int  perms = 0;
        int  fd;

#ifdef O_TMPFILE
        /* O_TMPFILE also consumes a mode argument */
        wants_mode = wants_mode || ((oflag & O_TMPFILE) == O_TMPFILE);
#endif

        if (wants_mode) {
                va_list arg;

                va_start(arg, oflag);
                perms = va_arg(arg, int);
                va_end(arg);
        }

        fd = open(path, oflag, perms);
        if (fd == -1) {
                panic("Could not open %s\n", path);
        }

        return fd;
}


/**
 * fd_close
 * ````````
 * Close a file descriptor safely.
 *
 * @fd   : open file descriptor (integer).
 * Return: nothing; panics if close() fails.
 */
void fd_close(int fd)
{
        if (close(fd) == -1) {
                panic("Could not close file\n");
        }
}
/******************************************************************************
 * FILE STREAMS
 * Operations on <stdio.h> file streams (buffered).
 ******************************************************************************/

/**
 * fs_open
 * ```````
 * Open a buffered stream on the file named by 'path'.
 *
 * @path : path to the desired file
 * @mode : fopen() mode string
 * Return: the FILE stream returned by fopen(); panic() is invoked when
 *         fopen() yields NULL.
 */
FILE *fs_open(const char *path, const char *mode)
{
        FILE *stream = fopen(path, mode);

        if (stream == NULL) {
                panic("Could not open %s\n", path);
        }

        return stream;
}


/**
 * fs_close
 * ````````
 * Close a buffered stream, invoking panic() if fclose() reports EOF.
 *
 * @file : pointer to an open file stream
 * Return: nothing.
 */
void fs_close(FILE *file)
{
        int rc = fclose(file);

        if (rc == EOF) {
                panic("Could not close file\n");
        }
}



/******************************************************************************
 * DIRECTORIES
 * Creating, removing, opening and closing directories.
 ******************************************************************************/

/**
 * dir_create
 * ``````````
 * Make a new directory, invoking panic() if mkdir() fails.
 *
 * @path : path of the directory to be created
 * @perms: permission bits handed to mkdir()
 * Return: nothing.
 */
void dir_create(const char *path, int perms)
{
        int rc = mkdir(path, perms);

        if (rc == -1) {
                panic("Could not create directory %s\n", path);
        }
}


/**
 * dir_remove
 * ``````````
 * Delete a directory, invoking panic() if rmdir() fails.
 *
 * @path : path of the directory to be removed
 * Return: nothing.
 */
void dir_remove(const char *path)
{
        int rc = rmdir(path);

        if (rc == -1) {
                panic("Could not remove directory %s\n", path);
        }
}
+ * + * @path : path to the desired directory + * Return: pointer to an open DIR stream + */ +DIR *dir_open(const char *path) +{ + DIR *dir; + + if ((dir = opendir(path)) == NULL) + panic("Could not open directory %s\n", path); + + return dir; +} + + +/** + * dir_close + * ````````` + * Close a DIR stream pointer safely + * + * @dir : pointer to an open directory stream + * Return: nothing. + */ +void dir_close(DIR *dir) +{ + if (closedir(dir) == EOF) { + panic("Could not close directory\n"); + } +} + + + +/****************************************************************************** + * FIFOs + * Operations on named pipes. + ******************************************************************************/ + +/** + * fifo_create + * ``````````` + * Create a new named pipe file + * + * @path : path of the new file + * @perm : permissions word + * Return: nothing + */ +void fifo_create(const char *path, int perm) +{ + if ((mknod(path, S_IFIFO | perm, 0)) == -1) { + panic("Fee! Fie! Foe! Fum! The FIFO failed to open!\n"); + } +} + + +/** + * fifo_remove + * ``````````` + * Remove a named pipe from the filesystem + * + * @path : path of the new file + * Return: nothing + */ +void fifo_remove(const char *path) +{ + if ((unlink(path)) == -1) { + panic("Could not remove named pipe at %s\n", path); + } +} + + +/** + * fifo_open + * ````````` + * Open named pipe and return a file descriptor. + * + * @path : path to the desired file + * @oflag: various modes to apply to the file + * @... : optional permissions argument (on create) + * Return: a file descriptor (integer) + */ +int fifo_open(const char *path, int oflag, ...) +{ + va_list arg; + int perms = 0; + int fd; + + va_start(arg, oflag); + perms = va_arg(arg, int); + va_end(arg); + + if ((fd = open(path, oflag, perms)) == -1) { + panic("Could not open fifo %s\n", path); + } + + return fd; +} + + +/** + * fifo_close + * `````````` + * Close an open named pipe. 
+ * + * @fd : file descriptor of the file to be closed + * Return: nothing + */ +void fifo_close(int fd) +{ + if ((close(fd)) == -1) { + panic("Could not close fifo\n"); + } +} + + +/** + * fifo_nonblock_set + * ````````````````` + * Set non-blocking status. + * + * @fd : fifo descriptor + * @op : true or false + * Return: + */ +int fifo_nonblock_set(int fd, int op) +{ + int flags_to_set; + int flags_current; + + flags_current = fcntl(fd, F_GETFL, 0); + flags_to_set = O_NONBLOCK; + + if (op == true) { + flags_current |= flags_to_set; + } else { + flags_current &= ~flags_to_set; + + } + + fcntl(fd, F_SETFL, flags_current); + + return 1; +} + + +int fifo_nonblock_on(int fd) +{ + return fifo_nonblock_set(fd, true); +} + + +int fifo_nonblock_off(int fd) +{ + return fifo_nonblock_set(fd, false); +} + + + +/****************************************************************************** + * FILENAMES AND PATHS + * + * Retreive, transform and manipulate pathnames. Lots of hairy things like + * transforming relative to absolute and back, detecting the home directory + * of a user, etc. Creating a temporary name for a file or directory, + * retreiving and changing the current working directory. + * + * CAVEAT + * Pathnames are slow, prone to errors, and generally a pain in the ass. + * Try to avoid using these if an alternative exists. + * + ******************************************************************************/ + +/** + * curdir + * + * Return: the name of the current directory (not the full path) + */ +const char *curdir(void) +{ + static char cwd[PATHSIZE]; + + slcpy(cwd, scwd(), PATHSIZE); + + return basename(cwd); +} + + +const char *base_name(char *path) +{ + return basename(path); +} + + + +/** + * getdirpath + * `````````` + * Get the full path of an open directory stream. 
+ * + * @dir : pointer to an open directory stream + * Return: path in a static buffer + * + * CAVEAT + * This is NOT portable, because it relies explicitly on the organization + * of file descriptors in the Linux filesystem. This is the technique used + * by lsof. + */ +const char *getdirpath(DIR *dir) +{ + static char dirpath[PATHSIZE]; + char linkpath[PATHSIZE]; + int dir_fd; + + /* Determine file descriptor of the DIR stream */ + dir_fd = dirfd(dir); + + /* + * Construct the path of the symlink representing + * the file descriptor to the filesystem (Linux only). + */ + snprintf(linkpath, PATHSIZE, "/proc/self/fd/%d", dir_fd); + + /* + * Resolve the symlink path to its target, which will + * be the path of the DIR stream. + */ + readlink(linkpath, dirpath, PATHSIZE); + + return dirpath; +} + + +/** + * gethome_uid + * ``````````` + * Get the home directory of user with 'uid' + * + * @uid : uid of the user whose home directory you want + * Return: home directory path + */ +const char *gethome_uid(uid_t uid) +{ + struct passwd *pw; + + pw = getpwuid(uid); + + return pw->pw_dir; +} + + +/** + * gethome + * ``````` + * Get the home directory set in the current terminal environment. + * + * Return: home directory path + * + * NOTE + * This is literally returning the value of $HOME as set in the + * environment. + */ +const char *gethome(void) +{ + return gethome_uid(getuid()); +} + + +/** + * scwd + * ```` + * Get the current working directory + * + * Return: path of the current working directory. + */ +const char *scwd(void) +{ + static char buf[PATHSIZE]; + + if ((getcwd(buf, PATHSIZE)) == NULL) + bye("Could not stat working directory."); + + return buf; +} + + +/** + * is_relpath + * `````````` + * Check if path is relative + * + * @path : path to be checked + * Return: true if path is relative, otherwise false + * + * FIXME + * This needs to be ... a little more sophisticated. + */ +bool is_relpath(const char *path) +{ + return (path[0] == '/') ? 
false : true; +} + + +/** + * make_path_absolute + * `````````````````` + * Resolve a relative path to an absolute path + * + * @buf : destination of the absolute path + * @path : the raw path (may be relative) + * Return: nothing. + * + * USAGE + * 'buf' must be large enough to contain at least PATHSIZE + * bytes. 'path' will be checked using is_relpath() to see + * whether or not it needs to be converted. + */ +void make_path_absolute(char *path) +{ + static char buf[PATHSIZE]; + /* + * If it's already an absolute path, simply copy + * it into the buffer and return. + */ + if (!is_relpath(path)) { + return; + } + /* + * Otherwise, get the current working directory + * and append the relative path to it. + */ + slcpy(buf, path, PATHSIZE); + snprintf(path, PATHSIZE, "%s/%s", scwd(), buf); +} + + +/** + * absolute_path + * ````````````` + * Returns the absolute path of the path supplied. + * + * @path : path to be expanded (possibly). + * Return: statically allocated string holding the absolute path. + */ +const char *absolute_path(const char *path) +{ + static char abspath[PATHSIZE]; + + /* Already absolute path */ + if (!is_relpath(path)) + return path; + + snprintf(abspath, PATHSIZE, "%s/%s", scwd(), path); + return abspath; +} + + +/** + * tmpname + * ``````` + * Generate a temporary name according to a template + * + * @template: used to determine how many random bytes to make + * Return : position of first random byte in name + * + * USAGE + * The template should be a string of characters, where 'X' + * will be replaced with a random byte, e.g. 
+ * + * tmp.XXXXXX ----> tmp.042192 + */ +int tempname(char *template) +{ + pid_t val; + int start; + + val = getpid(); + start = strlen(template) - 1; + + while (template[start] == 'X') { + template[start] = '0' + val % 10; + val /= 10; + start--; + } + return start; +} + + +void srename(const char *oldname, const char *newname) +{ + static char old[PATHSIZE]; + static char new[PATHSIZE]; + + slcpy(old, oldname, PATHSIZE); + slcpy(new, newname, PATHSIZE); + + make_path_absolute(old); + make_path_absolute(new); + + if (rename(old, new) == -1) + bye("Could not rename"); +} + + +/****************************************************************************** + * FILE PREDICATES + * + * Queries that can be applied to files. There are three kinds of files that + * may be queried: open files for which the caller has a file descriptor, + * open files for which the caller has a stream pointer, and files that may + * or may not be open, for which the caller has a pathname. + ******************************************************************************/ + +/** + * file_exists + * ``````````` + * Test for the existence of a path on the filesystem. + * + * @path : Path to the file to be tested. + * Return: TRUE if path exists and is accessible, else FALSE. + */ +bool file_exists(const char *path) +{ + if ((access(path, F_OK)) != 0) { + switch (errno) { + case ENOENT: + return false; /* does not exist */ + /*case EACCES:*/ + /*return false; [> exists (?) but access is denied <]*/ + } + } + return true; +} + + +/** + * file_access + * ``````````` + * Test for the accessibility of a path on the filesystem. + * + * @path : Path to the file to be tested. + * Return: TRUE if path is accessible, else FALSE. + */ +bool file_access(const char *path) +{ + if ((access(path, F_OK)) != 0) { + switch (errno) { + case EACCES: + return false; /* exists (?) 
but access is denied */ + } + } + return true; +} + + + +/** + * file_readable + * ````````````` + * Test for the readability of a path on the filesystem. + * + * @path : Path to the file to be tested. + * Return: TRUE if path is readable, else FALSE. + */ +bool file_readable(const char *path) +{ + if ((access(path, R_OK)) != 0) { + return false; /* read access denied */ + } + return true; +} + + +/** + * file_writable + * ````````````` + * Test for the writability of a path on the filesystem. + * + * @path : Path to the file to be tested. + * Return: TRUE if path is writable, else FALSE. + */ +bool file_writable(const char *path) +{ + if ((access(path, W_OK)) != 0) { + switch (errno) { + case EACCES: + return false; /* access denied */ + break; + case EROFS: + return false; /* read-only filesystem */ + break; + } + return false; + } + return true; +} + + +/** + * exists + * `````` + * Test if a pathname is valid (i.e. the file it names exists) + * + * @path : pathname to test + * Return: true if pathname is valid, otherwise false. + */ +bool exists(const char *path) +{ + struct stat buf; + return ((stat(path, &buf) == -1) && (errno == ENOENT)) ? false : true; +} + +/** + * is_valid_fd + * ``````````` + * Checks if a file descriptor is valid. + * + * @fd : file descriptor to be checked. + * Return: true if descriptor is valid, else false. + */ +int is_valid_fd(int fd) +{ + return fcntl(fd, F_GETFL) != -1 || errno != EBADF; +} + + +/** + * ftype + * ````` + * Get the type of a file from its path + * + * @path: pathname of the file to be typed + * Returns a type value, one of the macros F_xxx defined above. + */ +int ftype(const char *path) +{ + struct stat statbuf; + + if ((stat(path, &statbuf) == -1)) + bye("ftype: Could not stat file %s", path); + + return F_TYPE(statbuf.st_mode); +} + + +/** + * sperm + * ````` + * Format file information as a string, e.g. 
"drwxr-xr-x" + * + * @mode : the file mode value (the st_mode member of a struct stat) + * Return: a statically-allocated string formatted as seen above. + * + * NOTES + * Adapted from an unsourced reproduction. I have added the switch + * statement to examine the full range of POSIX-supported filetypes. + * + * HISTORY + * I did not name this function. The original version is part of the + * standard library in Solaris, but although it is referenced in the + * example program given in man(3) stat, sperm is not included in most + * Unices anymore. The disappointing consequence is that man sperm + * fails to satisfy the curious. + */ +const char *sperm(__mode_t mode) +{ + static char local_buf[16] = {0}; + int i = 0; + + /* File type */ + switch (F_TYPE(mode)) { + case F_REG: local_buf[i++] = '-'; break; + case F_DIR: local_buf[i++] = 'd'; break; + case F_LINK: local_buf[i++] = 'l'; break; + case F_SOCK: local_buf[i++] = 's'; break; + case F_PIPE: local_buf[i++] = 'p'; break; + case F_CHAR: local_buf[i++] = 'c'; break; + case F_BLOCK: local_buf[i++] = 'b'; break; + default: local_buf[i++] = '?'; break; + } + + /* User permissions */ + local_buf[i] = ((mode & S_IRUSR)==S_IRUSR) ? 'r' : '-'; i++; + local_buf[i] = ((mode & S_IWUSR)==S_IWUSR) ? 'w' : '-'; i++; + local_buf[i] = ((mode & S_IXUSR)==S_IXUSR) ? 'x' : '-'; i++; + + /* Group permissions */ + local_buf[i] = ((mode & S_IRGRP)==S_IRGRP) ? 'r' : '-'; i++; + local_buf[i] = ((mode & S_IWGRP)==S_IWGRP) ? 'w' : '-'; i++; + local_buf[i] = ((mode & S_IXGRP)==S_IXGRP) ? 'x' : '-'; i++; + + /* Other permissions */ + local_buf[i] = ((mode & S_IROTH)==S_IROTH) ? 'r' : '-'; i++; + local_buf[i] = ((mode & S_IWOTH)==S_IWOTH) ? 'w' : '-'; i++; + local_buf[i] = ((mode & S_IXOTH)==S_IXOTH) ? 
'x' : '-'; + + return local_buf; +} + + +/****************************************************************************** + * CURRENT WORKING DIRECTORY TRACKING + * + * Provide a very simplistic structure for marking the current working + * directory, changing it to another path, and later reverting to the + * marked directory. Basically it cleans up some of the messiness involved, + * making the code that uses it less distracting. + * + ******************************************************************************/ + + +/** + * cwd_mark + * ```````` + * Mark the current working directory. + * + * @breadcrumb: pointer to an awd_t + * Return : nothing + */ +void cwd_mark(struct cwd_t *breadcrumb) +{ + slcpy(breadcrumb->home, scwd(), PATHSIZE); +} + + +/** + * cwd_shift + * ````````` + * Change the current working directory. + * + * @breadcrumb: pointer to an awd_t + * @path : path of the new working directory + * Return : nothing + */ +void cwd_shift(struct cwd_t *breadcrumb, const char *path) +{ + cwd_mark(breadcrumb); + chdir(path); + breadcrumb->away = true; +} + + +/** + * cwd_revert + * `````````` + * Revert the current working directory to the previously-marked path. + * + * @breadcrumb: pointer to an awd_t + * Return : nothing + */ +void cwd_revert(struct cwd_t *breadcrumb) +{ + if (breadcrumb->away) { + chdir(breadcrumb->home); + breadcrumb->away = false; + } +} + + +/** + * cwd_setjump + * ``````````` + * Set the home and alternate directories to be jumped between. + * + * @breadcrumb: pointer to an awd_t + * @path : path to an alternate directory + * Return : nothing + */ +void cwd_setjump(struct cwd_t *breadcrumb, const char *path) +{ + cwd_mark(breadcrumb); + slcpy(breadcrumb->jump, path, PATHSIZE); +} + + +/** + * cwd_jump + * ```````` + * Jump between the home directory and the jump directory. 
+ * + * @breadcrumb: pointer to a cwd_t + * Return : nothing + */ +void cwd_jump(struct cwd_t *breadcrumb) +{ + if (breadcrumb->away) + cwd_revert(breadcrumb); + else + cwd_shift(breadcrumb, breadcrumb->jump); +} + + +/****************************************************************************** + * TEXT FILE PARSING + * + * Provide easy facilities for the most common parsing situation in Unix, + * in which a text file exists as a list of tuples, each representing an + * identifier and a value which is bound to the identifier, with each + * tuple in the list being separated by a newline character, i.e. on its + * own line. + * + * Such files also commonly contain comments which are not to be parsed, + * and these are delimited by a comment character. + * + ******************************************************************************/ + + +/** + * get_tokenf + * `````````` + * Get a token from the file at 'path'. + * + * @dest : the destination buffer (token value will be placed here) + * @token: the token to be scanned for + * @B : the breakpoint character (separates tuples) + * @S : the separator between identifier and value of the tuple + * @C : the comment delimiter character + * @path : the path of the file to be parsed + * Return: nothing + */ +/*void get_tokenf(char *dst, char B, char S, char C, const char *tok, const char *path)*/ +/*{*/ + /*char buffer[LINESIZE];*/ + /*char *pruned;*/ + /*size_t offset;*/ + /*FILE *file;*/ + + /*file = sopen(path, "r");*/ + + /*while (fgets(buffer, LINESIZE, file)) {*/ + /*[> Remove leading and trailing whitespace <]*/ + /*trimws(buffer);*/ + /*[> If line begins with comment character, continue <]*/ + /*if (buffer[0] == C)*/ + /*continue;*/ + /*[> If the token exists in the line <]*/ + /*if (strstr(buffer, tok)) {*/ + /*[> Calculate offset of the token <]*/ + /*offset = strlen(tok) + 1;*/ + /*[> Prune the token text from the return string <] */ + /*pruned = &buffer[offset];*/ + /* + * If any comment character exists in 
the line, + * replace it with the separator character, so + * that the line is effectively truncated. + */ + /*chrswp(pruned, C, S, strlen(pruned));*/ + + /*snprintf(dst, LINESIZE, "%s", pruned);*/ + /*break;*/ + /*}*/ + /*}*/ + /*sclose(file);*/ +/*}*/ + + +/** + * token + * ````` + * Get token at 'path' as a statically allocated string. + * @path : the path of the file to be parsed + * @token: the token to be scanned for + * Return: token in a statically-allocated buffer. + */ +/*char *tokenf(char B, char S, char C, const char *tok, const char *path)*/ +/*{*/ + /*static char buffer[LINESIZE];*/ + /*get_tokenf(buffer, B, S, C, tok, path);*/ + /*return buffer;*/ +/*}*/ + + diff --git a/file/file.h b/file/file.h new file mode 100644 index 0000000..4dc9cae --- /dev/null +++ b/file/file.h @@ -0,0 +1,177 @@ +#ifndef _MY_FILE_LIB_H +#define _MY_FILE_LIB_H + +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/stat.h> + + +/* Limits +``````````````````````````````````````````````````````````````````````````````*/ +#define PATHSIZE 256 +#define LINESIZE 1024 + +#define PERMS (0666) +#define DIR_PERMS ((S_IRWXU | S_IRGRP | S_IROTH | S_IXOTH)) + +#define BYTES_IN_KILOBYTE 1024 +#define BYTES_IN_MEGABYTE 1048576 + +#define size_kb(bytes) bytes * BYTES_IN_KILOBYTE +#define size_mb(bytes) bytes * BYTES_IN_MEGABYTE + + +void unlink_path(const char *path); + +/* Raw file descriptors +``````````````````````````````````````````````````````````````````````````````*/ +int fd_create(const char *path, int oflag, int perms); +int fd_open (const char *path, int oflag, ...); +void fd_close (int fd); + + +/* Buffered IO streams +``````````````````````````````````````````````````````````````````````````````*/ +FILE *fs_open (const char *path, const char *mode); +void fs_close(FILE *file); + + +/* Directory structures +``````````````````````````````````````````````````````````````````````````````*/ +void dir_create(const 
char *path, int perms); +void dir_remove(const char *path); +DIR *dir_open (const char *path); +void dir_close (DIR *dir); + + +/* Named pipes (FIFOs) +``````````````````````````````````````````````````````````````````````````````*/ +void fifo_create(const char *path, int perm); +void fifo_remove(const char *path); +int fifo_open (const char *path, int oflag, ...); +void fifo_close (int fd); + +int fifo_nonblock_set(int fd, int op); +int fifo_nonblock_on (int fd); +int fifo_nonblock_off(int fd); + + +/* File predicates +``````````````````````````````````````````````````````````````````````````````*/ +bool exists(const char *path); +int is_valid_fd(int fd); +int ftype(const char *path); +const char *sperm(__mode_t mode); + + +/* Filenames and pathnames +``````````````````````````````````````````````````````````````````````````````*/ +const char *scwd(void); +const char *curdir(void); + +const char *getdirpath(DIR *dir); +bool is_relpath(const char *path); +void make_path_absolute(char *path); +const char *absolute_path(const char *path); + +const char *gethome_uid(uid_t uid); +const char *gethome(void); + +int tempname(char *templ); +const char *base_name(char *path); + +void srename(const char *oldname, const char *newname); + + +bool file_exists(const char *path); +bool file_access(const char *path); +bool file_readable(const char *path); +bool file_writable(const char *path); + + +/* Current working directory tracking +``````````````````````````````````````````````````````````````````````````````*/ +struct cwd_t { + char home[PATHSIZE]; + char jump[PATHSIZE]; + bool away; +}; + +void cwd_mark (struct cwd_t *breadcrumb); +void cwd_shift (struct cwd_t *breadcrumb, const char *path); +void cwd_revert (struct cwd_t *breadcrumb); +void cwd_setjump(struct cwd_t *breadcrumb, const char *path); +void cwd_jump (struct cwd_t *breadcrumb); + + + +/* Text file parsing +``````````````````````````````````````````````````````````````````````````````*/ +void get_tokenf(char 
*dst, char, char, char, const char *tok, const char *path); +char *tokenf(char, char, char, const char *tok, const char *path); + +/* Macros for most common scenario */ +#define get_token(dst, tok, path) get_tokenf(dst, '\n', ' ', '#', tok, path) +#define token(tok, path) tokenf('\n', ' ', '#', tok, path) + + +/* Pipes +``````````````````````````````````````````````````````````````````````````````*/ +int bounce(char *buf, size_t max, const char *fmt, ...); + + + +/****************************************************************************** + * FILETYPE EXTENSIONS + * + * The following macros should be defined in <sys/stat.h>: + * + * #define S_IFMT 00170000 Mask the mode bytes describing file type + * #define S_IFSOCK 0140000 Socket + * #define S_IFLNK 0120000 Symlink + * #define S_IFREG 0100000 Regular file + * #define S_IFBLK 0060000 Block device + * #define S_IFDIR 0040000 Directory + * #define S_IFCHR 0020000 Character device + * #define S_IFIFO 0010000 FIFO (named pipe) + * + * The same file should contain type predicates of the form: + * + * #define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) + * ... + ******************************************************************************/ + +/* + * Expands to the type value used in the mode quantity 'm'. + * + * CAVEAT + * The result of this macro is NOT compatible as an argument to the + * S_ISxxx macros outlined above. Those perform the equivalent of + * F_TYPE(m) internally. + */ +#define F_TYPE(mode) ((mode) & S_IFMT) + +/* + * Easier-to-read filetype names + */ +#define F_PIPE S_IFIFO +#define F_SOCK S_IFSOCK +#define F_LINK S_IFLNK +#define F_REG S_IFREG +#define F_BLOCK S_IFBLK +#define F_CHAR S_IFCHR +#define F_DIR S_IFDIR + +/* + * Sometimes we want to signal that we wish to test for a + * hidden file, whatever the implementation may define that + * as. 
+ */ +//#define F_HID (0160000) // Filter for hidden files + + +#endif + diff --git a/file/iovec.c b/file/iovec.c new file mode 100644 index 0000000..db99a1a --- /dev/null +++ b/file/iovec.c @@ -0,0 +1,74 @@ +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> + +#include "iovec.h" + +ssize_t writev(int fildes, const struct iovec *iov, int iovcnt) +{ + int i; + int r; + char *p; + ssize_t l; + ssize_t sum; + + /* We should buffer */ + sum = 0; + + for (i=0; i<iovcnt; i++) { + + p = iov[i].iov_base; + l = iov[i].iov_len; + + while (l > 0) { + r = write(fildes, p, l); + if (r <= 0) { + assert(sum == 0); + return r; + } + p += r; + l -= r; + sum += r; + } + } + return sum; +} + + +ssize_t readv(int d, const struct iovec *iov, int iovcnt) +{ + ssize_t ret; + ssize_t nb; + size_t tot = 0; + int i; + char *buf; + char *p; + + for (i=0; i<iovcnt; i++) { + tot += iov[i].iov_len; + } + + buf = malloc(tot); + + if (tot != 0 && buf == NULL) { + errno = ENOMEM; + return -1; + } + + nb = ret = read(d, buf, tot); + p = buf; + + while (nb > 0) { + ssize_t cnt = min(nb, iov->iov_len); + + memcpy(iov->iov_base, p, cnt); + + p += cnt; + nb -= cnt; + } + + free(buf); + return ret; +} + + diff --git a/file/iovec.h b/file/iovec.h new file mode 100644 index 0000000..b26b3a6 --- /dev/null +++ b/file/iovec.h @@ -0,0 +1,12 @@ +#ifndef _VECIO_H +#define _VECIO_H + +struct iovec { + char *iov_base; /* Base address. */ + size_t iov_len; /* Length. 
*/ +}; + +ssize_t writev(int fildes, const struct iovec *iov, int iovcnt); +ssize_t readv(int d, const struct iovec *iov, int iovcnt); + +#endif diff --git a/file/shell.c b/file/shell.c new file mode 100644 index 0000000..de34190 --- /dev/null +++ b/file/shell.c @@ -0,0 +1,129 @@ +#define USE_ERRNO_H + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdarg.h> + +#include "file.h" +#include "../util/debug.h" +#include "../util/util.h" +#include "../text/textutils.h" + +/****************************************************************************** + * PIPES + * + ******************************************************************************/ + +/** + * shell + * ````` + * Get the result of command from the shell. + * + * @buf : destination buffer + * @cmd : command buffer + * Return: nothing + */ +static inline int shell(char *buf, const char *cmd) +{ + FILE *pipe; + int retval; + + /* + * Write the command buffer to the shell and read + * the response into the pipe descriptor. + */ + pipe = popen(cmd, "r"); + + /* + * Scan the contents of the pipe descriptor into + * the destination buffer. + */ + if ((fgets(buf, LINESIZE, pipe)) == 0) + retval = 0; + else + retval = 1; + + fclose(pipe); + + return retval; +} + + +/** + * bounce + * `````` + * Write the result of a shell command to a buffer. + * + * @buf : destination buffer + * @max : size of destination buffer + * @fmt : format string + * @... : arguments for the format string + * Return: shell return value. + */ +int bounce(char *buf, size_t max, const char *fmt, ...) 
+{ + FILE *pipe; + char cmd[LINESIZE]; + va_list args; + int status = 0; + + /* Parse the format string into the command buffer */ + va_start(args, fmt); + vsnprintf(cmd, LINESIZE, fmt, args); + va_end(args); + + pipe = popen(cmd, "r"); + + while ((fread(buf, sizeof(char), max, pipe)) != 0) { + status = 1; + } + + fclose(pipe); + + return status; +} + + + +/** + * echo + * ```` + * Print the result of a shell command to the stdout of the calling process. + * + * @fmt : format string + * @... : arguments for the format string + * Return: nothing. + * + * NOTE + * This essentially wraps the bounce() function, but prints the returned + * buffer contents to stdout, since any echo performed in the bounce pipe + * will print to the stdout of the forked shell process instead of the + * caller's process. + */ +int echo(const char *fmt, ...) +{ + char buf[LINESIZE]; + char cmd[LINESIZE]; + va_list args; + int status = 0; + FILE *pipe; + + /* Parse the format string into the command buffer */ + va_start(args, fmt); + vsnprintf(cmd, LINESIZE, fmt, args); + va_end(args); + + pipe = popen(cmd, "r"); + + while ((fgets(buf, LINESIZE, pipe)) != 0) { + status = 1; + printf("%s", buf); + } + + fclose(pipe); + + return status; +} + + diff --git a/file/shell.h b/file/shell.h new file mode 100644 index 0000000..f7a2b15 --- /dev/null +++ b/file/shell.h @@ -0,0 +1,9 @@ +#ifndef _SHELL_H +#define _SHELL_H + + +int bounce(char *buf, size_t max, const char *fmt, ...); +int echo(const char *fmt, ...); + + +#endif diff --git a/fork/channel.c b/fork/channel.c new file mode 100644 index 0000000..cf31f64 --- /dev/null +++ b/fork/channel.c @@ -0,0 +1,190 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "../file/file.c" +#include "../file/fdio.c" +#include "../util/debug.h" +#include "channel.h" + + + +/****************************************************************************** + * CHANNEL CREATION + 
******************************************************************************/ + + +#define PERMS_DIR ((S_IRWXU | S_IRGRP | S_IROTH | S_IXOTH)) +#define PERMS_FILE (0666) + + +/** + * ch_open + * ``````` + * Open a new channel, create files if requested. + * + * @ch : pointer to a channel. + * @name : name of the channel. + * @mode : options and mode flags. + * Return: Nothing. + */ +void ch_open(struct channel_t *ch, const char *path, int mode) +{ + static char buf[PATHSIZE]; + char *end; + size_t len; + + len = strlen(path); + memcpy(buf, path, len); + end = buf+len; + + ch->role = CH_ROLE(mode); + + switch (ch->role) { + case CH_PUB: + /* + * Create paths and files on disk. + */ + if (CH_CREATE(mode)) { + /* Channel files are contained in a directory. */ + dir_create(path, PERMS_DIR); + + /* Two FIFOs to support duplexed IPC. */ + memcpy(end, "/sub", 5); + fifo_create(buf, PERMS_FILE); + memcpy(end, "/pub", 5); + fifo_create(buf, PERMS_FILE); + + /* Flat files for the buffer and log. */ + memcpy(end, "/buf", 5); + fd_create(buf, O_CREAT|O_RDWR|O_APPEND, PERMS_FILE); + memcpy(end, "/log", 5); + fd_create(buf, O_CREAT|O_RDWR|O_APPEND, PERMS_FILE); + } + /* + * Open the required files. + */ + memcpy(end, "/sub", 5); + ch->sub = fifo_open(buf, O_RDONLY); + ch->nub = fifo_open(buf, O_WRONLY); + memcpy(end, "/pub", 5); + ch->pub = fifo_open(buf, O_WRONLY); + memcpy(end, "/buf", 5); + ch->buf = fd_open(buf, O_RDWR|O_APPEND, 0); + memcpy(end, "/log", 5); + ch->log = fd_open(buf, O_RDWR|O_APPEND, 0); + break; + + case CH_SUB: + /* 'sub' and 'pub' are reversed. */ + memcpy(end, "/sub", 5); + ch->pub = fifo_open(buf, O_WRONLY); + memcpy(end, "/pub", 5); + ch->sub = fifo_open(buf, O_RDONLY); + break; + + default: + panic("Invalid duplex role\n"); + break; + } +} + + +/** + * ch_remove + * ````````` + * Destroy files associated with a channel. + * + * @path : Path of the channel to be removed. 
+ * Return: Nothing + */ +void ch_remove(const char *path) +{ + static char buf[PATHSIZE]; + char *end; + size_t len; + + len = strlen(path); + memcpy(buf, path, len); + end = buf+len; + + memcpy(end, "/sub", 5); + fifo_remove(buf); + memcpy(end, "/pub", 5); + fifo_remove(buf); + memcpy(end, "/buf", 5); + unlink_path(buf); + memcpy(end, "/log", 5); + unlink_path(buf); + + dir_remove(path); +} + + +/** + * ch_close + * ```````` + * Close an open channel. + * + * @ch : pointer to a channel object. + * Return: nothing. + */ +void ch_close(struct channel_t *ch) +{ + switch (ch->role) { + + case CH_PUB: + fifo_close(ch->sub); + fifo_close(ch->nub); + fifo_close(ch->pub); + break; + + case CH_SUB: + fifo_close(ch->pub); + fifo_close(ch->sub); + break; + + default: + panic("Invalid duplex role\n"); + break; + } +} + + + +/****************************************************************************** + * CHANNEL TRANSMISSION + ******************************************************************************/ + +/** + * ch_read + * ``````` + * Read bytes from a channel object into a buffer. + * + * @ch : pointer to a channel structure. + * @dst : destination buffer. + * @max : maximum number of bytes to read. + * Return: nothing. + */ +size_t ch_read(struct channel_t *ch, void *dst, size_t max) +{ + return fd_read(ch->sub, dst, max); +} + + +/** + * ch_write + * ```````` + * Write bytes from a buffer into a channel object. + * + * @ch : pointer to a channel structure. + * @src : source buffer. + * @max : maximum number of bytes to write. + * Return: nothing. 
+ */ +size_t ch_write(struct channel_t *ch, void *src, size_t nbytes) +{ + return fd_write(ch->pub, src, nbytes); +} + + diff --git a/fork/channel.h b/fork/channel.h new file mode 100644 index 0000000..13ad302 --- /dev/null +++ b/fork/channel.h @@ -0,0 +1,38 @@ +#ifndef _CHANNEL_H +#define _CHANNEL_H +#include <stdbool.h> + +/****************************************************************************** + * MODES AND OPTIONS + ******************************************************************************/ +#define CH_ROLE(mode) (((mode) & CH_PUB) == CH_PUB) ? CH_PUB : CH_SUB +#define CH_CREATE(mode) (((mode) & CH_NEW) == CH_NEW) ? true : false + +enum ch_role { CH_NEW = 0, CH_PUB = 1<<0, CH_SUB = 1<<1 }; + + +/****************************************************************************** + * CHANNEL OBJECT + ******************************************************************************/ +struct channel_t { + enum ch_role role; + int pub; + int sub; + int nub; + int buf; + int log; +}; + + +/****************************************************************************** + * CHANNEL METHODS + ******************************************************************************/ +void ch_open (struct channel_t *ch, const char *path, int mode); +void ch_close (struct channel_t *ch); +void ch_remove (const char *path); + +size_t ch_read (struct channel_t *ch, void *dst, size_t max); +size_t ch_write(struct channel_t *ch, void *src, size_t nbytes); + + +#endif diff --git a/fork/daemon.c b/fork/daemon.c new file mode 100644 index 0000000..11357a7 --- /dev/null +++ b/fork/daemon.c @@ -0,0 +1,173 @@ +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ipc.h> + +#include "../file/file.h" +#include "../util/debug.h" +#include "../text/textutils.h" +#include "daemon.h" +#include "signals.h" + + +#define PERMS (0666) + + 
+/****************************************************************************** + * PID MANAGEMENT + * + * Once a process has been daemonized and is executing in the background, + * client processes need some means of getting in touch with the daemon. + * The traditional kludge is for a daemon to leave behind a "pidfile" at + * some predetermined location, soon after forking into the background. + * + * A pidfile is, unsurprisingly, a small file containing the pid (process + * id) of the daemon. The daemon should create one soon after it begins to + * execute, and delete the file before it terminates. + ******************************************************************************/ + +/** + * pidfile - read or write a pidfile for the current process + * @path: path to the pidfile + * @mode: mode to open the pidfile with (influences return value) + */ +int pidfile(const char *path, const char *mode) +{ + FILE *fp; + int pid; + + fp = fopen(path, mode); + pid = getpid(); + + if (*mode == 'w') { + (fp) ? fprintf(fp, "%d", pid) + : bye("daemon: Cannot open pidfile for writing."); + } + else if (*mode == 'r') { + (fp) ? fscanf(fp, "%d", &pid) + : bye("daemon: No pidfile. Is the daemon running?"); + } + else + bye("daemon: Invalid mode supplied to pidfile()."); + + fclose(fp); + return pid; +} + +void pidw(const char *path, int pid) +{ + static char pidstr[PATHSIZE]; + int fd; + + sprintf(pidstr, "%d", pid); + + fd = open(path, O_CREAT|O_WRONLY, 0666); + write(fd, pidstr, strlen(pidstr)); + close(fd); +} + + +int pidr(const char *path) +{ + static char pidstr[PATHSIZE]; + int fd; + + fd = open(path, O_RDONLY); + read(fd, pidstr, 16); + close(fd); + + return atoi(pidstr); +} + + +/** + * get_ids + * ``````` + * Assign a struct containing all [p[p]gu]id values for the calling process. 
+ * + * @id : id struct + * Return: nothing + */ +void get_ids(struct ids_t *id) +{ + id->process = getpid(); + id->pgroup = getpgrp(); + id->parent = getppid(); + id->group = getgid(); + id->user = getuid(); +} + + +/** + * set_ids + * ``````` + * Set the process/user/group ids for the calling process. + * + * @id : id struct + * Return: nothing + */ +void set_ids(struct ids_t *id) +{ + setgid(id->group); + setuid(id->user); +} + +/****************************************************************************** + * DAEMONIZE + ******************************************************************************/ + +int fork_daemon(void) +{ + int pid; + int i; + + pid = fork(); + + /* Return if parent or error */ + if (pid != 0) { + return pid; + } + + /* Close files inherited from parent */ + for (i=0; i<NOFILE; i++) { + close(i); + } + + umask(0); /* Reset file access creation mask */ + ignore(SIGCHLD); /* Ignore child death */ + ignore(SIGHUP); /* Ignore terminal hangups */ + setpgrp(); /* Create new process group */ + + return pid; +} + + +int fork_coprocess(void) +{ + int pid; + int i; + + pid = fork(); + + /* Return if parent or error */ + if (pid != 0) { + return pid; + } + + /* Close files inherited from parent */ + for (i=0; i<NOFILE; i++) { + close(i); + } + + ignore(SIGCHLD); /* Ignore child death */ + ignore(SIGHUP); /* Ignore terminal hangups */ + setpgrp(); /* Create new process group */ + + return pid; +} + + diff --git a/fork/daemon.h b/fork/daemon.h new file mode 100644 index 0000000..b49623d --- /dev/null +++ b/fork/daemon.h @@ -0,0 +1,28 @@ +#ifndef _DAEMON_LIBRARY_H +#define _DAEMON_LIBRARY_H +#include <unistd.h> + +struct ids_t { + pid_t process; + pid_t pgroup; + pid_t parent; + gid_t group; + uid_t user; +}; + +void get_ids(struct ids_t *id); +void set_ids(struct ids_t *id); + + +/* PID management */ +int pidfile(const char *path, const char *mode); +void pidw(const char *path, int pid); +int pidr(const char *path); + + +/* Process grooming */ +int 
fork_daemon(void); +int fork_coprocess(void); + + +#endif diff --git a/fork/signals.c b/fork/signals.c new file mode 100644 index 0000000..cdfa529 --- /dev/null +++ b/fork/signals.c @@ -0,0 +1,111 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <stdarg.h> +#include "signals.h" + +/****************************************************************************** + * SIGNAL HANDLING + * + * Overview + * -------- + * Signals are usually Bad News that a process receives from the kernel. + * + * + * Signal Default action Description + * -------------------------------------------------------------------------- + * SIGABRT A Process abort signal. + * SIGALRM T Alarm clock. + * SIGBUS A Access to an undefined memory portion. + * SIGCHLD I Child process terminated/stopped/continued. + * SIGCONT C Continue executing, if stopped. + * SIGFPE A Erroneous arithmetic operation. + * SIGHUP T Terminal hangup. + * SIGILL A Illegal instruction. + * SIGINT T Terminal interrupt. + * SIGKILL T Kill (cannot be caught or ignored). + * SIGPIPE T Write on a pipe with no one to read it. + * SIGQUIT A Terminal quit signal. + * SIGSEGV A Invalid memory reference. + * SIGSTOP S Stop executing (cannot be caught or ignored). + * SIGTERM T Termination signal. + * SIGTSTP S Terminal stop signal. + * SIGTTIN S Background process attempting read. + * SIGTTOU S Background process attempting write. + * SIGUSR1 T User-defined signal 1. + * SIGUSR2 T User-defined signal 2. + * SIGPOLL T Pollable event. + * SIGPROF T Profiling timer expired. + * SIGSYS A Bad system call. + * SIGTRAP A Trace/breakpoint trap. + * SIGURG I High bandwidth data availible at a socket. + * SIGVTALRM T Virtual timer expired. + * SIGXCPU A CPU time limit exceeded. + * SIGXFSZ A File size limit exceeded. 
+ * -------------------------------------------------------------------------- + * + * + * signal.h defines the sigaction() function: + * + * int sigaction(int sig, const struct sigaction *restrict act, + * struct sigaction *restrict oact); + * + * where 'act' specifies the implementation-defined signal handling, and + * 'oact' refers to the location at which the default signal handling + * configuration will be stored. These are of type struct sigaction, which + * is also defined in signal.h. See man(3) signal.h + * + ******************************************************************************/ + +sig_handler_t my_handler; +sig_cleanup_t my_cleanup; + +/** + * catch_signal + * ```````````` + * Call kill_tap() to perform cleanup before process termination. + * + * @signo : signal number sent from the kernel + * Returns: does not return. + */ +void catch_signal(int signo) +{ + my_cleanup(); + + signal(signo, SIG_DFL); + raise(signo); +} + + +/** + * sigreg -- register a function to handle standard signals + */ +void sigreg(sig_handler_t handler, sig_cleanup_t cleanup) +{ + my_handler = handler; + my_cleanup = cleanup; + + /* You say stop */ + signal(SIGINT, my_handler); + signal(SIGABRT, my_handler); + signal(SIGINT, my_handler); + signal(SIGTERM, my_handler); + signal(SIGQUIT, my_handler); + signal(SIGSTOP, my_handler); + + /* I say go */ + signal(SIGPIPE, my_handler); + signal(SIGSEGV, my_handler); + + /* You say goodbye */ + signal(SIGUSR1, my_handler); + signal(SIGUSR2, my_handler); +} + + +void ignore(int signo) +{ + signal(signo, SIG_IGN); +} + diff --git a/fork/signals.h b/fork/signals.h new file mode 100644 index 0000000..7f63a11 --- /dev/null +++ b/fork/signals.h @@ -0,0 +1,13 @@ +#ifndef _SIGNALS_H +#define _SIGNALS_H + +#include <signal.h> + +typedef void (*sig_handler_t)(int signo); +typedef void (*sig_cleanup_t)(void); + +void sigreg(sig_handler_t handler, sig_cleanup_t cleanup); +void catch_signal(int signo); +void ignore(int signo); + +#endif diff 
--git a/hash/sha256.c b/hash/sha256.c new file mode 100644 index 0000000..98c8788 --- /dev/null +++ b/hash/sha256.c @@ -0,0 +1,375 @@ +/* + * sha256.c - Implementation of the Secure Hash Algorithm-256 (SHA-256). + * + * Implemented from the description on the NIST Web site: + * http://csrc.nist.gov/cryptval/shs.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "sha256.h" +#include "../file/fdio.h" +#include "../file/file.h" +#include "../byte/fmt.h" +#include "../byte/byte.h" + +/****************************************************************************** + * CONSTANTS + ******************************************************************************/ + +/* + * Some helper macros for processing 32-bit values, while + * being careful about 32-bit vs 64-bit system differences. + */ +#if SIZEOF_LONG > 4 + #define TRUNCLONG(x) ((x) & uint32_t) + #define ROTATE(x,n) (TRUNCLONG(((x) >> (n))) | ((x) << (32 - (n)))) + #define SHIFT(x,n) (TRUNCLONG(((x) >> (n)))) +#else + #define TRUNCLONG(x) (x) + #define ROTATE(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) + #define SHIFT(x,n) ((x) >> (n)) +#endif + +/* + * Helper macros used by the SHA-256 computation. + */ +#define CH(x,y,z) (((x) & (y)) ^ (TRUNCLONG(~(x)) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define SUM0(x) (ROTATE((x), 2) ^ ROTATE((x), 13) ^ ROTATE((x), 22)) +#define SUM1(x) (ROTATE((x), 6) ^ ROTATE((x), 11) ^ ROTATE((x), 25)) +#define RHO0(x) (ROTATE((x), 7) ^ ROTATE((x), 18) ^ SHIFT((x), 3)) +#define RHO1(x) (ROTATE((x), 17) ^ ROTATE((x), 19) ^ SHIFT((x), 10)) + + +/* + * Constants used in each of the SHA-256 rounds. 
+ */ +static uint32_t const K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + + +void sha256_init(struct sha256_t *sha) +{ + sha->len_input = 0; + sha->A = 0x6a09e667; + sha->B = 0xbb67ae85; + sha->C = 0x3c6ef372; + sha->D = 0xa54ff53a; + sha->E = 0x510e527f; + sha->F = 0x9b05688c; + sha->G = 0x1f83d9ab; + sha->H = 0x5be0cd19; + sha->len_total = 0; +} + + +/****************************************************************************** + * HELPER + ******************************************************************************/ + +/** + * hash_block + * `````````` + * Hash a single block of input. + * + * @sha : Destination of the hashed input. + * @block: Input block. + * Return: Nothing. 
+ */ +static void hash_block(struct sha256_t *sha, const unsigned char *block) +{ + uint32_t W[64]; + uint32_t a, b, c, d, e, f, g, h; + uint32_t temp; + uint32_t temp2; + int t; + + /* Unpack the block into 64 32-bit words */ + for (t=0; t<16; ++t) { + W[t] = (((uint32_t)(block[t * 4 + 0])) << 24) | + (((uint32_t)(block[t * 4 + 1])) << 16) | + (((uint32_t)(block[t * 4 + 2])) << 8) | + ((uint32_t)(block[t * 4 + 3])); + } + for(t = 16; t < 64; ++t) { + W[t] = TRUNCLONG(RHO1(W[t-2]) + W[t-7] + RHO0(W[t-15]) + W[t-16]); + } + + /* Load the SHA-256 state into local variables */ + a = sha->A; + b = sha->B; + c = sha->C; + d = sha->D; + e = sha->E; + f = sha->F; + g = sha->G; + h = sha->H; + + /* Perform 64 rounds of hash computations */ + for (t=0; t<64; ++t) { + temp = TRUNCLONG(h + SUM1(e) + CH(e, f, g) + K[t] + W[t]); + temp2 = TRUNCLONG(SUM0(a) + MAJ(a, b, c)); + h = g; + g = f; + f = e; + e = TRUNCLONG(d + temp); + d = c; + c = b; + b = a; + a = TRUNCLONG(temp + temp2); + } + + /* Combine the previous SHA-256 state with the new state */ + sha->A = TRUNCLONG(sha->A + a); + sha->B = TRUNCLONG(sha->B + b); + sha->C = TRUNCLONG(sha->C + c); + sha->D = TRUNCLONG(sha->D + d); + sha->E = TRUNCLONG(sha->E + e); + sha->F = TRUNCLONG(sha->F + f); + sha->G = TRUNCLONG(sha->G + g); + sha->H = TRUNCLONG(sha->H + h); + + /* Clear the temporary state */ + memset(W, 0, sizeof(uint32_t) * 64); + a = b = c = d = e = f = g = h = temp = temp2 = 0; +} + + +/****************************************************************************** + * SHA-256 + ******************************************************************************/ + +/** + * sha256_data + * ``````````` + * Add data to the sha256 context prior to final hashing. + * + * @sha : SHA-256 context object. + * @buffer: Buffer with data to be hashed. + * @len : Length (bytes) of @buffer. + * Return : Nothing. 
+ */ +void sha256_data(struct sha256_t *sha, const void *buffer, unsigned long len) +{ + unsigned long templen; + + /* Add to the total length of the input stream */ + sha->len_total += (uint64_t)len; + + /* Copy the blocks into the input buffer and process them */ + while (len > 0) { + if (!(sha->len_input) && len >= 64) { + /* Short cut: no point copying the data twice */ + hash_block(sha, (const unsigned char *)buffer); + buffer = (const void *)(((const unsigned char *)buffer) + 64); + len -= 64; + } else { + templen = len; + + if (templen > (64 - sha->len_input)) { + templen = 64 - sha->len_input; + } + + memcpy(sha->input + sha->len_input, buffer, templen); + + if ((sha->len_input += templen) >= 64) { + hash_block(sha, sha->input); + sha->len_input = 0; + } + + len -= templen; + buffer = (const void *)(((const unsigned char *)buffer) + templen); + + } + } +} + + +/** + * sha256_hash + * ``````````` + * Finalize a SHA-256 context block and output the hash. + * + * @sha : The SHA256 context. + * @hash : The destination hash buffer. + * Return: Nothing. + */ +void sha256_hash(struct sha256_t *sha, unsigned char hash[SHA256_SIZE]) +{ + uint64_t total_bits; + + /* Compute the final hash if necessary */ + if (hash) { + + /* + * Pad the input data to a multiple of 512 bits + */ + + /* Need two blocks of padding. */ + if (sha->len_input >= 56) { + sha->input[(sha->len_input)++] = (unsigned char)0x80; + while (sha->len_input < 64) { + sha->input[(sha->len_input)++] = (unsigned char)0x00; + } + hash_block(sha, sha->input); + sha->len_input = 0; + /* Need one block of padding. 
*/ + } else { + sha->input[(sha->len_input)++] = (unsigned char)0x80; + } + + while (sha->len_input < 56) { + sha->input[(sha->len_input)++] = (unsigned char)0x00; + } + + total_bits = (sha->len_total << 3); + + fmt_u32(sha->input + 56, (uint32_t)(total_bits >> 32)); + fmt_u32(sha->input + 60, (uint32_t)total_bits); + + hash_block(sha, sha->input); + + /* Write the final hash value to the supplied buffer */ + fmt_u32(hash, sha->A); + fmt_u32(hash + 4, sha->B); + fmt_u32(hash + 8, sha->C); + fmt_u32(hash + 12, sha->D); + fmt_u32(hash + 16, sha->E); + fmt_u32(hash + 20, sha->F); + fmt_u32(hash + 24, sha->G); + fmt_u32(hash + 28, sha->H); + } + + /* Fill the entire context structure with zeros to blank it */ + memset(sha, 0, sizeof(struct sha256_t)); +} + + +/****************************************************************************** + * HELPER FUNCTIONS + ******************************************************************************/ + +/** + * print_hash32 + * ```````````` + * Print a 32-byte hash value in hexadecimal ASCII. + * + * @hash : Hash value. + * Return: Nothing. + */ +void sha256_print(unsigned char *hash) +{ + char s[SHA256_SIZE+1]; + int i; + + for (i=0; i<SHA256_SIZE; i++) { + fmt_xlong(&s[i], (unsigned long)hash[i]); + } + s[i] = 0; /* NUL Terminate. */ + + printf("%s\n", s); +} + + +/** + * sha256_sprint + * ````````````` + * Print a 32-byte hash value in hexadecimal ASCII. + * + * @dst : Destination buffer, should be >= SHA256_SIZE+1 bytes. + * @hash : Hash value. + * Return: Nothing. + */ +void sha256_sprint(char *dst, unsigned char *hash) +{ + int i; + for (i=0; i<SHA256_SIZE; i++) { + fmt_xlong(&dst[i], (unsigned long)hash[i]); + } + dst[i] = 0; /* NUL Terminate. */ +} + + +/** + * sha256 + * `````` + * Directly hash a buffer of given length. + * + * @data : Data to be hashed. + * @len : Length (bytes) of @data. + * @hash : Destination of the hash. + * Return: Nothing. 
+ */ +void sha256(const void *data, unsigned long len, unsigned char *hash) +{ + struct sha256_t sha; + + sha256_init(&sha); + sha256_data(&sha, data, len); + sha256_hash(&sha, hash); +} + + +/** + * sha256_fd + * ````````` + * Given a file descriptor, make a SHA-256 sum of the file's contents. + * + * @fd : File descriptor. + * @hash : Destination buffer to contain the hash. + * Return: Nothing (@hash is modified). + */ +void sha256_fd(int fd, unsigned char *hash) +{ + static char buf[4096]; + volatile int loop = 0; + unsigned long size; + struct sha256_t sha; + + sha256_init(&sha); + + fd_setpos(fd, 0, SEEK_SET); + + while ((size = fd_readloop(fd, buf, 4096, &loop)) != EOF) { + sha256_data(&sha, buf, size); + } + + fd_setpos(fd, 0, SEEK_SET); + + sha256_hash(&sha, hash); +} + + +/** + * sha256_cmp + * `````````` + * Compare two SHA-256 sums. + * + * @sum1 : Sum to be compared. + * @sum2 : Sum to be compared. + * Return: < 0 if sum1 < sum2, == 0 if sum1 == sum2, > 0 if sum1 > sum2. + */ +int sha256_cmp(unsigned char *sum1, unsigned char *sum2) +{ + return byte_diff(sum1, SHA256_SIZE, sum2); +} + + diff --git a/hash/sha256.h b/hash/sha256.h new file mode 100644 index 0000000..d3f360d --- /dev/null +++ b/hash/sha256.h @@ -0,0 +1,34 @@ +#ifndef _SHA256_H +#define _SHA256_H +#include <stdint.h> + +/* Number of bytes in a SHA-256 hash value. */ +#define SHA256_SIZE 32 +#define SHA256_BUF 33 // NUL byte + + +/* SHA-256 context block. 
/**
 * checksum_ip4
 * ````````````
 * Compute the IPv4 header checksum (16-bit one's-complement sum).
 *
 * @header : Buffer of unsigned short (may be cast from a struct); it is
 *           read as 16-bit words in host memory order.
 * @len    : Size of the header, in bytes.
 * Return  : Checksum value, ready to be stored in the header as-is.
 *
 * AUTHOR
 *      Taken from TCP/IP Illustrated Vol. 2(1995) by Gary R. Wright
 *      and W. Richard Stevens. Page 236
 */
unsigned short checksum_ip4(void *header, int len)
{
        const unsigned short *word = header;
        unsigned long sum = 0;

        while (len > 1) {
                /* Advance a whole 16-bit word per iteration. */
                sum += *word++;

                /* If high order bit is set, fold. */
                if (sum & 0x80000000UL) {
                        sum = (sum & 0xFFFF) + (sum >> 16);
                }
                len -= 2;
        }

        /*
         * Take care of leftover byte: it is the first byte of a final
         * word whose second byte is zero, whatever the host byte order.
         */
        if (len > 0) {
                unsigned short last = 0;

                *(unsigned char *)&last = *(const unsigned char *)word;
                sum += last;
        }

        /* Fold the carries back in until the sum fits in 16 bits. */
        while (sum >> 16) {
                sum = (sum & 0xFFFF) + (sum >> 16);
        }

        return (unsigned short)~sum;
}
+ i = fmt_ulong(s,(unsigned long) (unsigned char) ip[3]); len += i; if (s) s += i; + + return len; +} + diff --git a/inet/ip4.h b/inet/ip4.h new file mode 100644 index 0000000..b00492d --- /dev/null +++ b/inet/ip4.h @@ -0,0 +1,12 @@ +#ifndef IP4_H +#define IP4_H + +extern unsigned int ip4_scan(const char *src,char *ip); +extern unsigned int ip4_fmt(char *dest,const char *ip); + +#define IP4_ARR 4 +#define IP4_FMT 20 + +#define NEW_IP4(decl) char decl[IP4_ARR] + +#endif diff --git a/inet/ip6.c b/inet/ip6.c new file mode 100644 index 0000000..67db14a --- /dev/null +++ b/inet/ip6.c @@ -0,0 +1,218 @@ +#include "ip6.h" +#include "ip4.h" +#include "../byte/scan.h" +#include "../byte/fmt.h" +#include "../byte/byte.h" + + +/* + * IPv6 addresses are really ugly to parse. + * Syntax: (h = hex digit) + * 1. hhhh:hhhh:hhhh:hhhh:hhhh:hhhh:hhhh:hhhh + * 2. any number of 0000 may be abbreviated as "::", but only once + * 3. The last two words may be written as IPv4 address + */ + +unsigned int ip6_scan(const char *s, char ip[16]) +{ + unsigned int i; + unsigned int len=0; + unsigned int u; + + char suffix[16]; + int prefixlen=0; + int suffixlen=0; + + for (i=0; i<16; i++) { + ip[i]=0; + } + + for (;;) { + if (*s == ':') { + len++; + if (s[1] == ':') { /* Found "::", skip to part 2 */ + s+=2; + len++; + break; + } + s++; + } + + i = scan_0x(s,&u); + if (!i) { + return 0; + } + + if (prefixlen==12 && s[i]=='.') { + /* the last 4 bytes may be written as IPv4 address */ + i = ip4_scan(s, ip+12); + if (i) { + return i+len; + } else { + return 0; + } + } + + ip[prefixlen++] = (u >> 8); + ip[prefixlen++] = (u & 255); + s += i; + len += i; + + if (prefixlen==16) { + return len; + } + } + + /* part 2, after "::" */ + for (;;) { + if (*s == ':') { + if (suffixlen==0) { + break; + } + s++; + len++; + } else if (suffixlen!=0) { + break; + } + + i = scan_0x(s,&u); + + if (!i) { + len--; + break; + } + + if (suffixlen+prefixlen<=12 && s[i]=='.') { + int j=ip4_scan(s,suffix+suffixlen); + if (j) 
/**
 * ip6_fmt
 * ```````
 * Format a 16-byte IPv6 address as text.
 *
 * The longest run of two or more all-zero 16-bit groups (the leftmost one
 * on a tie) is abbreviated as "::". When the address carries the
 * V4-mapped prefix, the last four bytes are written in dotted-quad form
 * by ip4_fmt().
 *
 * @s    : Destination buffer, or NULL to only compute the length.
 * @ip   : The 16 address bytes.
 * Return: Number of characters written (or that would be written). The
 *         count is the same whether or not @s is NULL. No NUL terminator
 *         is appended.
 */
unsigned int ip6_fmt(char *s, const char ip[16])
{
        unsigned long word[8];
        unsigned int len = 0;
        unsigned int n;
        int ngroups;
        int best_start = -1;    /* first group of the run to abbreviate */
        int best_len = 0;
        int run_start = 0;
        int run_len = 0;
        int need_sep = 0;       /* a ':' is owed before the next group  */
        int v4;
        int g;

        v4 = ip6_isv4mapped(ip) ? 1 : 0;

        /* With a V4-mapped address only the first 6 groups are hex. */
        ngroups = v4 ? 6 : 8;

        for (g = 0; g < 8; g++) {
                word[g] = ((unsigned long)(unsigned char)ip[2*g] << 8)
                        +  (unsigned long)(unsigned char)ip[2*g + 1];
        }

        /* Locate the longest run of zero groups. */
        for (g = 0; g < ngroups; g++) {
                if (word[g] != 0) {
                        run_len = 0;
                        continue;
                }
                if (run_len == 0) {
                        run_start = g;
                }
                run_len++;
                if (run_len > best_len) {
                        best_len   = run_len;
                        best_start = run_start;
                }
        }

        /* A single zero group is written out as "0", not abbreviated. */
        if (best_len < 2) {
                best_start = -1;
        }

        g = 0;
        while (g < ngroups) {
                if (g == best_start) {
                        /* "::" stands in for the run and both separators. */
                        if (s) {
                                *s++ = ':';
                                *s++ = ':';
                        }
                        len += 2;
                        g += best_len;
                        need_sep = 0;
                        continue;
                }

                if (need_sep) {
                        if (s) {
                                *s++ = ':';
                        }
                        ++len;
                }

                n = fmt_xlong(s, word[g]);
                len += n;
                if (s) {
                        s += n;
                }

                need_sep = 1;
                g++;
        }

        if (v4) {
                if (need_sep) {
                        if (s) {
                                *s++ = ':';
                        }
                        ++len;
                }
                n = ip4_fmt(s, ip + 12);
                len += n;
        }

        return len;
}
/*
 * IP6_ARR : size in bytes of the buffer holding a binary IPv6 address.
 *           The scan/format routines declared in this header take a
 *           16-byte address, so NEW_IP6() must reserve 16 bytes.
 * IP6_FMT : size of the buffer used for the textual form.
 */
#define IP6_ARR 16
#define IP6_FMT 40

/* Declare a buffer named `decl` large enough for one binary IPv6 address. */
#define NEW_IP6(decl) char decl[IP6_ARR]
+ + +/* Ping statistics */ +struct pingstat { + int tmin; + int tmax; + int tsum; + int nsent; + int nrecv; +}; + + +/*#define (sizeof(struct hdr_icmp) + MAX_PACKET)*/ + + + +int icmp_packet(char *data, int packetsize) +{ + struct hdr_icmp *hdr; + char *ptr; + + hdr = (struct hdr_icmp *)data; + + hdr->i_type = ICMP_ECHO; + hdr->i_code = 0; + hdr->i_id = (unsigned short)getpid(); /* PID or Thread ID */ + hdr->i_cksum = 0; + hdr->i_seq = 0; + + ptr = data + sizeof(struct hdr_icmp); + + if (packetsize < sizeof(struct hdr_icmp)) { + return 0; + } + + /*memset(ptr, 'E', packetsize - sizeof(struct hdr_icmp)); */ + + return 1; +} + + + +void ping_unpack(char *packet, int bytes, struct sockaddr_in *from, struct pingstat *stat) +{ + struct hdr_ip *ip_h; + struct hdr_icmp *icmp_h; + unsigned short ip_h_len; + int triptime; + + ip_h = (struct hdr_ip *)packet; + ip_h_len = (ip_h->h_len_and_vers & 0x0F) * 4; + + if (bytes < ip_h_len + ICMP_MIN) { + printf("Too few bytes from %s\n", inet_ntoa(from->sin_addr)); + return; + } + + icmp_h = (struct hdr_icmp *)(packet + ip_h_len); + + if (icmp_h->i_type != ICMP_ECHOREPLY + && icmp_h->i_type != ICMP_ECHO) { + fprintf(stderr, "non-echo type %d recvd\n", icmp_h->i_type); + return; + } + + if (icmp_h->i_id != (unsigned short)getpid()) { + fprintf(stderr, "someone else's packet!\n"); + return; + } + + /*triptime = clock() - icmp_h->timestamp;*/ + /*stat->tsum += triptime;*/ + + /*if (triptime < stat->tmin) {*/ + /*stat->tmin = triptime;*/ + /*}*/ + /*if (triptime > stat->tmax) {*/ + /*stat->tmax = triptime;*/ + /*}*/ + + stat->nrecv++; + + bytes -= ip_h_len + sizeof(struct hdr_icmp); + + printf("%d bytes from %s:", bytes, inet_ntoa(from->sin_addr)); + printf(" icmp_seq=%d", icmp_h->i_seq); + printf(" time=%d ms", triptime); + printf(" TTL=%d", ip_h->ttl); + printf("\n"); +} + + + +void do_ping(struct sock_t *sock, int npacket, struct pingstat *stat) +{ + static char response[PACKETSIZE_ICMP_SM]; + unsigned short seq_no; + int i; + + 
seq_no = 1; + + for (i=0; i<npacket; i++) { + int bytes; + struct hdr_icmp *h = (struct hdr_icmp *)sock->data; + + sleep_ms(100); + + h->i_cksum = 0; + /*h->timestamp = clock();*/ + h->i_seq = seq_no++; + h->i_cksum = checksum((unsigned short *)sock->packet, PACKETSIZE_ICMP_SM); + + fprintf(stderr, "sending ICMP\n"); + bytes = sock_send(sock, sock->packet, PACKETSIZE_ICMP_SM); + fprintf(stderr, "sent\n"); + + if (bytes < 0) { + if (errno == ETIMEDOUT) { + printf("timed out\n"); + continue; + } + perror("ping: sendto"); + return; + } + + if (bytes < PACKETSIZE_ICMP) { + fprintf(stdout, "Wrote %d bytes\n", bytes); + } + + fflush(stdout); + + stat->nsent++; + + fprintf(stderr, "scanning ICMP\n"); + bytes = sock_scan(sock, response, PACKETSIZE_ICMP_SM); + fprintf(stderr, "scanned\n"); + + if (bytes < 0) { + if (errno == ETIMEDOUT) { + printf("timed out\n"); + continue; + } + perror("ping: recvfrom"); + return; + } + + ping_unpack(response, bytes, &sock->a_dest, stat); + + sleep_ms(1000); + } + +} + + +int ping(struct sock_t *sock, char *host, int npacket) +{ + struct pingstat stat = {}; + int timeout; + + sock_setup_client(sock, host, ICMP); + + timeout = 100; + + if (!sock_option(sock, SO_RCVTIMEO, timeout)) { + return 0; + } + + timeout = 100; + + if (!sock_option(sock, SO_SNDTIMEO, timeout)) { + return 0; + } + + stat.tmin = 999999999; + stat.tmax = 0; + stat.tsum = 0; + stat.nsent = 0; + stat.nrecv = 0; + + sock_memset_packet(sock, 0); + + icmp_packet(sock->packet, PACKETSIZE_ICMP_SM); + + /* Incipit */ + + if (sock->a_dest.sin_family == AF_INET) { + printf("PING %s (%s): %d data bytes\n", + host, inet_ntoa(addr_dest(sock)), PACKETSIZE_ICMP_SM); + } else { + printf("PING %s: %d data bytes\n", host, PACKETSIZE_ICMP_SM); + } + printf("\n"); + + /* Establish connection and ping. 
*/ + + sock_open(sock); + sock_connect(sock); + do_ping(sock, npacket, &stat); + + /* Statistics */ + + printf("----%s PING Statistics----\n", host); + printf("%d packets transmitted, ", stat.nsent); + printf("%d packets received, ", stat.nrecv); + + if (stat.nsent) { + if (stat.nrecv > stat.nsent) { + printf("-- somebody's printing up packets!"); + } else { + printf("%d%% packet loss", + (int) (((stat.nsent - stat.nrecv) * 100) / stat.nsent)); + } + printf("\n"); + } + + if (stat.nrecv) { + printf("round-trip (ms) min/avg/max = %d/%d/%d\n", + stat.tmin, stat.tsum/stat.nrecv, stat.tmax); + } + + close(sock->socket); + + return 1; +} + diff --git a/inet/ping.h b/inet/ping.h new file mode 100644 index 0000000..70509e5 --- /dev/null +++ b/inet/ping.h @@ -0,0 +1,7 @@ +#ifndef _INET_PING_H +#define _INET_PING_H +#include "socket.h" + +int ping(struct sock_t *sock, char *host, int npacket); + +#endif diff --git a/inet/socket.c b/inet/socket.c new file mode 100644 index 0000000..92000d6 --- /dev/null +++ b/inet/socket.c @@ -0,0 +1,419 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <signal.h> +#include <time.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include "../fork/daemon.h" +#include "../file/file.h" +#include "../file/fdio.h" +#include "../util/debug.h" +#include "../text/textutils.h" +#include "socketio.h" +#include "socket.h" + + +#define BUFSIZE 8096 + +#define ADDR(addr_ptr) (struct sockaddr *)(addr_ptr) + + +static const struct sockspec_t SOCKSPEC[] = { + /* ICMP */ { AF_INET, SOCK_RAW, IPPROTO_ICMP }, + /* TCP */ { AF_INET, SOCK_STREAM, IPPROTO_IP }, + /* UDP */ { AF_INET, SOCK_DGRAM, IPPROTO_IP }, + /* RAW */ { AF_INET, SOCK_RAW, IPPROTO_IP } +}; + + + +/****************************************************************************** + * SOCK 
SENSE CONFIGURATION + * + * sock_setup_server -- configure the sock object as a server (listener) + * sock_setup_client -- configure the sock object as a client (requester) + * + ******************************************************************************/ + +/** + * sock_setup_server + * ````````````````` + * Configure a sock type to act as a server (listener). + * + * @sock : the sock object. + * @port : port to listen on. + * @tag : type of socket. + * Return: Nothing. + */ +void sock_setup_server(struct sock_t *sock, int port, enum socktype tag) +{ + sock->t.domain = SOCKSPEC[tag].domain; + sock->t.type = SOCKSPEC[tag].type; + sock->t.protocol = SOCKSPEC[tag].protocol; + + sock->a_orig.sin_family = sock->t.domain; + sock->a_orig.sin_port = htons(port); + sock->a_orig.sin_addr.s_addr = htonl(INADDR_ANY); + + sock->l_orig = sizeof(sock->a_orig); +} + + +/** + * sock_setup_client + * ````````````````` + * Configure a sock object to act as a client (requester). + * + * @sock : the sock object. + * @tag : the type of socket. + * Return: Nothing. 
+ */ +void sock_setup_client(struct sock_t *sock, const char *dst, enum socktype tag) +{ + static char addr[IPV4_ADDR_STRLEN]; + static char port[IPPORT_STRLEN]; + + sock->t.domain = SOCKSPEC[tag].domain; + sock->t.type = SOCKSPEC[tag].type; + sock->t.protocol = SOCKSPEC[tag].protocol; + + /* Got an ip address */ + if (!isalpha(dst[0])) { + strbif(addr, port, dst, ":"); + /* Got a hostname */ + } else { + sock_setup_client_hostname(sock, dst); + return; + } + + sock->a_dest.sin_family = sock->t.domain; + + /*if (port[0] != 0) {*/ + /*sock->a_dest.sin_port = htons(atoi(port));*/ + /*}*/ + /*if (SOCKSPEC[tag].protocol == IPPROTO_ICMP) {*/ + /*sock->a_dest.sin_port = htons(7);*/ + /*} else {*/ + sock->a_dest.sin_port = htons(atoi(port)); + /*}*/ + + inet_pton(sock->t.domain, addr, &sock->a_dest.sin_addr); + + sock->l_dest = sizeof(sock->a_dest); +} + + +/****************************************************************************** + * WRAPPERS AROUND THE BERKELEY SOCKETS API + * + * sock_open -- socket() + * sock_bind -- bind() + * sock_listen -- listen() + * sock_accept -- accept() + * + ******************************************************************************/ + +/** + * sock_open + * ````````` + * Open the listening socket. + * + * @sock : the sock object. + * Return: Nothing. + */ +void sock_open(struct sock_t *sock) +{ + int fd; + + if ((fd = socket(sock->t.domain, sock->t.type, sock->t.protocol)) < 0) { + halt(SIGABRT, "Could not initialize socket"); + } + + sock->socket = fd; +} + + +/** + * sock_bind + * ````````` + * Bind the listening socket to an address. + * + * @sock : the sock object. + * Return: Nothing. + */ +void sock_bind(struct sock_t *s) +{ + if ((bind(s->socket, ADDR(&s->a_orig), s->l_orig)) < 0) { + halt(SIGABRT, "Cannot bind"); + } +} + + +/** + * sock_listen + * ``````````` + * Allow the listening socket to accept connections. + * + * @sock : the sock object. + * Return: Nothing. 
/**
 * sock_setup_client_hostname
 * ``````````````````````````
 * Resolve a hostname and store the result as the destination address.
 *
 * @sock : the sock object.
 * @URI  : hostname to resolve.
 * Return: true if the name resolved to an IPv4 address, else false.
 *
 * NOTE
 *      The socket domain, type and protocol are taken from the first
 *      getaddrinfo() result, replacing whatever was configured before.
 *      NOTE(review): a caller that has already chosen a socket type may
 *      not want this -- confirm against callers.
 */
bool sock_setup_client_hostname(struct sock_t *sock, const char *URI)
{
        struct addrinfo    hints;
        struct addrinfo   *info = NULL;
        struct sockaddr_in resolved;

        /* The destination is a sockaddr_in, so ask for IPv4 results only. */
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;

        if (getaddrinfo(URI, NULL, &hints, &info) != 0 || info == NULL) {
                return false;
        }

        /* Refuse a result that cannot be read as a sockaddr_in. */
        if (info->ai_addr == NULL || info->ai_addrlen < sizeof(resolved)) {
                freeaddrinfo(info);
                return false;
        }

        sock->t.domain   = info->ai_family;
        sock->t.type     = info->ai_socktype;
        sock->t.protocol = info->ai_protocol;

        /*
         * The info is dynamically allocated, so we need to copy it.
         * Only the 4-byte address is stored in sin_addr; copying the
         * whole sockaddr there would run past the end of the field.
         */
        memcpy(&resolved, info->ai_addr, sizeof(resolved));

        sock->a_dest.sin_family = sock->t.domain;
        sock->a_dest.sin_addr   = resolved.sin_addr;

        sock->l_dest = info->ai_addrlen;

        /* Free the allocated structs in the info list. */
        freeaddrinfo(info);

        return true;
}
+ * + * SO_SNDTIMEO (struct timeval) + * Sets the timeout value that specifies the maximum amount of time an + * output function blocks because flow control prevents data from being + * sent. If send operation has blocked for this time, it will return with + * a partial count or with errno set. Default is 0, indicating a function + * would never time out. Not all implementations allow this option to be + * set. + */ +int sock_option(struct sock_t *sock, int option, int value) +{ + struct linger L; + struct timeval T; + int Z; + bool B; + int r; + + switch (option) { + /* boolean */ + case SO_DEBUG: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_KEEPALIVE: + case SO_OOBINLINE: + case SO_DONTROUTE: + B = (value) ? true : false; + r = setsockopt(sock->socket, SOL_SOCKET, option, &B, sizeof(B)); + break; + + /* linger */ + case SO_LINGER: + L.l_onoff = (value) ? true : false; + L.l_linger = value; + r = setsockopt(sock->socket, SOL_SOCKET, option, &L, sizeof(L)); + break; + + /* size */ + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_SNDBUF: + case SO_RCVBUF: + Z = value; + r = setsockopt(sock->socket, SOL_SOCKET, option, &Z, sizeof(Z)); + break; + + /* timeval */ + case SO_RCVTIMEO: + case SO_SNDTIMEO: + T.tv_sec = value; + T.tv_usec = 0; + r = setsockopt(sock->socket, SOL_SOCKET, option, &T, sizeof(T)); + break; + + default: + r = 0; + break; + } + + return (r == -1) ? 
0 : 1; +} + + +void sock_memset_packet(struct sock_t *sock, char byte) +{ + memset(sock->packet, byte, PACKETSIZE_MAX); +} + +void sock_memset_header(struct sock_t *sock, char byte) +{ + memset(sock->header, byte, PACKETSIZE_MAX); +} + + + +int sock_send(struct sock_t *sock, void *packet, int packetsize) +{ + return fd_write(sock->socket, packet, packetsize); +} + +int sock_scan(struct sock_t *sock, void *packet, int packetsize) +{ + return fd_read(sock->socket, packet, packetsize); +} + + + + + + diff --git a/inet/socket.h b/inet/socket.h new file mode 100644 index 0000000..c3e529a --- /dev/null +++ b/inet/socket.h @@ -0,0 +1,136 @@ +#ifndef _SOCKET_H +#define _SOCKET_H +#include <stdbool.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +/* + * Protocol options + */ +#define PROTO_IP IPPROTO_IP /* dummy for IP */ +#define PROTO_IPV4 IPPROTO_IPV4 /* internet protocol version 4 */ +#define PROTO_IPV6 IPPROTO_IPV6 /* internet protocol version 6 */ +#define PROTO_TCP IPPROTO_TCP /* transmission control protocol */ +#define PROTO_UDP IPPROTO_UDP /* user datagram protocol */ +#define PROTO_ICMP IPPROTO_ICMP /* internet control and messaging protocol */ + +/* + * Internet addresses + */ +#define ADDR_ANY INADDR_ANY +#define ADDR_LOOPBACK INADDR_LOOPBACK +#define ADDR_BROADCAST INADDR_BROADCAST + +/* + * Ports < IPPORT_RESERVED are for privleged + * processes (e.g. the kernel). + * + * Ports > IPPORT_USERRESERVED are usually + * for servers. + */ +#define IPPORT_RESERVED 1024 +#define IPPORT_USERRESERVED 5000 +#define IPPORT_MAXIMUM 65535 +#define IPPORT_STRLEN 6 + +/* Length of an IPv4 string */ +#define IPV4_ADDR_STRLEN 16 + +/* Length of an IPv6 string */ +#define IPV6_ADDR_STRLEN 46 + +/* Maximum size of the accept queue. 
/*
 * Accessors for the address part of a sock object's destination and
 * originating socket addresses. The argument and the whole expansion
 * are parenthesized so any pointer expression can be passed.
 */
#define addr_dest(sock) ((sock)->a_dest.sin_addr)
#define addr_orig(sock) ((sock)->a_orig.sin_addr)
sock_memset_header(struct sock_t *sock, char byte); + +int sock_send(struct sock_t *sock, void *packet, int packetsize); +int sock_scan(struct sock_t *sock, void *packet, int packetsize); + +#endif diff --git a/inet/socketio.c b/inet/socketio.c new file mode 100644 index 0000000..db8ef22 --- /dev/null +++ b/inet/socketio.c @@ -0,0 +1,31 @@ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include "socketio.h" +#include "../util/debug.h" + + +/*void sock_echo(int fd, struct sockaddr_in *addr)*/ +/*{*/ + /*char buf[4096];*/ + /*char head[4096];*/ + /*size_t len;*/ + + /*for (;;) {*/ + /*len = sock_readline(fd, buf, 4096);*/ + /*if (len == 0) {*/ + /*return;*/ + /*} else if (len < 0) {*/ + /*halt(SIGABRT, "sock_readline error\n");*/ + /*}*/ + + /*sprintf(head, "ECHO (%s): ", inet_ntoa(addr->sin_addr));*/ + /*sock_write(fd, head, strlen(head));*/ + + /*if (sock_write(fd, buf, len) != len) {*/ + /*halt(SIGABRT, "sock_write error\n");*/ + /*}*/ + /*}*/ +/*}*/ + + diff --git a/inet/socketio.h b/inet/socketio.h new file mode 100644 index 0000000..ac47753 --- /dev/null +++ b/inet/socketio.h @@ -0,0 +1,27 @@ +#ifndef _SOCKET_IO_H +#define _SOCKET_IO_H +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <signal.h> +#include <time.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <arpa/inet.h> + + +//size_t sock_read(int fd, char *buf, size_t nbytes); +//size_t sock_write(int fd, char *buf, size_t nbytes); +size_t sock_readline(int fd, char *buf, size_t maxlen); + +void sock_echo(int fd, struct sockaddr_in *addr); + +//void sock_echo(int fd); + +#endif diff --git a/text/memchr.c b/text/memchr.c new file mode 100644 index 0000000..69e3d13 --- /dev/null +++ b/text/memchr.c @@ -0,0 +1,102 @@ + +/** + * memchr + * `````` + * Search memory region for a character. + * + * @src : Region to be searched. 
#include <stddef.h>
#include <string.h>

/**
 * memchr
 * ``````
 * Search memory region for a character.
 *
 * @src_void: Region to be searched.
 * @c       : Character to look for (converted to unsigned char).
 * @len     : Length of the memory region, in bytes.
 * Return   : Address of the first matching byte in the region, or else NULL.
 */

#define LONGBYTES (sizeof(unsigned long))

/* A word with 0x01 in every byte, and one with 0x80 in every byte. */
#define WORD_ONES  (~0UL / 0xFFUL)
#define WORD_HIGHS (WORD_ONES * 0x80UL)

/*
 * NUL expands to nonzero if X (unsigned long) contains a zero byte
 */
#define NUL(X) (((X) - WORD_ONES) & ~(X) & WORD_HIGHS)

/*
 * Expands to nonzero if X contains a byte equal to the one repeated in MASK
 */
#define DETECTCHAR(X, MASK) (NUL((X) ^ (MASK)))


void *memchr(const void *src_void, int c, size_t len)
{
        const unsigned char *src = src_void;
        const unsigned char  d   = (unsigned char)c;

        #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
        /* The search byte repeated in every byte of a word. */
        const unsigned long mask = WORD_ONES * d;

        /*
         * The fast code reads the source one word at a time and skips
         * every word that cannot contain the search character, which is
         * detected by XOR-ing the word with a word-sized block of the
         * search character and then checking the result for a zero byte.
         * Words are loaded with memcpy, so no alignment is required.
         */
        while (len >= LONGBYTES) {
                unsigned long word;

                memcpy(&word, src, LONGBYTES);

                if (DETECTCHAR(word, mask)) {
                        break;
                }
                src += LONGBYTES;
                len -= LONGBYTES;
        }
        /*
         * Either a candidate word was found or fewer than LONGBYTES
         * bytes remain; the bytewise loop settles both cases.
         */
        #endif /* !PREFER_SIZE_OVER_SPEED */

        while (len--) {
                if (*src == d) {
                        return (void *)src;
                }
                src++;
        }
        return NULL;
}
/**
 * textutils_strstr
 * ````````````````
 * Find the first occurrence of one string inside another.
 *
 * @h    : string to be searched (haystack).
 * @n    : string to look for (needle).
 * Return: pointer to the first occurrence of @n in @h, @h itself when @n
 *         is empty, or NULL when @n does not occur in @h.
 */
char *textutils_strstr(const char *h, const char *n)
{
        const char *begin;
        const char *end;
        size_t hlen;
        size_t nlen;

        hlen = strlen(h);
        nlen = strlen(n);

        /*
         * The first occurrence of the empty string is deemed to occur at
         * the beginning of the string.
         */
        if (nlen == 0) {
                return (char *)h;
        }

        /* A needle longer than the haystack cannot occur in it. */
        if (hlen < nlen) {
                return NULL;
        }

        /* Last position at which the needle still fits; computed only
         * once hlen >= nlen is known, so the subtraction cannot wrap. */
        end = h + (hlen - nlen);

        for (begin = h; begin <= end; ++begin) {
                if (begin[0] == n[0]
                &&  memcmp(begin + 1, n + 1, nlen - 1) == 0) {
                        return (char *)begin;
                }
        }
        return NULL;
}
+ */ +size_t sbif(char *l, char *r, const char *str, const char *tok) +{ + const char *cur; + char *cpy; + size_t t_len; + size_t s_len; + + s_len = strlen(str); + t_len = strlen(tok); + + if (t_len == 0) { + return -1; + } + + if (__builtin_expect (s_len < t_len, 0)) + return -1; + + cpy = l; // Start with the left string. + + for (cur=str; *cur!='\0'; cur++) { + if (cur[0]==tok[0] && !memcmp((cur+1), (tok+1), (t_len-1))) { + *cpy = '\0'; // NUL-terminate left string. + cpy = r; // We copy the right side now + cur += t_len; // Move cursor past the token + } + *cpy = *cur; + cpy++; + } + *cpy = '\0'; // NUL-terminate right string. + + + return 1; +} + + + +int ntok(const char *str, const char *tok) +{ + size_t toklen; + char *sub; + + int count=0; + + toklen = strlen(tok); + + for (sub = (char *)memmem(str, tok); + sub != NULL; + sub = (char *)memmem((sub+toklen), tok)) + { + count++; + } + + return count; +} + + diff --git a/text/search.h b/text/search.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/text/search.h diff --git a/text/string.c b/text/string.c new file mode 100644 index 0000000..d90515c --- /dev/null +++ b/text/string.c @@ -0,0 +1,152 @@ + +/** + * szero + * ````` + * Given a character buffer, set the contents to '\0'. + * + * @str : pointer to a byte buffer + * Return: nothing. + */ +void szero(char *str) +{ + memset(str, '\0', strlen(str)); +} + + + +/** + * sdup + * ```` + * Copy *str to a newly-alloc'd buffer, and return a pointer to it. + * + * @str : pointer to a '\0'-terminated char string + * Return: pointer to a copy of *str, else NULL. + */ +char *sdup(const char *str) +{ + char *copy; + size_t len; + + len = strlen(str) + 1; + copy = malloc(len); + + return copy ? memcpy(copy, str, len) : NULL; +} + + +/** + * sldup + * ````` + * Copy *str to a newly-alloc'd buffer of size len, and return a pointer to it. 
+ * + * @str : pointer to a '\0'-terminated char string + * @len : size of buffer (including '\0') + * Return: pointer to a copy of *str, else NULL. + */ +char *sldup(const char *str, size_t max) +{ + char *copy; + size_t len; + size_t end; + + len = strlen(str) + 1; + len = (len > max) ? max : len; // lesser of two weevils + end = len - 1; + + if (!(copy = calloc(1, len))) + return NULL; + + copy[end] = '\0'; + + return memcpy(copy, str, end); +} + + +/** + * slcpy + * ````` + * Writes at most len characters from src to dst. + * + * @dst : destination buffer + * @src : source buffer + * @len : length of source buffer + * Return: number of bytes written. + */ +size_t slcpy(char *dst, const char *src, size_t siz) +{ + const char *s; + char *d; + size_t n; + + d = dst; + s = src; + n = siz; + + /* Copy as many bytes from src as will fit in dst */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* + * Not enough room in dst, add NUL + * and traverse the rest of src + */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} + + +/** + * slcat + * ````` + * Concatenates src and dst in dst. 
+ * + * @dst : destination buffer + * @src : source buffer + * @siz : size of source buffer + * Return: Number of bytes concatenated + */ +size_t slcat(char *dst, const char *src, size_t siz) +{ + char *d; + const char *s; + size_t n; + size_t dlen; + + d = dst; + s = src; + n = siz; + + /* + * Find the end of dst and adjust bytes + * left, but don't go past end + */ + while (n--!=0 && *d!='\0') + d++; + + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return (dlen + (s - src)); /* count does not include NUL */ +} + + diff --git a/text/string.h b/text/string.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/text/string.h diff --git a/text/textutils.c b/text/textutils.c new file mode 100644 index 0000000..c629e9e --- /dev/null +++ b/text/textutils.c @@ -0,0 +1,1138 @@ +/* + * textutils.c -- byte-oriented character and string routines. + * + * Copyright (C) 2012 Jason Linehan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <ctype.h> +#include <stdbool.h> + +#include "../util/debug.h" +#include "textutils.h" + + +/** + * szero + * ````` + * Given a character buffer, set the contents to '\0'. + * + * @str : pointer to a byte buffer + * Return: nothing. + */ +void szero(char *str) +{ + memset(str, '\0', strlen(str)); +} + + + +/** + * sdup + * ```` + * Copy *str to a newly-alloc'd buffer, and return a pointer to it. + * + * @str : pointer to a '\0'-terminated char string + * Return: pointer to a copy of *str, else NULL. + */ +char *sdup(const char *str) +{ + char *copy; + size_t len; + + len = strlen(str) + 1; + copy = malloc(len); + + return copy ? memcpy(copy, str, len) : NULL; +} + + +/** + * sldup + * ````` + * Copy *str to a newly-alloc'd buffer of size len, and return a pointer to it. + * + * @str : pointer to a '\0'-terminated char string + * @len : size of buffer (including '\0') + * Return: pointer to a copy of *str, else NULL. + */ +char *sldup(const char *str, size_t max) +{ + char *copy; + size_t len; + size_t end; + + len = strlen(str) + 1; + len = (len > max) ? max : len; // lesser of two weevils + end = len - 1; + + if (!(copy = calloc(1, len))) + return NULL; + + copy[end] = '\0'; + + return memcpy(copy, str, end); +} + + +/** + * slcpy + * ````` + * Writes at most len characters from src to dst. + * + * @dst : destination buffer + * @src : source buffer + * @len : length of source buffer + * Return: number of bytes written. 
+ */ +size_t slcpy(char *dst, const char *src, size_t siz) +{ + const char *s; + char *d; + size_t n; + + d = dst; + s = src; + n = siz; + + /* Copy as many bytes from src as will fit in dst */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* + * Not enough room in dst, add NUL + * and traverse the rest of src + */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} + + +/** + * slcat + * ````` + * Concatenates src and dst in dst. + * + * @dst : destination buffer + * @src : source buffer + * @siz : size of source buffer + * Return: Number of bytes concatenated + */ +size_t slcat(char *dst, const char *src, size_t siz) +{ + char *d; + const char *s; + size_t n; + size_t dlen; + + d = dst; + s = src; + n = siz; + + /* + * Find the end of dst and adjust bytes + * left, but don't go past end + */ + while (n--!=0 && *d!='\0') + d++; + + dlen = d - dst; + n = siz - dlen; + + if (n == 0) + return(dlen + strlen(s)); + + while (*s != '\0') { + if (n != 1) { + *d++ = *s; + n--; + } + s++; + } + *d = '\0'; + + return (dlen + (s - src)); /* count does not include NUL */ +} + + + + +/** + * chrswp + * `````` + * Replace (swap) the first occurence of a char byte with another + * + * @src : memory area to be searched + * @at : char byte to be searched for in 'src' + * @with: char byte which will overwrite the first occurence of 'at' + * @len : maximum length to search + */ +void chrswp(char *src, char at, char with, size_t len) +{ + char *sub; + + if ((sub = (char *)memchr(src, at, len)), sub!=NULL && *sub==at) { + *sub = with; + } +} + + + + + +char *textutils_strstr(const char *h, const char *n) +{ + const char *begin; + const char *end; + size_t hlen; + size_t nlen; + + hlen = strlen(h); + nlen = strlen(n); + + end = h + (hlen - nlen); + + /* + * The first occurrence of the empty string is deemed to occur at + * the beginning of the string. 
+ */ + if (nlen == 0) + return (char *)h; + + /* + * Sanity check, otherwise the loop might search through the whole + * memory. + */ + if (__builtin_expect (hlen < nlen, 0)) + return NULL; + + for (begin=h; begin<=end; ++begin) { + if (begin[0] == n[0] + && !memcmp((const void *)(begin+1),(const void *)(n+1),nlen-1)) + return (char *)begin; + } + return NULL; +} + + +/** + * sbif + * ```` + * Bifurcate a string at into two substrings if a token is found. + * + * @l : destination of left side of string. + * @r : destination of right side of string. + * @str : original string. + * @tok : token to split at. + * Return: nothing. + */ +size_t sbif(char *l, char *r, const char *str, const char *tok) +{ + const char *cur; + char *cpy; + size_t t_len; + size_t s_len; + + s_len = strlen(str); + t_len = strlen(tok); + + if (t_len == 0) { + return -1; + } + + if (__builtin_expect (s_len < t_len, 0)) + return -1; + + cpy = l; // Start with the left string. + + for (cur=str; *cur!='\0'; cur++) { + if (cur[0]==tok[0] && !memcmp((cur+1), (tok+1), (t_len-1))) { + *cpy = '\0'; // NUL-terminate left string. + cpy = r; // We copy the right side now + cur += t_len; // Move cursor past the token + } + *cpy = *cur; + cpy++; + } + *cpy = '\0'; // NUL-terminate right string. + + + return 1; +} + + + +int ntok(const char *str, const char *tok) +{ + size_t toklen; + char *sub; + + int count=0; + + toklen = strlen(tok); + + for (sub = (char *)memmem(str, tok); + sub != NULL; + sub = (char *)memmem((sub+toklen), tok)) + { + count++; + } + + return count; +} + + + + +/* + * Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LIBC +#define __builtin_expect(expr, val) (expr) +#endif + + +/* Return the first occurrence of NEEDLE in HAYSTACK. */ +void *memmem(const void *haystack, const void *needle) +{ + const char *begin; + const char *final; + size_t len_haystack; + size_t len_needle; + + len_haystack = strlen((const char*)haystack); + len_needle = strlen((const char*)needle); + + final = (const char *)haystack + (len_haystack - len_needle); + + /* + * The first occurrence of the empty string is deemed + * to occur at the beginning of the string. + */ + if (len_needle == 0) { + return (void *)haystack; + } + + /* + * Sanity check, otherwise the loop might search + * through the whole memory. + */ + if (__builtin_expect (len_haystack < len_needle, 0)) { + return NULL; + } + + for (begin=(const char *)haystack; begin<=final; ++begin) { + if (begin[0] == ((const char *)needle)[0] + && !memcmp((const void *)&begin[1], (const void *)((const char *)needle+1), len_needle-1)) + return (void *)begin; + } + + return NULL; +} + + +/** + * memchr + * `````` + * Search memory region for a character. + * + * @src : Region to be searched. + * @chr : Character to look for. + * @len : Length of the memory region. + * Return: Address of the character in @src, or else NULL. 
+ */ + + +#define LONGALIGNED(X) ((long)X & (sizeof(long) - 1)) +#define LONGBYTES (sizeof(long)) +#define USE_BYTEWISE(len) ((len) < LONGBYTES) + + +/* + * NUL expands to nonzero if X (long int) contains '\0' + */ + +#if LONG_MAX == 2147483647L +#define NUL(X) (((X)-0x01010101) & ~(X) & 0x80808080) +#elif LONG_MAX == 9223372036854775807L +#define NUL(X) (((X)-0x0101010101010101) & ~(X) & 0x8080808080808080) +#else +#error memchar: long int is neither a 32bit nor a 64bit value +#endif + + +/* + * Expands to nonzero if X contains MASK + */ +#define DETECTCHAR(X,MASK) (NUL(X ^ MASK)) + + + +void *memchr(const void *src_void, int c, size_t len) +{ + const int *src = (const int *)src_void; + int d = c; + + #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) + unsigned long *asrc; + unsigned long mask; + int i; + + while (LONGALIGNED(src)) { + if (!len--) { + return NULL; + } + if (*src == d) { + return (void *)src; + } + src++; + } + /* + * If we get this far, we know that length is large and src is + * word-aligned. + */ + if (!USE_BYTEWISE(len)) { + /* + * The fast code reads the source one word at a time + * and only performs the bytewise search on word-sized + * segments if they contain the search character, which + * is detected by XOR-ing the word-sized segment with a + * word-sized block of the search character and then + * detecting for the presence of NUL in the result. + */ + asrc = (unsigned long *)src; + mask = ((d << 8) | d); + mask = ((mask << 16) | mask); + + for (i=32; i<8*LONGBYTES; i<<=1) { + mask = ((mask << i) | mask); + } + + while (len >= LONGBYTES) { + if (DETECTCHAR(*asrc, mask)) { + break; + } + len -= LONGBYTES; + asrc++; + } + /* + * If there are fewer than LONGBYTES characters left, + * we decay to the bytewise loop. 
+ */ + src = (int *)asrc; + } + #endif /* !PREFER_SIZE_OVER_SPEED */ + + while (len--) { + if (*src == d) { + return (void *)src; + } + src++; + } + return NULL; +} + + + +/** + * match + * ````` + * Locate first occurance of string 'needle' in string 'haystack' + * + * @haystack: the string being searched for a match + * @needle : the pattern being matched in 'haystack' + * Return : The first occurance of 'needle' + */ +char *match(const char *haystack, const char *needle) +{ + size_t len_haystack; + size_t len_needle; + + if (!needle || !haystack) + return NULL; + + len_haystack = strlen(haystack); + len_needle = strlen(needle); + + /* Needle can't be larger than haystack */ + if (len_needle > len_haystack) + return NULL; + + return memmem(haystack, needle); +} + + +/** + * field + * ````` + * Return pointer to a delimited substring (not including delimiter) + * + * @str : the string being matched against + * @delim: the delimiter to be searched for + * Return: pointer to the start of the substring + */ +char *field(const char *string, const char *delimiter) +{ + size_t offset; + char *frame; + + if (!string || !delimiter) + return NULL; + + if (frame = match(string, delimiter), !frame) + return NULL; + + offset = strlen(delimiter); + + return &frame[offset]; +} + + +/** + * pumpf + * ````` + * Write a formatted character string into an auto-allocated buffer + * + * @strp : pointer to a character buffer (will be allocated) + * @fmt : format string + * @... : format string arguments + * Return: length of the formatted string at *strp + */ +void pumpf(char **strp, const char *fmt, ...) +{ + va_list args; + size_t len; + FILE *stream; + + /* Open a new FILE stream. *strp will be dynamically allocated to + * contain characters written to the stream, and len will reflect + * these changes. See man(3) open_memstream. 
*/ + stream = open_memstream(strp, &len); + + if (!stream) + /* Unable to open FILE stream */ + return; + + /* Write formatted output to stream */ + va_start(args, fmt); + vfprintf(stream, fmt, args); + va_end(args); + + fflush(stream); + fclose(stream); +} + + + + +char *trimws(char *str) +{ + char *end; + + /* Trim leading space */ + while (isspace(*str)) + str++; + + /* Check for empty string */ + if (*str == '\0') + return str; + + /* Trim trailing space */ + end = str + strlen(str) - 1; + while (end > str && isspace(*end)) + end--; + + /* Write new NUL terminator */ + *(end+1) = '\0'; + + return str; +} + + + + +size_t catenate(char *dest, size_t max, int n, char *strings[]) +{ + size_t len = 0; + int i; + + for (i=0; i<n; i++) { + len += slcat(dest, strings[i], max); + len += slcat(dest, " ", max); + } + return len; +} + + +size_t tonext(char *str, char tok) +{ + char *match; + size_t len; + + len = strlen(str); + + match = (char *)memchr(str, tok, len); + + return len - strlen(match); +} + + + +bool is_ws(char c) { + switch (c) { + case ' ': + case '\n': + case '\t': + case '\f': + case '\r': + return true; + default: + return false; + } +} + + +/** + * tail + * ```` + * Return pointer to the last character of a string (not including newline). + */ +char *tail(char *string) +{ + return string + strlen(string)-1; +} + + +/** + * trimcpy + * ``````` + * Trim leading/trailing whitespace at src and copy result to dst + * + * @dst : destination buffer (must be allocated) + * @src : source buffer + * Return: number of bytes written to dst. 
+ */ +size_t trimcpy(char *dst, const char *src) +{ + const char *end; + + /* Leading */ + while (isspace(*src)) + src++; + + /* All spaces */ + if (*src == 0) + return 0; + + /* Trailing */ + end = src + strlen(src) - 1; + + while (end > src && isspace(*end)) + end--; + end++; + + return slcpy(dst, src, (end-src)+1); // slcpy adds NUL +} + + +/** + * strip_comments + * `````````````` + * Replace C-like comments with whitespace space characters. + * + * @str : String to strip comment symbols from. + * Return: Does not return. + * + * NOTE + * Multi-line comments are supported. + */ +void strip_comments(char *str) +{ + static bool in_comment = false; + + for (; *str; str++) { + if (in_comment) { + /* Exiting comment zone */ + if (str[0] == '*' && str[1] == '/') { + in_comment = false; + *str++ = ' '; + } + /* Replace comments with space. */ + if (!isspace(*str)) { + *str = ' '; + } + } else { + /* Entering comment zone */ + if (str[0] == '/' && str[1] == '*') { + in_comment = true; + *str++ = ' '; + *str = ' '; + } + } + } +} + + +/** + * getstr + * `````` + * Get a string (NUL-terminated) of input. Returns the number of bytes read. + * + * @dest : Pointer to a string pointer. + * @max : Get no more than n-1 characters. + * @stream : Get the next string from this file stream. + * Return : Number of bytes (characters) read. + * + * NOTE + * Compare the return value with that of fgets(), which returns a pointer + * to the string which was read from the input stream. This is the main + * difference between the two functions. + */ +/*int getstr(char **dest, int n, FILE *stream)*/ +/*{*/ + /*static int len = 0;*/ + /*char *str = NULL;*/ + + /*str = *dest;*/ + + /*[> Initialize <]*/ + /*if (len == 0)*/ + /*len = getc(stream);*/ + + /*if (n>0 && len!=EOF) {*/ + /*while (n-->0) {*/ + /*len = getc(stream);*/ + /**str = len;*/ + + /*if (*str=='\n' || *str==EOF)*/ + /*break;*/ + /*str++;*/ + /*}*/ + /**str = '\0';*/ + /**dest = str;*/ + /*}*/ + /*return (n <= 0) ? 
0 : len;*/
+/*}*/
+
+/* getline() - line-based string input with automatic allocation
+ *
+ * getline() reads an entire line, storing the address of the buffer containing
+ * the text into *buf. The buffer is NUL-terminated and includes the newline
+ * character, if a newline delimiter was found.
+ *
+ * If *buf is NULL, getline() allocates a buffer for containing the line, which
+ * must be freed by the user program. Alternatively, before calling getline(),
+ * *buf can contain a pointer to a malloc()-allocated buffer *len bytes in
+ * size. If the buffer isn't large enough to hold the line read in, getline()
+ * grows the buffer with realloc(), updating *buf and *len as necessary.
+ *
+ * On success, getline() returns the number of characters read, including the
+ * newline, but not including the terminating NUL. This value can be used to
+ * handle embedded NUL characters in the line read. On failure to read a line
+ * (including end-of-file condition), -1 is returned, and errno may be set.
+ * getline() always updates *buf and *len to reflect the buffer address and
+ * size. errno is set to EINVAL if bad parameters are passed to getline().
+ *
+ * XXX: Unlike GNU getline(), this function cannot correctly handle files whose
+ * last line contains embedded NUL bytes but lacks a final newline character.
+ * However, the only time this is likely to happen is if getline() is used to
+ * read binaries. In this exceptional condition, bytes including and following
+ * the first NUL are not counted as part of the return value. 
*/
+ssize_t getstr(char **buf, size_t *len, FILE *stream)
+{
+	char *new_buf;
+	char *nl;
+
+	int my_malloc=0; /* set when *buf arrived empty, so the error path frees what was allocated here */
+	int new_len;     /* NOTE(review): int, although it is stored into the size_t *len */
+	int i=0;         /* offset in *buf where the next fgets() chunk is written */
+
+	if (!buf || !len)
+		return set_errno(EINVAL); /* set_errno() is not defined in this file -- presumably sets errno and returns -1; TODO confirm */
+
+	if (*buf == NULL || *len == 0) {
+		*buf = NULL;
+		*len = 0;
+		my_malloc = 1;
+	}
+
+	if (*len <= 60)
+		goto alloc; /* no buffer, or at most 60 bytes: grow it before the first read */
+
+	while (1) {
+
+		if (fgets(*buf + i, *len - i, stream) == NULL) {
+
+			if (!feof(stream) || i == 0) {
+				/*
+				 * The read failed with an error, or the
+				 * file stream is empty.
+				 */
+				goto error;
+
+			} else {
+				/*
+				 * The final line contains no newline, and
+				 * the previous fgets() read exactly as many
+				 * characters as remained in the line.
+				 */
+				return i;
+			}
+		}
+
+		if (feof(stream)) {
+			/*
+			 * We were able to successfully read at least one
+			 * byte before encountering EOF, but the file did
+			 * not end in a newline. Let's hope the last line
+			 * doesn't contain any NUL bytes.
+			 */
+			return i + strlen(*buf + i);
+		}
+
+		if ((nl = memchr(*buf + i, '\n', *len - i - 1)) == NULL) {
+			/*
+			 * No newline found. Either we're at the end of a
+			 * file with no newline after its final line, or
+			 * we need to grow the buffer. This chunk of code
+			 * is also used to allocate the initial buffer,
+			 * since realloc(NULL, x) works the same as malloc(x).
+			 *
+			 * i is moved onto the NUL that fgets() stored in the
+			 * last byte, so the next read continues from there.
+			 */
+			i = *len - 1;
+
+			alloc:
+
+			new_len = *len < 60 ? 120 : *len * 2; /* start at 120 bytes, otherwise double */
+
+			if ((new_buf = realloc(*buf, new_len)) == NULL) {
+				goto error;
+			}
+
+			*buf = new_buf;
+			*len = new_len;
+
+		} else {
+			/* We have the newline, so we're done. */
+			return nl - *buf + 1;
+		}
+
+	}
+
+	error:
+	if (my_malloc) {
+		free(*buf);
+		*buf = NULL;
+		*len = 0;
+	}
+	return -1;
+}
+
+/* Uncomment to provide a test utility. Delete or rename the above getline()
+ * function to test GNU getline(), if present in your libc. 
*/ +#if 0 +int main(int argc, char** argv) +{ + char* buf = NULL; int len = 0, ret; + + while (1) { + printf("getline() = %d", ret = getline(&buf, &len, stdin)); + if (ret == -1) { + if (feof(stdin)) printf("; EOF\n"); + else perror("getline"); + break; + } else { + printf("; buf = \""); + fwrite(buf, ret, 1, stdout); + printf("\"; len = %d\n", len); + } + } + + free(buf); + return EXIT_SUCCESS; +} +#endif + + + + + + +/** + * esc_fputs + * + * Write string to file stream, with control characters mapped to readable text. + * + * @str : String to be written (May contain control characters) + * @max : Maximum number of characters to write to @stream. + * @stream: Open file stream. + * Return : Nothing. + * + * TODO + * Make this thing return the number of characters written, cmon. + */ +void esc_fputs(char *str, size_t max, FILE *stream) +{ + char *s; + + while (*str && max >= 0) { + + s = bin_to_ascii(*str++, 1); + + while (*s && --max >= 0) { + fputc(*s++, stream); + } + } +} + + + + +/** + * hex2bin + * ``````` + * Convert the hexadecimal digit to an int. + * + * @c: hexadecimal digit + * + * NOTE + * @c must be one of 0123456789abcdefABCDEF + */ +int hex2bin(int c) +{ + return (isdigit(c)) ? ((c)-'0') : ((((toupper(c))-'A')+10) & 0xf); +} + +/** + * oct2bin + * ``````` + * Convert the octal digit represented by 'c' to an int. + * + * @c: octal digit + * + * NOTE + * @c must be one of 01234567 + */ +int oct2bin(int c) +{ + return (((c)-'0') & 0x7); +} + + +/** + * bin_to_ascii + * ```````````` + * Return a pointer to a string that represents the byte c in escaped form. + * + * @c : A byte, potentially a control character. + * @use_hex: Use hexadecimal escape sequences. + * Returns : A string representing @c in human-readable form. + * + * HISTORY + * Credit to Alan Holub, in "Compiler Construction in C". 
+ */
+char *bin_to_ascii(int c, int use_hex)
+{
+	static char buf[8];	/* reused by every call */
+	char letter = '\0';	/* escape letter, when c has a named escape */
+
+	c &= 0xff;
+
+	/* Printable characters other than ' and \ stand for themselves. */
+	if (c >= ' ' && c < 0x7f && c != '\'' && c != '\\') {
+		buf[0] = c;
+		buf[1] = '\0';
+		return buf;
+	}
+
+	switch (c)
+	{
+	case '\\': letter = '\\'; break;
+	case '\'': letter = '\''; break;
+	case '\b': letter = 'b';  break;
+	case '\f': letter = 'f';  break;
+	case '\t': letter = 't';  break;
+	case '\r': letter = 'r';  break;
+	case '\n': letter = 'n';  break;
+	default  : break;
+	}
+
+	buf[0] = '\\';
+
+	if (letter != '\0') {
+		buf[1] = letter;
+		buf[2] = '\0';
+	} else {
+		/* No named escape: fall back to a numeric one. */
+		sprintf(&buf[1], use_hex ? "x%03x" : "%03o", c);
+	}
+	return buf;
+}
+
+
+
+
+/**
+ * esc
+ * ```
+ * Return the character associated with the escape sequence pointed to
+ * by *s, and modify *s to point past the sequence.
+ *
+ * @s: Pointer to a string holding escape sequence
+ *
+ * HISTORY
+ * From Alan Holub's "Compiler Design in C"
+ *
+ * NOTES
+ * Recognized characters:
+ *
+ * \b backspace
+ * \f formfeed
+ * \n newline
+ * \r carriage return
+ * \s space
+ * \t tab
+ * \e ASCII ESC character ('\033')
+ * \DDD number formed of 1-3 octal digits
+ * \xDDD number formed of 1-3 hex digits (two required)
+ * \^C C = any letter. Control code.
+ */
+int esc(char **s)
+{
+	int rval;
+
+	/* Ordinary character: return it and step past it. */
+	if (**s != '\\')
+		return *((*s)++);
+
+	++(*s);	/* step over the backslash */
+
+	switch (toupper((unsigned char)**s))
+	{
+	case '\0':
+		/* Backslash was the last character: stay on the terminator. */
+		return '\\';
+	case 'B':
+		rval = '\b';
+		break;
+	case 'F':
+		rval = '\f';
+		break;
+	case 'N':
+		rval = '\n';
+		break;
+	case 'R':
+		rval = '\r';
+		break;
+	case 'S':
+		rval = ' ';
+		break;
+	case 'T':
+		rval = '\t';
+		break;
+	case 'E':
+		rval = '\033';
+		break;
+	case '^':
+		/* "\^" at the very end has no letter to convert. */
+		if ((*s)[1] == '\0') {
+			rval = '^';
+			break;
+		}
+		rval = *++(*s);
+		rval = toupper((unsigned char)rval) - '@';
+		break;
+	case 'X':
+		rval = 0;
+		++(*s);
+		if (IS_HEXDIGIT(**s)) {
+			rval = hex2bin(*(*s)++);
+		}
+		if (IS_HEXDIGIT(**s)) {
+			rval <<= 4;
+			rval |= hex2bin(*(*s)++);
+		}
+		if (IS_HEXDIGIT(**s)) {
+			rval <<= 4;
+			rval |= hex2bin(*(*s)++);
+		}
+		--(*s);
+		break;
+
+	default:
+		if (!IS_OCTDIGIT(**s))
+			rval = **s;
+		else {
+			/* *s already sits on the first octal digit. */
+			rval = oct2bin(*(*s)++);
+			if (IS_OCTDIGIT(**s)) {
+				rval <<= 3;
+				rval |= oct2bin(*(*s)++);
+			}
+			if (IS_OCTDIGIT(**s)) {
+				rval <<= 3;
+				rval |= oct2bin(*(*s)++);
+			}
+			--(*s);
+		}
+		break;
+	}
+
+	++(*s);	/* step past the last character of the sequence */
+
+	return rval;
+}
+
+
+/* Print each of the argc entries of argv on its own line. */
+void argv_print(int argc, char *argv[])
+{
+	int i;
+	for (i=0; i<argc; i++) {
+		printf("%s\n", argv[i]);
+	}
+}
+
+
+
+/**
+ * memseq
+ * ``````
+ * Sequential memcpy(). Portable version of mempcpy(), a GNU extension.
+ * Returns a pointer to dst + len after writing src to dst, instead of
+ * a pointer to the beginning of dst.
+ */ +char *memseq(void *dst, const void *src, size_t len) +{ + return (char *)memcpy(dst, src, len) + len; +} + + + + + diff --git a/text/textutils.h b/text/textutils.h new file mode 100644 index 0000000..d4aacf6 --- /dev/null +++ b/text/textutils.h @@ -0,0 +1,94 @@ +#ifndef __TEXTUTILS_H +#define __TEXTUTILS_H + +#include <string.h> +#include <ctype.h> +#include <stdbool.h> + +/* Initialization ----------------------------------------------------------- */ +void szero(char *str); + +/* Safe strings ------------------------------------------------------------- */ +char *sdup(const char *str); +char *sldup(const char *str, size_t max); +size_t slcpy(char *dst, const char *src, size_t siz); +size_t slcat(char *dst, const char *src, size_t siz); +size_t sbif(char *l, char *r, const char *str, const char *tok); +#define strbif sbif + +/* String sets -------------------------------------------------------------- */ +size_t catenate(char *dest, size_t max, int n, char *strings[]); +//const char *concat(const char *a, const char *b); +char *match(const char *haystack, const char *needle); +char *field(const char *string, const char *delimiter); +int ntok(const char *str, const char *tok); +void chrswp(char *src, char at, char with, size_t len); + +ssize_t getstr(char **buf, size_t *len, FILE *stream); + +/* Format print ------------------------------------------------------------- */ +void pumpf(char **strp, const char *fmt, ...); + +/* Whitespace --------------------------------------------------------------- */ +size_t trimcpy(char *dst, const char *src); +char *trimws(char *str); + +bool is_ws(char c); + +void strip_comments(char *str); + +char *tail(char *string); + +char *bin_to_ascii(int c, int use_hex); +void esc_fputs(char *str, size_t max, FILE *stream); + +void argv_print(int argc, char *argv[]); + + +/* Raw memory --------------------------------------------------------------- */ +#define memmem textutils_memmem +#define strstr textutils_strstr +#define memchr 
textutils_memchr + +char *memseq(void *dst, const void *src, size_t len); + +void *textutils_memmem(const void *haystack, const void *needle); +char *textutils_strstr(const char *haystack, const char *needle); +void *textutils_memchr(const void *src_void, int c, size_t len); + +/* Nice macros -------------------------------------------------------------- */ + +#define STRCMP(a,b) (strcmp((a),(b)) == 0) ? true : false +#define isarg(n, string) (STRCMP(argv[(n)], (string))) +#define ARG(n) (argv[(n)]) + +#define STREMPTY(s) (STRCMP((s),"")) + + +#define IS_HEXDIGIT(x) (isdigit(x)||('a'<=(x)&&(x)<='f')||('A'<=(x)&&(x)<='F')) +#define IS_OCTDIGIT(x) ('0'<=(x) && (x)<='7') +int hex2bin(int c); +int oct2bin(int c); +int esc(char **s); + + +/** + * concat + * `````` + * Return pointer to a static value of 2 concatenated strings. + * @a: first string (head) + * @b: second string (tail) + * Return: pointer to the concateneated string, static. + */ +static inline const char *concat(const char *a, const char *b) +{ + #define BIG 9000 + static char buffer[BIG]; + + slcpy(buffer, a, BIG); + slcat(buffer, b, BIG); + + return buffer; +} + +#endif diff --git a/util/bnfop.c b/util/bnfop.c new file mode 100644 index 0000000..4f5b4d7 --- /dev/null +++ b/util/bnfop.c @@ -0,0 +1,218 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <assert.h> +#include "bnfop.h" + +/****************************************************************************** + * HELPER UTILITIES + ******************************************************************************/ + +/** + * is_ws + * ````` + * Test whether a character is a whitespace character or not. + * + * @c : Character to be tested. + * Return: TRUE if character is whitespace, else FALSE. 
+ */
+static inline bool is_ws(char c) {
+	/* Space, newline, tab, form feed and carriage return count. */
+	return c == ' ' || c == '\n' || c == '\t' || c == '\f' || c == '\r';
+}
+
+
+/**
+ * next_word
+ * `````````
+ * Move a pointer forward to the start of the following
+ * whitespace-delimited word, stopping at the end of the string.
+ *
+ * @ptr : Pointer into a NUL-terminated string.
+ * Return: Pointer to the first character of the next word, or to the
+ *         terminating NUL if no further word exists.
+ */
+static inline const char *next_word(const char *ptr)
+{
+	/* Leave the word the pointer currently sits in, if any. */
+	while (*ptr != 0 && !is_ws(*ptr)) {
+		ptr++;
+	}
+
+	/*
+	 * Skip the run of whitespace that follows. The NUL is not
+	 * whitespace, so this also stops at the end of the string.
+	 */
+	while (is_ws(*ptr)) {
+		ptr++;
+	}
+
+	return ptr;
+}
+
+
+/******************************************************************************
+ * BNFOP
+ ******************************************************************************/
+
+/**
+ * bnfop
+ * `````
+ * Use a BNF format string to validate arguments and print usage.
+ *
+ * @argc : Argument count.
+ * @argv : Argument vector.
+ * @usage: Format string written in mini-BNF.
+ * @... : Variable number of arguments (one for each <foo> in @usage).
+ * Return: TRUE if match, else FALSE.
+ *
+ * NOTE
+ * May abort program execution if a malformed message is detected.
+ * + * USAGE + * + * With a binary infix operator (@): + * -------------------------------- + * const char *name; + * const char *host; + * + * if (bnfop("prog <name> @ <host>", &name, &host)) { + * my_func(name, host); + * } + * + * + * With a traditional short option flag: + * ------------------------------------ + * const char *name; + * + * if (bnfop("prog -o <name>", &name)) { + * my_func(name); + * } + * + * + * With only a program invocation: + * ------------------------------ + * const char *name; + * + * if (bnfop("prog <name> $", &name)) { + * my_func(name); + * } + * + * + * With no arguments: + * ----------------- + * if (bnfop("prog $", &name)) { + * my_func(name); + * } + * + * + */ +bool bnfop(int argc, char *argv[], const char *usage, ...) +{ + const char *ptr; // Tracks across usage string + char **param; // Points to variable argument list. + va_list args; // The variable argument list + size_t len; // Length of the option word + bool onematch = false; // If the first (program name) option fits + bool twomatch = false; // If the second (option word) option fits + int n = 0; // Number of words processed. + + ptr = usage; + + va_start(args, usage); + + assert(!is_ws(*ptr) && "Leading whitespace in option string.\n"); + + do { + /* Required argument. */ + if (*ptr == '<') { + while (*ptr != '>') { + ptr++; + assert((*ptr != 0) && "Unterminated '<'.\n"); + } + + /* + * Assign the corresponding argv to the + * argument in the variadic argument list + * of the caller. 
+ */ + param = va_arg(args, char **); + *param = argv[n]; + + goto next; + } + + + /* Option word */ + len = strlen(argv[n]); + + if (memcmp(ptr, argv[n], len) == 0) { + if (n == 0) { + onematch = true; /* Program name */ + } else { + twomatch = true; /* Option */ + } + ptr += len; /* advance past the word */ + goto next; + } else { + break; + } + +next: + ptr = next_word(ptr); + + if (*ptr != 0) { + n += 1; + } else { + break; + } + + } while (n != argc); + + va_end(args); /* Clean up after the variable arguments */ + + + /* + * Judgement! + */ + + if (twomatch == true) { + if (*ptr == 0) { + return true; + } else { + fprintf(stderr, "(Usage) %s\n", usage); + exit(1); /* malformed */ + } + } else if (onematch == true) { + if (*ptr == '$') { + return true; + } else { + return false; + } + } + + return false; +} + diff --git a/util/bnfop.h b/util/bnfop.h new file mode 100644 index 0000000..0d3a24f --- /dev/null +++ b/util/bnfop.h @@ -0,0 +1,51 @@ +#ifndef _BNFOP_H +#define _BNFOP_H +#include "cpp.h" +#include <stdbool.h> + +/****************************************************************************** + * Private stuff + ******************************************************************************/ +static const char * UNUSED(__BNF_NOARGS); + +#define BNF_TERM " $ " + +/* + * Assumes bnf() is being called within main(), and that argc and argv + * are present. + */ +#define __bnf(usage, ...) \ + bnfop(argc, argv, usage, __VA_ARGS__) + +/* + * Appends the dummy value NOARGS to the variable argument list, to + * handle cases where no argument is otherwise supplied. Because the + * number of arguments taken from the list is restricted first by the + * value of argc, NOARGS will never be assigned when there truly are + * "no args." + * + * We also append, to every usage string, a hidden ' $ ' value that + * denotes the end of the line, so we can tell the difference between + * a malformed and non-malformed string. I'll explain later. 
+ */ +#define __bnf_append_noargs(usage, ...) \ + __bnf(usage, __VA_ARGS__ VA_COMMA(__VA_ARGS__) __BNF_NOARGS) + + +/****************************************************************************** + * Public stuff + ******************************************************************************/ +/* + * The macro to be used. + */ +#define bnf(usage, ...) \ + __bnf_append_noargs(usage, __VA_ARGS__) + + +bool bnfop(int argc, char *argv[], const char *usage, ...); + + +#define program_invoked_as(name) ((strcmp(name, argv[0]) == 0)) + + +#endif diff --git a/util/build_assert.h b/util/build_assert.h new file mode 100644 index 0000000..6b6f309 --- /dev/null +++ b/util/build_assert.h @@ -0,0 +1,48 @@ +#ifndef CCAN_BUILD_ASSERT_H +#define CCAN_BUILD_ASSERT_H + +/** + * BUILD_ASSERT + * ```````````` + * Assert a build-time dependency. + * + * @cond: the compile-time condition which must be true. + * + * Your compile will fail if the condition isn't true, or can't + * be evaluated by the compiler. This can only be used within a + * function. + * + * USAGE + * + * #include <stddef.h> + * ... + * static char *foo_to_char(struct foo *foo) + * { + * // This code needs string to be at start of foo. + * BUILD_ASSERT(offsetof(struct foo, string) == 0); + * return (char *)foo; + * } + */ +#define BUILD_ASSERT(cond) \ + do { (void) sizeof(char [1 - 2*!(cond)]); } while(0) + +/** + * BUILD_ASSERT_OR_ZERO + * ```````````````````` + * Assert a build-time dependency, as an expression. + * + * @cond: the compile-time condition which must be true. + * + * Your compile will fail if the condition isn't true, or can't be evaluated + * by the compiler. This can be used in an expression: its value is "0". 
+ * + * USAGE + * #define foo_to_char(foo) \ + * ((char *)(foo) \ + * + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0)) + */ +#define BUILD_ASSERT_OR_ZERO(cond) \ + (sizeof(char [1 - 2*!(cond)]) - 1) + + +#endif /* CCAN_BUILD_ASSERT_H */ diff --git a/util/check_type.h b/util/check_type.h new file mode 100644 index 0000000..02d2a9b --- /dev/null +++ b/util/check_type.h @@ -0,0 +1,74 @@ +#ifndef CCAN_CHECK_TYPE_H +#define CCAN_CHECK_TYPE_H + +/** + * check_type + * `````````` + * Issue a warning or build failure if type is not correct. + * + * @expr: the expression whose type we should check (not evaluated). + * @type: the exact type we expect the expression to be. + * + * NOTES + * This macro is usually used within other macros to try to ensure + * that a macro argument is of the expected type. No type promotion + * of the expression is done: an unsigned int is not the same as an + * int! + * + * check_type() always evaluates to 0. + * + * If your compiler does not support typeof, then the best we can do is fail + * to compile if the sizes of the types are unequal (a less complete check). + * + * USAGE + * They should always pass a 64-bit value to _set_some_value! + * + * #define set_some_value(expr) \ + * _set_some_value((check_type((expr), uint64_t), (expr))) + */ + + +/** + * check_types_match + * ````````````````` + * Issue a warning or build failure if types are not same. + * + * @expr1: the first expression (not evaluated). + * @expr2: the second expression (not evaluated). + * + * This macro is usually used within other macros to try to ensure + * that arguments are of identical types. No type promotion of the + * expressions is done: an unsigned int is not the same as an int! + * + * check_types_match() always evaluates to 0. + * + * If your compiler does not support typeof, then the best we can do + * is fail to compile if the sizes of the types are unequal (a less + * complete check). 
+ * + * USAGE + * // Do subtraction to get to enclosing type, but make sure that + * // pointer is of correct type for that member. + * #define container_of(mbr_ptr, encl_type, mbr) \ + * (check_types_match((mbr_ptr), &((encl_type *)0)->mbr), \ + * ((encl_type *) \ + * ((char *)(mbr_ptr) - offsetof(enclosing_type, mbr)))) + */ +#if HAVE_TYPEOF +#define check_type(expr, type) \ + ((typeof(expr) *)0 != (type *)0) + +#define check_types_match(expr1, expr2) \ + ((typeof(expr1) *)0 != (typeof(expr2) *)0) +#else +#include "build_assert.h" +/* Without typeof, we can only test the sizes. */ +#define check_type(expr, type) \ + BUILD_ASSERT_OR_ZERO(sizeof(expr) == sizeof(type)) + +#define check_types_match(expr1, expr2) \ + BUILD_ASSERT_OR_ZERO(sizeof(expr1) == sizeof(expr2)) +#endif /* HAVE_TYPEOF */ + +#endif /* CCAN_CHECK_TYPE_H */ + diff --git a/util/container_of.h b/util/container_of.h new file mode 100644 index 0000000..49e39f3 --- /dev/null +++ b/util/container_of.h @@ -0,0 +1,125 @@ +#ifndef CCAN_CONTAINER_OF_H +#define CCAN_CONTAINER_OF_H +#include <stddef.h> + +#include "check_type.h" + +/** + * container_of + * ```````````` + * Get pointer to enclosing structure + * + * @member_ptr : pointer to the structure member + * @containing_type: the type this member is within + * @member : the name of this member within the structure. + * + * Given a pointer to a member of a structure, this macro does pointer + * subtraction to return the pointer to the enclosing type. + * + * EXAMPLE + * + * struct foo { + * int field_a; + * int field_b; + * ... 
+ * }; + * + * struct info { + * int some_other_field; + * struct foo my_foo; + * }; + * + * static struct info *foo_to_info(struct foo *foo) + * { + * return container_of(foo, struct info, my_foo); + * } + */ +#define container_of(member_ptr, containing_type, member) \ + ((containing_type *) \ + ((char *)(member_ptr) \ + - container_off(containing_type, member)) \ + + check_types_match(*(member_ptr), ((containing_type *)0)->member)) + +/** + * container_off + * ````````````` + * Get offset to enclosing structure + * + * @containing_type: the type this member is within + * @member : the name of this member within the structure. + * + * Given a pointer to a member of a structure, this macro does + * typechecking and figures out the offset to the enclosing type. + * + * EXAMPLE + * + * struct foo { + * int fielda; + * int fieldb; + * ... + * }; + * + * struct info { + * int some_other_field; + * struct foo my_foo; + * }; + * + * static struct info *foo_to_info(struct foo *foo) + * { + * size_t off = container_off(struct info, my_foo); + * return (void *)((char *)foo - off); + * } + */ +#define container_off(containing_type, member) \ + offsetof(containing_type, member) + +/** + * container_of_var + * ```````````````` + * Get pointer to enclosing structure using a variable + * + * @member_ptr : pointer to the structure member + * @container_var: a pointer of same type as this member's container + * @member : the name of this member within the structure. + * + * Given a pointer to a member of a structure, this macro does pointer + * subtraction to return the pointer to the enclosing type. 
+ * + * EXAMPLE + * + * static struct info *foo_to_i(struct foo *foo) + * { + * struct info *i = container_of_var(foo, i, my_foo); + * return i; + * } + */ +#if HAVE_TYPEOF +#define container_of_var(member_ptr, container_var, member) \ + container_of(member_ptr, typeof(*container_var), member) +#else +#define container_of_var(member_ptr, container_var, member) \ + ((void *)((char *)(member_ptr) - \ + container_off_var(container_var, member))) +#endif + +/** + * container_off_var + * ````````````````` + * Get offset of a field in enclosing structure + * + * @container_var: a pointer to a container structure + * @member : the name of a member within the structure. + * + * Given (any) pointer to a structure and a its member name, this + * macro does pointer subtraction to return offset of member in a + * structure memory layout. + */ +#if HAVE_TYPEOF +#define container_off_var(var, member) \ + container_off(typeof(*var), member) +#else +#define container_off_var(var, member) \ + ((char *)&(var)->member - (char *)(var)) +#endif + +#endif /* CCAN_CONTAINER_OF_H */ diff --git a/util/cpp.h b/util/cpp.h new file mode 100644 index 0000000..c921961 --- /dev/null +++ b/util/cpp.h @@ -0,0 +1,46 @@ +#ifndef _CPP_H +#define _CPP_H + + +/* + * Allows you to suppress the "variable declared but not used" warning + * for variables that are used, but perhaps by a macro or perhaps in + * another scope... or just to make the compiler hush. + */ +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) x __attribute__((unused)) +#elif defined(__LCLINT__) +# define UNUSED(x) /*@unused@*/ x +#else +# define UNUSED(x) x +#endif + + + + +/* + * VA_COMMA surrounds its arguments (__VA_ARGS__) with 8 additional + * arguments: one empty argument before (doesn't have to be empty -- + * it's thrown away) and 6 commas and an empty argument after. + * These 8 or more arguments are passed to GET_8TH_ARG, which, as + * its name implies, expands to the eighth argument. 
#define USE_ERRNO_H


/*
 * If the macro USE_ERRNO_H is defined, the errno, its
 * text symbol and description will all be included in
 * the abort report.
 */
#ifdef USE_ERRNO_H

struct error_info {
        char *t;        /* Symbolic name, e.g. "ENOENT" */
        char *m;        /* Human-readable description */
};

/*
 * Indexed directly by errno value. Entry 0 is a placeholder.
 * NOTE(review): assumes errno values 1..34 follow this ordering on the
 * target platform -- confirm before porting.
 */
struct error_info e[] = {
        {"_"      , "_"},
        {"EPERM"  , "Operation not permitted"},
        {"ENOENT" , "No such file or directory"},
        {"ESRCH"  , "No such process"},
        {"EINTR"  , "Interrupted system call"},
        {"EIO"    , "I/O error"},
        {"ENXIO"  , "No such device or address"},
        {"E2BIG"  , "Argument list too long"},
        {"ENOEXEC", "Exec format error"},
        {"EBADF"  , "Bad file number"},
        {"ECHILD" , "No child processes"},
        {"EAGAIN" , "Try again"},
        {"ENOMEM" , "Out of memory"},
        {"EACCES" , "Permission denied"},
        {"EFAULT" , "Bad address"},
        {"ENOTBLK", "Block device required"},
        {"EBUSY"  , "Device or resource busy"},
        {"EEXIST" , "File exists"},
        {"EXDEV"  , "Cross-device link"},
        {"ENODEV" , "No such device"},
        {"ENOTDIR", "Not a directory"},
        {"EISDIR" , "Is a directory"},
        {"EINVAL" , "Invalid argument"},
        {"ENFILE" , "File table overflow"},
        {"EMFILE" , "Too many open files"},
        {"ENOTTY" , "Not a typewriter"},
        {"ETXTBSY", "Text file busy"},
        {"EFBIG"  , "File too large"},
        {"ENOSPC" , "No space left on device"},
        {"ESPIPE" , "Illegal seek"},
        {"EROFS"  , "Read-only file system"},
        {"EMLINK" , "Too many links"},
        {"EPIPE"  , "Broken pipe"},
        {"EDOM"   , "Math argument out of domain of func"},
        {"ERANGE" , "Math result not representable"}
};

/**
 * set_errno
 * `````````
 * Set the errno global variable and return -1. Can be used in an expression.
 *
 * @number: error code to store in errno.
 * Return : always -1.
 *
 * NOTE
 *      set_errno() does not interrupt program execution.
 */
int set_errno(int number)
{
        errno = number;
        return -1;
}

/*
 * report_errno -- print "<prefix><symbol> (<number>): <description>\n"
 * to stderr. Values outside the table are reported as unknown rather
 * than indexing past the end of e[].
 */
static void report_errno(const char *prefix, int err)
{
        const size_t count = sizeof(e) / sizeof(e[0]);
        const char *symbol = "?";
        const char *message = "Unknown error";

        if (err > 0 && (size_t)err < count) {
                symbol = e[err].t;
                message = e[err].m;
        }

        fprintf(stderr, "%s%s (%d): %s\n", prefix, symbol, err, message);
}

#endif /* USE_ERRNO_H */



/**
 * abort_report
 * ````````````
 * Print a formatted report to stderr and raise SIGABRT.
 *
 * @fmt  : a printf-style format string
 * @...  : the variable argument list to the format string
 * Return: -1, if the raise() call returns at all.
 */
int abort_report(const char *fmt, ...)
{
        /* Capture errno first: the stdio calls below may overwrite it. */
        const int saved_errno = errno;
        va_list args;

        (void)saved_errno;

        #ifdef USE_ERRNO_H
        if (saved_errno) {
                report_errno("", saved_errno);
        }
        #endif

        /* Format straight to the stream: no fixed-size buffer to overflow. */
        fprintf(stderr, "The handler reported: \"");
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fprintf(stderr, "\"\n");

        raise(SIGABRT);

        return -1;
}


/**
 * raise_report
 * ````````````
 * Print a formatted report to stderr and raise a signal.
 *
 * @signo: signal number to raise.
 * @fmt  : printf-style format string.
 * @...  : the variable argument list to the format string.
 * Return: -1, if the raise() call returns at all.
 */
int raise_report(int signo, const char *fmt, ...)
{
        /* Capture errno first: the stdio calls below may overwrite it. */
        const int saved_errno = errno;
        va_list args;

        (void)saved_errno;

        if (signo == SIGABRT) {
                fprintf(stderr, "PANIC The handler reported: \"");
        } else {
                fprintf(stderr, "The handler reported: \"");
        }
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fprintf(stderr, "\"\n");

        #ifdef USE_ERRNO_H
        if (saved_errno) {
                report_errno((signo == SIGABRT) ? "ERROR " : "", saved_errno);
        }
        #endif

        /* NOTE(review): printed for every signo, not only SIGABRT. */
        fprintf(stderr, "ABORT Caught SIGABRT\n");

        raise(signo);

        return -1;
}


/**
 * debug_report
 * ````````````
 * Print a formatted report to stderr.
 *
 * @fmt  : printf-style format string.
 * @...  : the variable argument list to the format string.
 * Return: always 1.
 */
int debug_report(const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);

        return 1;
}
+ * SIGSTOP S Stop executing (cannot be caught or ignored). + * SIGTERM T Termination signal. + * SIGTSTP S Terminal stop signal. + * SIGTTIN S Background process attempting read. + * SIGTTOU S Background process attempting write. + * SIGUSR1 T User-defined signal 1. + * SIGUSR2 T User-defined signal 2. + * SIGPOLL T Pollable event. + * SIGPROF T Profiling timer expired. + * SIGSYS A Bad system call. + * SIGTRAP A Trace/breakpoint trap. + * SIGURG I High bandwidth data availible at a socket. + * SIGVTALRM T Virtual timer expired. + * SIGXCPU A CPU time limit exceeded. + * SIGXFSZ A File size limit exceeded. + * -------------------------------------------------------------------------- + * + * + * signal.h defines the sigaction() function: + * + * int sigaction(int sig, const struct sigaction *restrict act, + * struct sigaction *restrict oact); + * + * where 'act' specifies the implementation-defined signal handling, and + * 'oact' refers to the location at which the default signal handling + * configuration will be stored. These are of type struct sigaction, which + * is also defined in signal.h. See man(3) signal.h + * + ******************************************************************************/ diff --git a/util/debug.h b/util/debug.h new file mode 100644 index 0000000..96346cf --- /dev/null +++ b/util/debug.h @@ -0,0 +1,117 @@ + +#ifndef _DEBUG_H +#define _DEBUG_H +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include "util.h" + +#define PRIVATE static + + +#define assert_msg(test, msg) assert(test && msg); + +/* e.g. + when(x) { + printf("assertion will fail\n"); + } + */ +#define when(x) for (; (x); assert(x)) + + +/* + * Exit the program and print a diagnostic message + */ +#define bye(...) \ + (NUM_ARGS(__VA_ARGS__) == 1) ? abort_report(__VA_ARGS__, "") \ + : abort_report(__VA_ARGS__) + +/* + * Raise a signal and print an error. + */ + +#define halt(sig, ...) \ + (NUM_ARGS(__VA_ARGS__) == 1) ? 
raise_report(sig, __VA_ARGS__, "") \ + : raise_report(sig, __VA_ARGS__) + + + +/* + * Drop a printf statement with the line number, filename, + * and function name (if using GCC) included in the output. + */ + +//#define NO_DEBUG + + +#define WHERE_FMT "%s:%d: " +#define WHERE_ARG __FILE__, __LINE__ + +#define DEBUG_1(...) fprintf(stderr, WHERE_FMT __VA_ARGS__, WHERE_ARG) +#define DEBUG_2(fmt, ...) fprintf(stderr, WHERE_FMT fmt, WHERE_ARG, __VA_ARGS__) +#define DEBUG_3 DEBUG_2 +#define DEBUG_4 DEBUG_2 +#define DEBUG_5 DEBUG_2 +#define DEBUG_6 DEBUG_2 +#define DEBUG_7 DEBUG_2 +#define DEBUG_8 DEBUG_2 +#define DEBUG_9 DEBUG_2 + +#define __DEBUG(N, ...) DEBUG_ ## N(__VA_ARGS__) // N -> 1 +#define _DEBUG(N, ...) __DEBUG(N, __VA_ARGS__) // N -> (1) + +//#define NO_DEBUG + +#if defined(NO_DEBUG) +#define DEBUG(...) /* nothing */ +#define __ENTER /* nothing */ +#define __LEAVE /* nothing */ +#define ___BREAKPOINT___ /* nothing */ +#else +#define DEBUG(...) _DEBUG(NUM_ARGS(__VA_ARGS__), __VA_ARGS__) +#define __ENTER DEBUG("Entering %s\n", __func__) +#define __LEAVE DEBUG("Leaving %s\n", __func__) +#define ___BREAKPOINT___ DEBUG("Breakpoint") +#endif + + +#define PANIC_FMT "PANIC %s:%d: " +#define PANIC_ARG __FILE__, __LINE__ + +#define PANIC_1(...) fprintf(stderr, PANIC_FMT __VA_ARGS__, PANIC_ARG) +#define PANIC_2(fmt, ...) fprintf(stderr, PANIC_FMT fmt, PANIC_ARG, __VA_ARGS__) +#define PANIC_3 PANIC_2 +#define PANIC_4 PANIC_2 +#define PANIC_5 PANIC_2 +#define PANIC_6 PANIC_2 +#define PANIC_7 PANIC_2 +#define PANIC_8 PANIC_2 +#define PANIC_9 PANIC_2 + +#define __PANIC(N, ...) PANIC_ ## N(__VA_ARGS__) // N -> 1 +#define _PANIC(N, ...) __PANIC(N, __VA_ARGS__) // N -> (1) +#define PANIC(...) _PANIC(NUM_ARGS(__VA_ARGS__), __VA_ARGS__) + + +#define panic(...) 
\ + do { \ + PANIC(__VA_ARGS__); \ + halt(SIGABRT, "Internal error."); \ + } while (0) + + +int abort_report(const char *fmt, ...); +int debug_report(const char *fmt, ...); +int raise_report(int signo, const char *fmt, ...); +int internal_error(int signo, const char *fmt, ...); + + +//typedef void (*sig_handler_t)(int signo); +//void sigreg(sig_handler_t handler); + +int set_errno(int number); + + +#endif + diff --git a/util/time.c b/util/time.c new file mode 100644 index 0000000..a337acb --- /dev/null +++ b/util/time.c @@ -0,0 +1,87 @@ +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include "time.h" + +/* + * struct timeval { + * time_t tv_sec; // Seconds + * suseconds_t tv_usec; // Microseconds + * } + */ + +#define NANO_IN_MILLI 1000000 +#define NS_TO_MS(n) ((n)/NANO_IN_MILLI) +#define MS_TO_NS(m) ((m)*NANO_IN_MILLI) + + +int clock_seconds(void) +{ + return clock() / CLOCKS_PER_SEC; +} + + +/** + * ms_since_midnight + * ````````````````` + * Return: number of milliseconds since midnight (12:00AM) + */ +int ms_since_midnight(void) +{ + struct timeval tv; + + if (gettimeofday(&tv, NULL) != 0) { + return -1; + } + + return ((tv.tv_sec % SEC_IN_DAY) * 1000 + tv.tv_usec / 1000); +} + + +/** + * sec_since_midnight + * `````````````````` + * Return: number of seconds since midnight (12:00AM) + */ +int sec_since_midnight(void) +{ + struct timeval tv; + + if (gettimeofday(&tv, NULL) != 0) { + return -1; + } + + return (tv.tv_sec % SEC_IN_DAY); +} + + + +/** + * sleep_ns + * ```````` + * Sleep for the specified number of nanoseconds. + * + * @nanoseconds: Number of nanoseconds to sleep. + * Return : Nothing. + */ +void sleep_ns(long nanoseconds) +{ + const struct timespec ts = { .tv_nsec = nanoseconds }; + nanosleep(&ts, NULL); +} + + +/** + * sleep_ms + * ```````` + * Sleep for the specified number of milliseconds. + * + * @milliseconds : Number of milliseconds to sleep. + * Return : Nothing. 
/**
 * uctohex -- convert a byte into two lowercase hex characters
 * @lower: receives the hex digit for the low nibble  (value & 0x0f)
 * @upper: receives the hex digit for the high nibble (value >> 4)
 * @value: the byte to decompose
 *
 * Defined without `inline` so that an external definition is emitted
 * and calls from other functions link under C99/C11 rules.
 */
void uctohex(char *lower, char *upper, char value)
{
        static const char digits[] = "0123456789abcdef";
        /* Work on the unsigned byte so a negative char cannot leak sign bits. */
        const unsigned char byte = (unsigned char)value;

        *lower = digits[byte & 0x0f];
        *upper = digits[byte >> 4];
}


/**
 * strtohex -- convert a string of bytes into a string of hex characters
 * @dst: the destination buffer; must hold at least (2 * len) + 1 bytes
 * @src: the source buffer
 * @len: the size of the source buffer
 *
 * Each source byte becomes two characters, high nibble first, so the
 * byte 0xa5 is written as "a5". The result is NUL-terminated.
 */
void strtohex(char *dst, char *src, size_t len)
{
        size_t i;

        for (i = 0; i < len; i++) {
                /* High nibble goes in the first of the two output slots. */
                uctohex(&dst[(2 * i) + 1], &dst[2 * i], src[i]);
        }
        dst[2 * len] = '\0'; /* Make dst a proper string */
}
Must be at least 64 bytes + * @hash: the data used to generate the sha256sum + */ +/*void sha256gen(char *hex, void *hash)*/ +/*{*/ + /*#define SHA32 32*/ + /*sph_sha256_context context;*/ + /*char output[SHA32];*/ + + /*sph_sha256_init(&context);*/ + /*sph_sha256(&context, hash, sizeof(hash));*/ + /*sph_sha256_close(&context, output);*/ + + /*strtohex(hex, output, SHA32);*/ +/*}*/ + + +/** + * nsleep -- nanosleep made easy + */ +/*void nsleep(long nanoseconds)*/ +/*{*/ + /*const struct timespec ts = { .tv_nsec = nanoseconds };*/ + /*nanosleep(&ts, NULL);*/ +/*}*/ + + diff --git a/util/util.h b/util/util.h new file mode 100644 index 0000000..aed5f4a --- /dev/null +++ b/util/util.h @@ -0,0 +1,631 @@ +#ifndef _UTIL_H +#define _UTIL_H + + +/* + * + * "If you lie to your compiler, it will get its revenge." + * + * -- Henry Spencer + * + * + */ + + +/****************************************************************************** + * COMPILE-TIME CHECKS AND BALANCES + ******************************************************************************/ + +/** + * __CHECKER__ + * ``````````` + * Provide compile-time reminders, diagnostics, warnings/errors + * + * USAGE + * Define the macro __CHECKER__ + */ +#ifndef __CHECKER__ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void*)0) +#define BUILD_BUG_ON(condition) +#define BUILD_BUG() (0) +#else /* __CHECKER__ */ + + + +/** + * BUILD_BUG_ON + * ```````````` + * Break compile if some condition is true. + * + * @condition: the condition which the compiler should know is false. + * + * USAGE + * + * If you have some code which relies on certain constants + * being equal, or other compile-time-evaluated conditions, + * you can use BUILD_BUG_ON to detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative + * array, but gcc (as of 4.4) only emits that error for obvious + * cases (eg. not arguments to inline functions). 
So as a fallback + * we use the optimizer; if it can't prove the condition is false, + * it will cause a link error on the undefined + * + * "__build_bug_on_failed". + * + * This error message can be harder to track down though, hence the + * two different methods. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif + + + + +/** + * BUILD_BUG + * ````````` + * Break the compile if called. + * + * If you have some code that you expect the compiler + * to eliminate at build time, you should use BUILD_BUG + * to detect if it is, in fact, unexpectedly used. + */ +#define BUILD_BUG() \ + do { \ + extern void __build_bug_failed(void) \ + __linktime_error("BUILD_BUG failed"); \ + __build_bug_failed(); \ + } while (0) + + + + +/** + * BUILD_BUG_ON_NOT_POWER_OF_2 + * ``````````````````````````` + * Force a compilation error if a constant expression is + * not a power of 2 + * + * @n: Numeric constant to be checked. + */ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + + + + +/** + * BUILD_BUG_ON_ZERO / NULL + * ```````````````````````` + * @e: Condition (expression). + * + * Force a compilation error if condition is true, but also + * produce a result (of value 0 and type size_t), so the + * expression can be used e.g. in a structure initializer + * (or where-ever else comma expressions aren't permitted). 
+ */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) + + + + +/** + * BUILD_BUG_ON_INVALID + * ```````````````````` + * @e: Condition (expression) + * + * Permits the compiler to check the validity of the expression, + * but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + + + + +#endif /* __CHECKER__ */ + + + + +/****************************************************************************** + * TYPE CHECKING: "GOOD ENOUGH" + ******************************************************************************/ + +/** + * VARIABLE + * ```````` + * Test that a macro argument is a variable. + * + * @v: Macro argument, maybe a variable, but maybe not! + * + * NOTES + * The trick is to declare an enumeration inside a new scope, + * with the same name as the (potential) variable. Failure + * will generate a compiler error. + */ +#define VARIABLE(v) { enum v { }; } + + +/** + * V_ASSIGN + * ```````` + * Assign a value to a variable inside a macro. + * + * @variable: Macro argument, hopefully a variable. + * @value : Value to be assigned to the variable. + */ +#define V_ASSIGN(variable, value) \ + VARIABLE(variable); variable = value; + + +/** + * __same_type + * ``````````` + * Test that two types/vars (ignoring qualifiers) are the same. + * + * @a: Variable with some type. + * @b: Variable with some type. + */ +#ifndef __same_type +#define __same_type(a,b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + + +/** + * __must_be_array + * ``````````````` + * Test that a pointer was actually declared as an array (contiguous). + * + * @arr: Pointer to a potential array. + * + * NOTES + * &a[0] degrades to a pointer, which is a different type than + * the address of a declared array. 
Usually the compiler will + * print a warning and move on, but BUILD_BUG_ON_ZERO ensures + * the whole thing gets called off. + */ +#ifdef __CHECKER__ +#define __must_be_array(arr) 0 +#else +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#endif + + +/** + * ARRAY_SIZE + * `````````` + * Determine the number of elements in a dynamic array (at runtime!). + * + * @arr: Pointer to an array. + * + * CAVEAT + * The array must have been declared as a bona fide array, e.g. + * + * my_array[13]; + * + * An allocated region of memory, which is usually "the same thing" + * as an array, is not valid here. + * + * NOTES + * This one is a pretty venerable trick; the version from the Linux + * kernel polices a common mistake by testing that the argument be + * a true array. + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + + + +/****************************************************************************** + * OPTIMIZATION + ******************************************************************************/ + +/** + * likely/unlikely + * ``````````````` + * Branch prediction macros. + * + * NOTES + * gcc will emit instructions causing the branch prediction to + * favor the instruction on the "likely" side, re-arranging the + * jumps so that it gets tested first. + * + * USAGE + * if (unlikely(c < 4)) { + * special code + * } + * + * CAVEAT + * There has been some evidence that performance improvements here are + * negligible in all but the most special cases. + */ +#ifdef __GNUC__ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + + + +/****************************************************************************** + * MATHS + ******************************************************************************/ + +/** + * _abs_ + * ````` + * Absolute value for quantities <= 32 bits. 
/*
 * _abs_ (continued)
 *
 * @x    : unsigned/signed long, short, and char.
 * Return: signed long
 *
 * NOTES
 *      Written as a statement expression, ({ ... }), so that the macro
 *      yields a value. The result variable has a name that is unlikely
 *      to collide with the caller's argument.
 *
 * CAVEAT
 *      The most negative value of the working type has no positive
 *      counterpart; negating it overflows.
 */
#define _abs_(x)                                                \
({                                                              \
        long __abs_ret;                                         \
        if (sizeof(x) == sizeof(long)) {                        \
                long __x = (x);                                 \
                __abs_ret = (__x < 0) ? -__x : __x;             \
        } else {                                                \
                int __x = (x);                                  \
                __abs_ret = (__x < 0) ? -__x : __x;             \
        }                                                       \
        __abs_ret;                                              \
})


/**
 * _abs64_
 * ```````
 * Absolute value for quantities <= 64 bits.
 *
 * @x    : int64_t, long long, etc.
 * Return: uint64_t
 *
 * NOTES
 *      The argument is held in a signed 64-bit temporary so that the
 *      sign test can succeed. The negation is done in unsigned
 *      arithmetic, which is well defined even for INT64_MIN.
 */
#define _abs64_(x)                                              \
({                                                              \
        int64_t __x = (x);                                      \
        (__x < 0) ? (uint64_t)0 - (uint64_t)__x                 \
                  : (uint64_t)__x;                              \
})



/**
 * min
 * ```
 * Find the minimum value between two variables.
 *
 * @x: Numeric variable.
 * @y: Numeric variable.
 *
 * NOTE
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison: comparing the addresses of the two
 *      temporaries makes the compiler complain when their types
 *      differ. Each argument is evaluated exactly once.
 */
#define min(x, y)                                               \
({                                                              \
        __typeof__(x) _min1 = (x);                              \
        __typeof__(y) _min2 = (y);                              \
        (void) (&_min1 == &_min2);                              \
        _min1 < _min2 ? _min1 : _min2;                          \
})


/**
 * max
 * ```
 * Find the maximum value between two variables.
 *
 * @x: Numeric variable.
 * @y: Numeric variable.
 *
 * NOTE
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison.
 */
#define max(x, y)                                               \
({                                                              \
        __typeof__(x) _max1 = (x);                              \
        __typeof__(y) _max2 = (y);                              \
        (void) (&_max1 == &_max2);                              \
        _max1 > _max2 ? _max1 : _max2;                          \
})



/**
 * min3
 * ````
 * Find the minimum value among three variables.
 *
 * @x: Numeric variable.
 * @y: Numeric variable.
 * @z: Numeric variable.
 *
 * NOTE
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison.
 */
#define min3(x, y, z)                                           \
({                                                              \
        __typeof__(x) _min1 = (x);                              \
        __typeof__(y) _min2 = (y);                              \
        __typeof__(z) _min3 = (z);                              \
        (void) (&_min1 == &_min2);                              \
        (void) (&_min1 == &_min3);                              \
        _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3)         \
                      : (_min2 < _min3 ? _min2 : _min3);        \
})



/**
 * max3
 * ````
 * Find the maximum value among three variables.
 *
 * @x: Numeric variable.
 * @y: Numeric variable.
 * @z: Numeric variable.
 */
/*
 * max3 (continued)
 *
 * NOTE
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison.
 */
#define max3(x, y, z)                                           \
({                                                              \
        __typeof__(x) _max1 = (x);                              \
        __typeof__(y) _max2 = (y);                              \
        __typeof__(z) _max3 = (z);                              \
        (void) (&_max1 == &_max2);                              \
        (void) (&_max1 == &_max3);                              \
        _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3)         \
                      : (_max2 > _max3 ? _max2 : _max3);        \
})



/**
 * min_not_zero
 * ````````````
 * Find the non-zero minimum value between two variables.
 *
 * @x: Numeric variable.
 * @y: Numeric variable.
 *
 * NOTES
 *      If one argument is zero the other is returned; if both are
 *      zero the result is zero.
 *
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison.
 */
#define min_not_zero(x, y)                                      \
({                                                              \
        __typeof__(x) __x = (x);                                \
        __typeof__(y) __y = (y);                                \
        (void) (&__x == &__y);                                  \
        __x == 0 ? __y                                          \
                 : ((__y == 0) ? __x                            \
                               : ((__x < __y) ? __x : __y));    \
})



/**
 * clamp
 * `````
 * Clamp the value of a variable to a given range.
 *
 * @val: Numeric variable.
 * @min: Minimum allowable value.
 * @max: Maximum allowable value.
 *
 * NOTES
 *      Performs strict type-checking. See the "unnecessary"
 *      pointer comparison.
 */
#define clamp(val, min, max)                                    \
({                                                              \
        __typeof__(val) __val = (val);                          \
        __typeof__(min) __min = (min);                          \
        __typeof__(max) __max = (max);                          \
        (void) (&__val == &__min);                              \
        (void) (&__val == &__max);                              \
        __val = __val < __min ? __min : __val;                  \
        __val > __max ? __max : __val;                          \
})



/**
 * min_t
 * `````
 * Find the minimum value between two variables of given type.
 *
 * @type: Type of the variables.
 * @x   : Numeric variable.
 * @y   : Numeric variable.
 *
 * NOTE
 *      This macro performs no type-checking, and uses temporary
 *      variables of type 'type' to make all comparisons.
 */
#define min_t(type, x, y)                                       \
({                                                              \
        type __min1 = (x);                                      \
        type __min2 = (y);                                      \
        __min1 < __min2 ? __min1 : __min2;                      \
})


/**
 * max_t
 * `````
 * Find the maximum value between two variables of given type.
 *
 * @type: Type of the variables.
 * @x   : Numeric variable.
 * @y   : Numeric variable.
 */
/*
 * max_t (continued)
 *
 * NOTE
 *      This macro performs no type-checking, and uses temporary
 *      variables of type 'type' to make all comparisons.
 */
#define max_t(type, x, y)                                       \
({                                                              \
        type __max1 = (x);                                      \
        type __max2 = (y);                                      \
        __max1 > __max2 ? __max1 : __max2;                      \
})



/**
 * clamp_t
 * ```````
 * Clamp the value of a variable to a given range.
 *
 * @type: Type of @val.
 * @val : Numeric variable.
 * @min : Minimum allowable value.
 * @max : Maximum allowable value.
 *
 * NOTE
 *      This macro performs no type-checking, and uses temporary
 *      variables of type 'type' to make all comparisons.
 */
#define clamp_t(type, val, min, max)                            \
({                                                              \
        type __val = (val);                                     \
        type __min = (min);                                     \
        type __max = (max);                                     \
        __val = __val < __min ? __min : __val;                  \
        __val > __max ? __max : __val;                          \
})



/**
 * clamp_val
 * `````````
 * Clamp the value of a variable to a given range of the same type.
 *
 * @val: Numeric variable
 * @min: Minimum allowable value
 * @max: Maximum allowable value
 *
 * NOTE
 *      This macro performs no type-checking, and uses temporary
 *      variables of the same type as @val to make all comparisons.
 *
 *      This is useful when @val is an unsigned type and @min/@max
 *      are literals that would otherwise be assigned a signed integer
 *      type.
 */
#define clamp_val(val, min, max)                                \
({                                                              \
        __typeof__(val) __val = (val);                          \
        __typeof__(val) __min = (min);                          \
        __typeof__(val) __max = (max);                          \
        __val = __val < __min ? __min : __val;                  \
        __val > __max ? __max : __val;                          \
})


/**
 * swap
 * ````
 * Swap the value of two variables.
 *
 * @a: Some variable.
 * @b: Some variable.
 *
 * NOTE
 *      Uses a temporary variable with type matching that of @a.
 */
/*
 * swap (continued)
 *
 * Expands to a single do { ... } while (0) statement, so it needs a
 * trailing semicolon and is safe as the body of an unbraced if/else.
 * @a and @b are each evaluated twice.
 */
#define swap(a, b)                                              \
        do {                                                    \
                __typeof__(a) __tmp = (a);                      \
                (a) = (b);                                      \
                (b) = __tmp;                                    \
        } while (0)


/******************************************************************************
 * BOUNDS-CHECKING
 ******************************************************************************/

/*
 * Safe decrement and increment; the value of x is modified.
 *
 * dec() steps x down by one only while it is above min, inc() steps x
 * up by one only while it is below max. The expansion is fully
 * parenthesized so that it behaves as one expression wherever it is
 * used. x is evaluated more than once.
 */
#define dec(x, min) ((x) = ((x) > (min)) ? ((x) - 1) : (x))
#define inc(x, max) ((x) = ((x) < (max)) ? ((x) + 1) : (x))



/******************************************************************************
 * Ex libri Alan Holub
 ******************************************************************************/

/*
 * Test whether pointer p lies above, below, or within array a.
 *
 * TOOHIGH compares pointers rather than comparing the (signed)
 * pointer difference with the (unsigned) element count: that mixed
 * comparison converts a negative difference to a huge unsigned
 * value, which would report a pointer below the array as too high.
 */
#define TOOHIGH(a, p)  ((p) > &(a)[ARRAY_SIZE(a) - 1])
#define TOOLOW(a, p)   ((p) - (a) < 0)
#define INBOUNDS(a, p) (!(TOOHIGH(a,p) || TOOLOW(a,p)))

/*
 * Largest int available
 * on a machine
 */
#define LARGEST_INT ((int)(((unsigned)(~0)) >> 1))




/******************************************************************************
 * MISC. HORRORS
 ******************************************************************************/

/**
 * NUM_ARGS
 * Counts the number of VA_ARGS by means of an intense and heathen magic,
 * the particulars of which are not to be uttered here, nor anywhere else.
 *
 * NOTES
 *      (Uttered anyway.) The arguments are followed by the numbers
 *      63 down to 1 and the whole list is handed to NUM_ARGS_IMPL,
 *      which returns whatever lands in its 64th slot. Every argument
 *      pushes the countdown one slot to the right, so that slot holds
 *      the argument count.
 *
 * CAVEAT
 *      Handles 1 to 63 arguments. An empty argument list still fills
 *      the first slot, so NUM_ARGS() yields 1, not 0.
 */
#define NUM_ARGS(...) \
        NUM_ARGS_IMPL(__VA_ARGS__, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, \
                                   53, 52, 51, 50, 49, 48, 47, 46, 45, 44, \
                                   43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
                                   33, 32, 31, 30, 29, 28, 27, 26, 25, 24, \
                                   23, 22, 21, 20, 19, 18, 17, 16, 15, 14, \
                                   13, 12, 11, 10,  9,  8,  7,  6,  5,  4, \
                                    3,  2,  1)

#define NUM_ARGS_IMPL( _1,  _2,  _3,  _4,  _5,  _6,  _7,  _8,  _9, _10, \
                      _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \
                      _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, \
                      _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, \
                      _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, \
                      _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, \
                      _61, _62, _63, N, ...) \
        N
N /* replacement list of NUM_ARGS_IMPL, whose #define and parameter list are on the preceding line */ + + + /* CAT: paste tokens a and b together with ##; the arguments are pasted as written, without being macro-expanded first */ +#define CAT(a,b) a ## b /* STR: turn a into a string literal exactly as written */ +#define STR(a) #a /* XSTR: macro-expand a first, then stringify the result through STR */ +#define XSTR(a) STR(a) /* EXP: expands to its argument unchanged */ +#define EXP(a) a + + /* SUDO_MESSAGE: fixed message text for an operation that must be performed as root */ +#define SUDO_MESSAGE "(Permission denied) This operation must be performed as root." + + /* closes a conditional opened outside this view (presumably the header's include guard -- confirm) */ +#endif + |