head 1.3; access; symbols eawk-base:1.1.1.1 BWK:1.1.1; locks; strict; comment @ * @; 1.3 date 2013.05.31.05.56.36; author agc; state Exp; branches; next 1.2; commitid nDg2ioUTUdn4iKRw; 1.2 date 2011.09.01.05.07.12; author agc; state Exp; branches; next 1.1; 1.1 date 2011.08.31.04.19.40; author agc; state Exp; branches 1.1.1.1; next ; 1.1.1.1 date 2011.08.31.04.19.40; author agc; state Exp; branches; next ; desc @@ 1.3 log @Re-do completely the way that external modules are loaded in eawk. For now, use a use("digest") style statement in the script to enable the bindings in a libeawk-digest.so to be loaded via dlopen(3). Also add readrec() and writerec() callbacks for use in embedded programs, so that eawk can really be deeply embedded in other things, and used as a command interpreter, etc. Other headline changes include: + added gawk-style bit arithmetic ops + added gawk-style asort/asorti + added eawk_readrec() and eawk_writerec() callbacks for use in embedded operations + added extension use via use() command + added extensions for base64 c circa db digests dirent getopt2 hmac http mat netdiff netpgp progress rs soundex termcap The upshot is that we can extend awk quite easily, as shown by this example script: % cat scripts/conn2.sh #! 
/bin/sh env LD_LIBRARY_PATH=lib:extend/c bin/eawk -v host=$1 ' BEGIN { use("c"); if (host == "") print "phooey#0"; addr["ai_family"] = PF_INET; if (getaddrinfo(host, "http", addr) != 0) print "phooey#1"; sd = socket(PF_INET, SOCK_STREAM, 0); if (connect(sd, addr["ai_addr"], addr["ai_addrlen"]) != 0) print "phooey#2"; s = sprintf("GET / HTTP/1.1\r\nHost: %s\r\nUser-Agent: eawk-2.1\r\n\r\n", host); write(sd, s, length(s)); url = read(sd, 4096); print url; close(sd); }' % sh scripts/conn2.sh www.netbsd.org HTTP/1.1 200 OK Date: Fri, 31 May 2013 05:49:44 GMT Server: Apache/2.2.24 (Unix) Last-Modified: Sun, 19 May 2013 17:44:20 GMT ETag: "83765-4fd9-4dd15c40bc500" Accept-Ranges: none Content-Length: 20441 Content-Type: text/html; charset=ISO-8859-1 The NetBSD Project dbg) printf x #else # define DPRINTF(eawk, x) #endif #ifndef USE_ARG #define USE_ARG(x) /*LINTED*/(void)&x #endif #endif @ 1.2 log @add bit arithmetic operations to eawk (implementation based on the gawk man page). this provides: and(x, y) (in C, x & y) or(x, y) (in C, x | y) xor(x, y) (in C, x ^ y) lshift(x, n) (in C, x << n) rshift(x, n) (in C, x >> n) compl(x) (in C, ~x) operations, with exactly the same calling conventions as gawk. @ text @d71 7 a77 6 #define ISREC(n) ((n)->type & REC) #define ISFLD(n) ((n)->type & FLD) #define ISSTR(n) ((n)->type & STR) #define ISNUM(n) ((n)->type & NUM) #define ISARR(n) ((n)->type & ARR) #define ISFCN(n) ((n)->type & FCN) d81 2 a82 2 /* #define freeable(p) (!((p)->type & DONTFREE)) */ #define FREEABLE(p) ( ((p)->type & (STR|DONTFREE)) == STR ) a84 10 /* awkcell_t.type values: */ #define NUM 01 /* number value is valid */ #define STR 02 /* string value is valid */ #define DONTFREE 04 /* string space is not freeable */ #define CON 010 /* this is a constant */ #define ARR 020 /* this is an array */ #define FCN 040 /* this is a function name */ #define FLD 0100 /* this is a field $1, $2, ... 
*/ #define REC 0200 /* this is $0 */ d103 1 a157 2 extern awkcell_t *eawk_lookup(const char *, awkarray_t *); extern awknum_t eawk_setfval(eawk_t *, awkcell_t *, awknum_t); a158 3 extern char *eawk_setsval(eawk_t *, awkcell_t *, const char *); extern awknum_t eawk_getfval(eawk_t *, awkcell_t *); extern char *eawk_getsval(eawk_t *, awkcell_t *); a192 1 awkcell_t *eawk_gettemp(eawk_t *); d222 1 a222 7 awkcell_t *eawk_dlopen(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlcall(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlclose(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlproto(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlalloc(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlfree(eawk_t *, awknode_t **, int); awkcell_t *eawk_dlfield(eawk_t *, awknode_t **, int); @ 1.1 log @Initial revision @ text @d95 1 a95 1 /* function types */ d213 1 @ 1.1.1.1 log @Embedded Awk ============ This is a heresy I have done, of my own free will and volition, and which I now know as being a sin. Firstly, I have butchered the one true awk source code, made it re-entrant and embeddable in C programs, and now present it as a library (libeawk) and a small driver program (eawk). The driver program now uses getopt_long, and gives a good idea of how to use eawk in embedded code. Furthermore, I have "added" to the one true language. The additions are 4 functions: dlopen(handle, shared object name) dlproto(handle, C function prototype as a string) dlcall(handle, function, function args...) dlclose(handle) which allows you to do such abominations as: dlopen(libc, "libc"); dlproto(libc, "long write(int, awkptr, long)") dlcall(libc, "write", 1, "hi\n", 3) dlclose(libc) (i.e. allows interfacing to shared libraries and shared objects without any C glue or other shim in between the scripting language and the compiled library). 
Please note that you can specify the prototype at the same time as the foreign function call, with dlcall: dlopen(libc, "libc"); dlcall(libc, "long write(int, awkptr, long)", 1, "hi\n", 3) and then: % eawk 'BEGIN { dlopen(libc, "libc"); dlcall(libc, "int printf(awkptr)", "Hello world\n") }' /dev/null Hello world % In fact, the following scripts are all equivalent: % eawk 'BEGIN { dlopen(libc, "libc"); dlcall(libc, "long write(int, awkptr, long)", 1, "Hello world\n", 12) }' /dev/null Hello world % eawk 'BEGIN { dlopen(libc, "libc"); dlcall(libc, "int printf(awkptr)", "Hello world\n") }' /dev/null Hello world % eawk 'BEGIN { dlopen(libc, "libc"); dlcall(libc, "int fprintf(cvar, awkptr)", "stdout", "Hello world\n") }' /dev/null Hello world The types of the arguments, and the return type, given in the dlproto() calls are important: awkptr - a string as passed from the eawk script cptr - a pointer to an object in the compiled shared object cref - the address of a pointer to an object in the compiled shared object. This is used to map the construct: &cp into an awk string cvar - the awk string which maps to a compiled well-known variable in the compiled shared object, typically stdin, stdout and stderr void - no return type bool - the boolean type int - standard integer type on this machine long - native long on this machine int64 - 64-bit data type In order to support foreign functions which typically use a structure passed into every function as a handle (very much like the eawk implementation here), I've also added two other functions which can be called from scripts: buf = dlalloc(size) dlfree(buf) and also a new prototype keyword called "cptr" - this is functionally equivalent to long, but makes it more intuitively obvious that the argument should be pre-allocated storage (at the native layer).
% eawk 'BEGIN { dlopen(libc, "libc") size = 1024 buf = dlalloc(size) dlcall(libc, "int snprintf(cptr, int, awkptr, int)", buf, size, "allocated size is %d\n", size) dlcall(libc, "int printf(cptr)", buf) dlfree(buf) }' /dev/null allocated size is 1024 % Finally, we need a way to get information back from C structures and storage into an awk script, and we do that with the var = dlfield(storage, offset, type) function. This can be used as follows: % eawk 'BEGIN { dlopen(libc, "libc") st = dlalloc(1024) dlcall(libc, "int stat(awkptr, cptr)", "/etc/group", st) mode = dlfield(st, 8, "int16") printf("%s mode is %o\n", "/etc/group", mode) dlfree(st) }' /dev/null /etc/group mode is 100644 % To illustrate some of the dlcall features a bit further, this script will print out the keys in the user's keyring, by directly calling functionality exported from libnetpgp: % eawk ' BEGIN { dlopen(libc, "libc") dlopen(libnetpgp, "libnetpgp") netpgp = dlalloc(2048) ret = dlcall(libnetpgp, "int netpgp_set_homedir(cptr, awkptr, awkptr, int)", netpgp, ENVIRON["HOME"], "/.gnupg", quiet = 1) ret = dlcall(libnetpgp, "int netpgp_init(cptr)", netpgp) } END { ret = dlcall(libnetpgp, "int netpgp_list_keys_json(cptr, cref, int)", netpgp, json, psigs = 0) ret = dlcall(libnetpgp, "int netpgp_format_json(cvar, cptr, int)", "stdout", json, psigs = 0) }' /dev/null 126 keys found signature 2048/RSA (Encrypt or Sign) 1b68dcfcc0596823 2004-01-12 Key fingerprint: d415 9deb 336d e4cc cdfa 00cd 1b68 dcfc c059 6823 uid Alistair Crooks uid Alistair Crooks uid Alistair Crooks uid Alistair Crooks uid Alistair Crooks (Yahoo!) encryption 2048/RSA (Encrypt or Sign) 79deb61e488eee74 2004-01-12 ...
% Note that the prototype for netpgp_list_keys_json() above is: int netpgp_list_keys_json(netpgp_t *, char **, const int); and the prototype for netpgp_format_json is: int netpgp_format_json(void *, const char *, const int); and so the signature of the compiled function in the shared object defines the types of the arguments that are passed via dlcall. Another example is that of calculating a digest using the SHA256_File function in libc. % eawk 'BEGIN { f = "../Makefile" dlopen(libc, "libc") buf = dlalloc(65) dlcall(libc, "cptr SHA256_File(awkptr, cptr)", f, buf) dlcall(libc, "int printf(awkptr, awkptr, cptr)", "SHA256 (%s) = %s\n", f, buf) }' SHA256 (../Makefile) = a6ccb2e57801867720b434d8dfc248d62389c518457ea1a022861819151f2b1f % digest sha256 ../Makefile SHA256 (../Makefile) = a6ccb2e57801867720b434d8dfc248d62389c518457ea1a022861819151f2b1f % I've had these changes around in my own tree for over 2 and a half years. It's finally time to commit them. Alistair Crooks Wed Aug 30 19:45:50 PDT 2011 @ text @@