diff -Nru clearsilver-0.7.0/Makefile clearsilver-0.7.1/Makefile --- clearsilver-0.7.0/Makefile Mon May 20 00:39:04 2002 +++ clearsilver-0.7.1/Makefile Wed Jul 31 17:01:40 2002 @@ -76,8 +76,8 @@ mkdir -p $$mdir; \ done -CS_DISTDIR = clearsilver-0.6.2 -CS_LABEL = CLEARSILVER-0_6_2 +CS_DISTDIR = clearsilver-0.7.0 +CS_LABEL = CLEARSILVER-0_7_0 CS_FILES = LICENSE CS_LICENSE rules.mk Makefile util cs cgi python scripts mod_ecs imd cs_dist: rm -rf $(CS_DISTDIR) diff -Nru clearsilver-0.7.0/cgi/Makefile clearsilver-0.7.1/cgi/Makefile --- clearsilver-0.7.0/cgi/Makefile Fri Jan 11 15:42:08 2002 +++ clearsilver-0.7.1/cgi/Makefile Wed Jul 31 17:01:41 2002 @@ -15,7 +15,17 @@ STATIC_OBJ = $(STATIC_SRC:%.c=%.o) STATIC_CSO = $(STATIC_EXE:%.cgi=%.cso) -CFLAGS += -I$(NEOTONIC_ROOT) -DHTML_COMPRESSION +CFLAGS += -I$(NEOTONIC_ROOT) + +ifeq ($(USE_ZLIB),1) +CFLAGS += -DHTML_COMPRESSION +endif + +ifeq ($(OSNAME),WindowsNT 0) +CFLAGS += -I$(NEOTONIC_ROOT)/util/os_win +endif + + DLIBS += -lneo_cgi -lneo_cs -lneo_utl # -lefence LIBS += -L$(LIB_DIR) $(DLIBS) diff -Nru clearsilver-0.7.0/cgi/cgi.c clearsilver-0.7.1/cgi/cgi.c --- clearsilver-0.7.0/cgi/cgi.c Wed Jul 3 15:33:57 2002 +++ clearsilver-0.7.1/cgi/cgi.c Wed Jul 31 16:49:41 2002 @@ -438,6 +438,8 @@ return nerr_pass(err); } +#ifndef __WINDOWS_GCC__ + static void _launch_debugger (CGI *cgi, char *display) { pid_t myPid, pid; @@ -476,6 +478,8 @@ } } +#endif + static NEOERR *cgi_pre_parse (CGI *cgi) { NEOERR *err; @@ -510,6 +514,7 @@ if (err != STATUS_OK) return nerr_pass (err); } +#ifndef __WINDOWS_GCC__ { char *display; @@ -520,6 +525,7 @@ _launch_debugger(cgi, display); } } +#endif return STATUS_OK; } diff -Nru clearsilver-0.7.0/cgi/html.c clearsilver-0.7.1/cgi/html.c --- clearsilver-0.7.0/cgi/html.c Tue Jun 25 16:14:47 2002 +++ clearsilver-0.7.1/cgi/html.c Fri Aug 9 16:23:38 2002 @@ -93,7 +93,7 @@ #define SC_TYPE_EMAIL 3 static char *EmailRe = "[^][@:;<>\\\"()[:space:][:cntrl:]]+@[-+a-zA-Z0-9]+\\.[-+a-zA-Z0-9\\.]+[-+a-zA-Z0-9]"; -static 
char *URLRe = "((((ht|f)tp)|mailto):(//)?[^[:space:]>\"\t]*|www\\.[-a-z0-9\\.]+)[^[:space:];\t\">]*"; +static char *URLRe = "((http|https|ftp|mailto):(//)?[^[:space:]>\"\t]*|www\\.[-a-z0-9\\.]+)[^[:space:];\t\">]*"; static NEOERR *split_and_convert (char *src, int slen, STRING *out, int newlines, int space_convert) { @@ -640,6 +640,7 @@ int strip_match = -1; int state = 0; char amp[10]; + int amp_start = 0; char buf[10]; int ampl = 0; @@ -656,6 +657,7 @@ { state = 3; ampl = 0; + amp_start = x; } else if (src[x] == '<') { @@ -706,6 +708,14 @@ { if (ampl < sizeof(amp)-1) amp[ampl++] = tolower(src[x]); + else + { + /* broken html... just back up */ + x = amp_start; + err = string_append_char(&out_s, src[x]); + if (err) break; + state = 0; + } } x++; break; diff -Nru clearsilver-0.7.0/cgi/rfc2388.c clearsilver-0.7.1/cgi/rfc2388.c --- clearsilver-0.7.0/cgi/rfc2388.c Thu Apr 11 18:11:39 2002 +++ clearsilver-0.7.1/cgi/rfc2388.c Tue Sep 10 15:23:28 2002 @@ -205,6 +205,11 @@ if (err) break; err = string_appendn (line, p, l - (p-s)); if (err) break; + if (line->len > 50*1024*1024) + { + string_clear(line); + return nerr_raise(NERR_ASSERT, "read_header_line exceeded 50MB"); + } } return nerr_pass (err); } @@ -326,6 +331,7 @@ { NEOERR *err = STATUS_OK; STRING str; + HDF *child, *obj = NULL; FILE *fp = NULL; char buf[256]; char *p; @@ -434,41 +440,76 @@ /* Set up the cgi data */ if (!err) { - if (filename) - { - fseek(fp, 0, SEEK_SET); - snprintf (buf, sizeof(buf), "Query.%s", name); - err = hdf_set_value (cgi->hdf, buf, filename); - if (!err && type) - { - snprintf (buf, sizeof(buf), "Query.%s.Type", name); - err = hdf_set_value (cgi->hdf, buf, type); - } - if (!err) - { - snprintf (buf, sizeof(buf), "Query.%s.FileHandle", name); - err = hdf_set_int_value (cgi->hdf, buf, uListLength(cgi->files)); - } - if (!err && !unlink_files) + do { + /* FIXME: Hmm, if we've seen the same name here before, what should we do? 
+ */ + if (filename) { - char *path; - snprintf (buf, sizeof(buf), "Query.%s.FileName", name); - err = uListGet(cgi->filenames, uListLength(cgi->filenames)-1, - (void **)&path); - if (!err) err = hdf_set_value (cgi->hdf, buf, path); + fseek(fp, 0, SEEK_SET); + snprintf (buf, sizeof(buf), "Query.%s", name); + err = hdf_set_value (cgi->hdf, buf, filename); + if (!err && type) + { + snprintf (buf, sizeof(buf), "Query.%s.Type", name); + err = hdf_set_value (cgi->hdf, buf, type); + } + if (!err) + { + snprintf (buf, sizeof(buf), "Query.%s.FileHandle", name); + err = hdf_set_int_value (cgi->hdf, buf, uListLength(cgi->files)); + } + if (!err && !unlink_files) + { + char *path; + snprintf (buf, sizeof(buf), "Query.%s.FileName", name); + err = uListGet(cgi->filenames, uListLength(cgi->filenames)-1, + (void **)&path); + if (!err) err = hdf_set_value (cgi->hdf, buf, path); + } } - } - else - { - snprintf (buf, sizeof(buf), "Query.%s", name); - while (str.len && isspace(str.buf[str.len-1])) + else { - str.buf[str.len-1] = '\0'; - str.len--; + snprintf (buf, sizeof(buf), "Query.%s", name); + while (str.len && isspace(str.buf[str.len-1])) + { + str.buf[str.len-1] = '\0'; + str.len--; + } + if (!(cgi->ignore_empty_form_vars && str.len == 0)) + { + /* If we've seen it before... 
we force it into a list */ + obj = hdf_get_obj (cgi->hdf, buf); + if (obj != NULL) + { + int i = 0; + char buf2[10]; + char *t; + child = hdf_obj_child (obj); + if (child == NULL) + { + t = hdf_obj_value (obj); + err = hdf_set_value (obj, "0", t); + if (err != STATUS_OK) break; + i = 1; + } + else + { + while (child != NULL) + { + i++; + child = hdf_obj_next (child); + if (err != STATUS_OK) break; + } + if (err != STATUS_OK) break; + } + snprintf (buf2, sizeof(buf2), "%d", i); + err = hdf_set_value (obj, buf2, str.buf); + if (err != STATUS_OK) break; + } + err = hdf_set_value (cgi->hdf, buf, str.buf); + } } - if (!(cgi->ignore_empty_form_vars && str.len == 0)) - err = hdf_set_value (cgi->hdf, buf, str.buf); - } + } while (0); } string_clear(&str); diff -Nru clearsilver-0.7.0/cs/csparse.c clearsilver-0.7.1/cs/csparse.c --- clearsilver-0.7.0/cs/csparse.c Mon Jun 10 18:52:59 2002 +++ clearsilver-0.7.1/cs/csparse.c Tue Aug 20 16:46:25 2002 @@ -2242,6 +2242,27 @@ return STATUS_OK; } +static char* get_arg(char* top) +{ + int mode = 0; + char* p; + for (p = top; *p; p++) { + if (mode == 0) { + if (*p == ',') { + return p; + } else if (*p == '"') { + mode = 1; + } + } else { + if (*p == '"') { + mode = 0; + } + } + } + return NULL; +} + + static NEOERR *call_parse (CSPARSE *parse, int cmd, char *arg) { NEOERR *err; @@ -2323,7 +2344,7 @@ larg = carg; } x++; - a = strpbrk(s, ","); + a = get_arg(s); if (a == NULL) { last = TRUE; diff -Nru clearsilver-0.7.0/python/neo_util.c clearsilver-0.7.1/python/neo_util.c --- clearsilver-0.7.0/python/neo_util.c Thu Apr 25 18:47:20 2002 +++ clearsilver-0.7.1/python/neo_util.c Mon Aug 12 19:01:08 2002 @@ -200,8 +200,7 @@ char *name; HDF_ATTR *attr; - if (!PyArg_ParseTuple(args, "s:getAttrs(name)", &name)) - return NULL; + /* Brandon says this function does not work.... 
- jeske */ rv = PyList_New(0); if (rv == NULL) return NULL; @@ -344,8 +343,9 @@ PyObject *rv; char *name, *value; NEOERR *err; + int vlen = 0; - if (!PyArg_ParseTuple(args, "ss:setValue(name, value)", &name, &value)) + if (!PyArg_ParseTuple(args, "ss#:setValue(name, value)", &name, &value, &vlen)) return NULL; err = hdf_set_value (ho->data, name, value); @@ -363,7 +363,7 @@ char *name, *value, *key; NEOERR *err; - if (!PyArg_ParseTuple(args, "ssO:setValue(name, key, value)", &name, &key, &rv)) + if (!PyArg_ParseTuple(args, "ssO:setAttr(name, key, value)", &name, &key, &rv)) return NULL; if (PyString_Check(rv)) @@ -520,7 +520,7 @@ char *dest; NEOERR *err; - if (!PyArg_ParseTuple(args, "ss:copy(src, dest)", &src, &dest)) + if (!PyArg_ParseTuple(args, "ss:setSymLink(src, dest)", &src, &dest)) return NULL; err = hdf_set_symlink (ho->data, src, dest); diff -Nru clearsilver-0.7.0/python/p_neo_util.h clearsilver-0.7.1/python/p_neo_util.h --- clearsilver-0.7.0/python/p_neo_util.h Sat Apr 13 17:49:51 2002 +++ clearsilver-0.7.1/python/p_neo_util.h Tue Jul 23 16:10:21 2002 @@ -61,7 +61,7 @@ PyObject *c_api_object = PyDict_GetItemString(module_dict, "_C_API"); \ PyObject *c_api_num_o = PyDict_GetItemString(module_dict, "_C_API_NUM"); \ if (PyInt_AsLong(c_api_num_o) < P_NEO_CGI_POINTERS) { \ - PyErr_Format(PyExc_ImportError, "neo_cgi module doesn't match header compiled against, use of this module may cause a core dump: %ld < %ld", PyInt_AsLong(c_api_num_o), P_NEO_CGI_POINTERS); \ + PyErr_Format(PyExc_ImportError, "neo_cgi module doesn't match header compiled against, use of this module may cause a core dump: %ld < %ld", PyInt_AsLong(c_api_num_o), (long) P_NEO_CGI_POINTERS); \ } \ if (PyCObject_Check(c_api_object)) { \ NEO_PYTHON_API = (void **)PyCObject_AsVoidPtr(c_api_object); \ diff -Nru clearsilver-0.7.0/rules.mk clearsilver-0.7.1/rules.mk --- clearsilver-0.7.0/rules.mk Fri Jun 14 17:29:59 2002 +++ clearsilver-0.7.1/rules.mk Wed Jul 31 17:01:40 2002 @@ -5,6 +5,7 @@ ## 
OSNAME := $(shell uname -rs | cut -f 1-2 -d "." | cut -f 1 -d "-") +OSTYPE := $(shell uname -s) LIB_DIR = $(NEOTONIC_ROOT)libs/ @@ -14,10 +15,8 @@ ## 2.7.7 instead USE_DB2 = 1 -ifeq ($(USE_DB2),1) -DB2_INC = -I$(HOME)/src/db-2.7.7/dist -DB2_LIB = -L$(HOME)/src/db-2.7.7/dist -ldb -endif +USE_ZLIB = 1 + PYTHON_INC = -I/neo/opt/include/python2.2 ## Programs @@ -26,20 +25,35 @@ CC = gcc CPP = g++ -CFLAGS = -g -O2 -Wall -c -I$(NEOTONIC_ROOT) $(DB2_INC) -I/neo/opt/include +CFLAGS = -g -O2 -Wall -c -I$(NEOTONIC_ROOT) -I/neo/opt/include OUTPUT_OPTION = -o $@ LD = $(CC) -o LDFLAGS = -L$(LIB_DIR) LDSHARED = $(CC) -shared -fPi CPPLDSHARED = $(CPP) -shared -fPic -AR = $(MKDIR) $(LIB_DIR); ar -cr +AR = ar -cr DEP_LIBS = $(DLIBS:-l%=$(LIB_DIR)lib%.a) +LIBS = + +ifdef ($(OSTYPE),WindowsNT) +CFLAGS += -D__WINDOWS_GCC__=1 +USE_DB2 = 0 +USE_ZLIB = 0 +endif + +ifeq ($(USE_ZLIB),1) +LIBS += -lz +endif + +ifeq ($(USE_DB2),1) +DB2_INC = -I$(HOME)/src/db-2.7.7/dist +DB2_LIB = -L$(HOME)/src/db-2.7.7/dist -ldb +CFLAGS += $(DB2_INC) +endif .c.o: $(CC) $(CFLAGS) $(OUTPUT_OPTION) $< - -LIBS = -lz everything: depend all diff -Nru clearsilver-0.7.0/util/Makefile clearsilver-0.7.1/util/Makefile --- clearsilver-0.7.0/util/Makefile Fri May 10 17:41:07 2002 +++ clearsilver-0.7.1/util/Makefile Tue Sep 3 17:58:38 2002 @@ -8,11 +8,22 @@ UTL_LIB = $(LIB_DIR)libneo_utl.a UTL_SRC = neo_err.c neo_files.c neo_misc.c neo_test.c ulist.c neo_hdf.c \ - neo_str.c ulocks.c skiplist.c dict.c neo_date.c rcfs.c \ - wildmat.c filter.c + neo_str.c neo_date.c wildmat.c + +ifneq ($(OSNAME),WindowsNT 0) +UTL_SRC += ulocks.c skiplist.c dict.c filter.c rcfs.c neo_net.c neo_server.c + ifeq ($(USE_DB2),1) UTL_SRC += wdb.c endif + +else +CFLAGS += -Ios_win +UTL_SRC += snprintf.c os_win/regex.c os_win/mkstemp.c + +endif + + UTL_OBJ = $(UTL_SRC:%.c=%.o) TARGETS = $(UTL_LIB) diff -Nru clearsilver-0.7.0/util/neo_date.c clearsilver-0.7.1/util/neo_date.c --- clearsilver-0.7.0/util/neo_date.c Mon Aug 6 14:28:17 2001 +++ 
clearsilver-0.7.1/util/neo_date.c Wed Jul 31 16:49:42 2002 @@ -36,7 +36,11 @@ void neo_time_expand (const time_t tt, char *timezone, struct tm *ttm) { time_set_tz (timezone); +#ifdef __WINDOWS_GCC__ + ttm = localtime(&tt); +#else localtime_r (&tt, ttm); +#endif } time_t neo_time_compact (struct tm *ttm, char *timezone) diff -Nru clearsilver-0.7.0/util/neo_date.h clearsilver-0.7.1/util/neo_date.h --- clearsilver-0.7.0/util/neo_date.h Thu Apr 18 14:42:33 2002 +++ clearsilver-0.7.1/util/neo_date.h Wed Jul 31 16:49:42 2002 @@ -11,6 +11,8 @@ #ifndef _NEO_DATE_H_ #define _NEO_DATE_H_ 1 +#include "osdep.h" + __BEGIN_DECLS /* UTC time_t -> struct tm in local timezone */ diff -Nru clearsilver-0.7.0/util/neo_err.h clearsilver-0.7.1/util/neo_err.h --- clearsilver-0.7.0/util/neo_err.h Tue Mar 12 23:42:32 2002 +++ clearsilver-0.7.1/util/neo_err.h Wed Jul 31 16:49:42 2002 @@ -11,6 +11,8 @@ #ifndef __NEO_ERR_H_ #define __NEO_ERR_H_ 1 +#include "osdep.h" + __BEGIN_DECLS #define STATUS_OK ((NEOERR *)0) diff -Nru clearsilver-0.7.0/util/neo_files.c clearsilver-0.7.1/util/neo_files.c --- clearsilver-0.7.0/util/neo_files.c Thu Feb 28 16:53:41 2002 +++ clearsilver-0.7.1/util/neo_files.c Wed Jul 31 16:49:42 2002 @@ -18,6 +18,7 @@ #include #include #include + #include "neo_err.h" #include "neo_misc.h" #include "neo_files.h" @@ -42,7 +43,12 @@ if (mypath[x] == '/') { mypath[x] = '\0'; +#ifdef __WINDOWS_GCC__ + r = mkdir (mypath); +#else r = mkdir (mypath, mode); +#endif + if (r == -1 && errno != EEXIST) { return nerr_raise_errno(NERR_SYSTEM, "ne_mkdirs: mkdir(%s, %x) failed", mypath, mode); @@ -100,7 +106,7 @@ int fd; int w, l; - fd = open (path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + fd = open (path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP ); if (fd == -1) { return nerr_raise_errno (NERR_IO, "Unable to create file %s", path); diff -Nru clearsilver-0.7.0/util/neo_hdf.c clearsilver-0.7.1/util/neo_hdf.c --- 
clearsilver-0.7.0/util/neo_hdf.c Mon May 20 12:39:52 2002 +++ clearsilver-0.7.1/util/neo_hdf.c Wed Jul 31 16:49:42 2002 @@ -856,14 +856,14 @@ int x = 0; ml[x++] = '\n'; - nlen = 2 + (random() % (len-5)); + nlen = 2 + (os_random() % (len-5)); if (nlen == 0) { nlen = len / 2; } while (nlen) { - ml[x++] = ('A' + random() % 26); + ml[x++] = ('A' + os_random() % 26); nlen--; } ml[x++] = '\n'; @@ -1031,15 +1031,52 @@ NEOERR *hdf_write_file (HDF *hdf, char *path) { + NEOERR *err; FILE *fp; fp = fopen(path, "w"); if (fp == NULL) return nerr_raise_errno (NERR_IO, "Unable to open %s for writing", path); - hdf_dump_format (hdf, 0, fp); + err = hdf_dump_format (hdf, 0, fp); fclose (fp); + if (err) + { + unlink(path); + } + return nerr_pass(err); +} + +NEOERR *hdf_write_file_atomic (HDF *hdf, char *path) +{ + NEOERR *err; + FILE *fp; + char tpath[_POSIX_PATH_MAX]; + static int count = 0; + + snprintf(tpath, sizeof(tpath), "%s.%5.5f.%d", path, ne_timef(), count++); + + fp = fopen(tpath, "w"); + if (fp == NULL) + return nerr_raise_errno (NERR_IO, "Unable to open %s for writing", tpath); + + err = hdf_dump_format (hdf, 0, fp); + + fclose (fp); + + if (err) + { + unlink(tpath); + return nerr_pass(err); + } + if (rename(tpath, path) == -1) + { + unlink (tpath); + return nerr_raise_errno (NERR_IO, "Unable to rename file %s to %s", + tpath, path); + } + return STATUS_OK; } diff -Nru clearsilver-0.7.0/util/neo_hdf.h clearsilver-0.7.1/util/neo_hdf.h --- clearsilver-0.7.0/util/neo_hdf.h Thu Apr 25 18:47:18 2002 +++ clearsilver-0.7.1/util/neo_hdf.h Thu Jul 25 16:39:40 2002 @@ -76,6 +76,7 @@ NEOERR* hdf_read_file (HDF *hdf, char *path); NEOERR* hdf_write_file (HDF *hdf, char *path); +NEOERR* hdf_write_file_atomic (HDF *hdf, char *path); NEOERR* hdf_read_string (HDF *hdf, char *s); NEOERR* hdf_read_string_ignore (HDF *hdf, char *s, int ignore); diff -Nru clearsilver-0.7.0/util/neo_misc.c clearsilver-0.7.1/util/neo_misc.c --- clearsilver-0.7.0/util/neo_misc.c Thu Feb 28 16:53:41 2002 +++ 
clearsilver-0.7.1/util/neo_misc.c Wed Jul 31 16:49:42 2002 @@ -29,7 +29,11 @@ now = time(NULL); +#ifdef __WINDOWS_GCC__ + my_tm = *localtime(&now); +#else localtime_r(&now, &my_tm); +#endif strftime(tbuf, sizeof(tbuf), "%m/%d %T", &my_tm); @@ -120,6 +124,15 @@ return dest+l+1; } +#ifdef __WINDOWS_GCC__ + +double ne_timef (void) { + return 0.0; +} + + +#else + double ne_timef (void) { double f = 0; @@ -133,6 +146,7 @@ } return f; } +#endif static const UINT32 CRCTable[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, diff -Nru clearsilver-0.7.0/util/neo_misc.h clearsilver-0.7.1/util/neo_misc.h --- clearsilver-0.7.0/util/neo_misc.h Thu Feb 28 16:53:41 2002 +++ clearsilver-0.7.1/util/neo_misc.h Wed Jul 31 16:49:42 2002 @@ -11,6 +11,8 @@ #ifndef __NEO_MISC_H_ #define __NEO_MISC_H_ 1 +#include "osdep.h" + __BEGIN_DECLS #include diff -Nru clearsilver-0.7.0/util/neo_net.c clearsilver-0.7.1/util/neo_net.c --- clearsilver-0.7.0/util/neo_net.c Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/neo_net.c Sun Sep 1 19:00:46 2002 @@ -0,0 +1,653 @@ +/* + * Neotonic ClearSilver CGI Kit + * + * This code is made available under the terms of the + * Neotonic ClearSilver License. 
+ * http://www.neotonic.com/clearsilver/license.hdf + * + * Copyright (C) 2001 by Brandon Long + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "neo_err.h" +#include "neo_misc.h" +#include "neo_net.h" +#include "neo_str.h" + +/* Server side */ +NEOERR *net_listen(int port, int *fd) +{ + int sfd = 0; + int on = 1; +/* int flags; */ + struct sockaddr_in serv_addr; + + if ((sfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) + return nerr_raise_errno(NERR_IO, "Unable to create socket"); + + if (setsockopt (sfd, SOL_SOCKET, SO_REUSEADDR, (char *)&on, + sizeof(on)) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to setsockopt(SO_REUSEADDR)"); + } + + if(setsockopt (sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, + sizeof(on)) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to setsockopt(SO_KEEPALIVE)"); + } + + if(setsockopt (sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&on, + sizeof(on)) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to setsockopt(TCP_NODELAY)"); + } + serv_addr.sin_family = AF_INET; + serv_addr.sin_addr.s_addr = htonl(INADDR_ANY); + serv_addr.sin_port = htons(port); + + if (bind(sfd,(struct sockaddr *)&(serv_addr),sizeof(struct sockaddr)) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to bind to port %d", port); + } + + /* If set non-block, then we have to use select prior to accept... 
+ * typically we don't, so we'll leave this out until we have a need + * for it and then figure out how to work it into the common code */ + /* + flags = fcntl(sfd, F_GETFL, 0 ); + if (flags == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to get socket flags for port %d", + port); + } + + if (fcntl(sfd, F_SETFL, flags | O_NDELAY) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to set O_NDELAY for port %d", + port); + } + */ + + if (listen(sfd, 100) == -1) + { + close(sfd); + return nerr_raise_errno(NERR_IO, "Unable to listen on port %d", port); + } + *fd = sfd; + + return STATUS_OK; +} + +NEOERR *net_accept(NSOCK **sock, int sfd, int data_timeout) +{ + NSOCK *my_sock; + int fd; + struct sockaddr_in client_addr; + int len; + + len = sizeof(struct sockaddr_in); + while ((fd = accept(sfd, (struct sockaddr *)&client_addr, &len)) == -1) + { + if (errno == EINTR) continue; + return nerr_raise_errno(NERR_IO, "accept() returned error"); + } + + my_sock = (NSOCK *) calloc(1, sizeof(NSOCK)); + if (my_sock == NULL) + { + close(fd); + return nerr_raise(NERR_NOMEM, "Unable to allocate memory for NSOCK"); + } + my_sock->fd = fd; + my_sock->remote_ip = ntohl(client_addr.sin_addr.s_addr); + my_sock->remote_port = ntohs(client_addr.sin_port); + my_sock->data_timeout = data_timeout; + + *sock = my_sock; + + return STATUS_OK; +} + +/* Client side */ +NEOERR *net_connect(NSOCK **sock, char *host, int port, int conn_timeout, + int data_timeout) +{ + struct sockaddr_in serv_addr; + struct hostent hp; + struct hostent *php; + int fd; + int r = 0, x; + int flags; + struct timeval tv; + fd_set fds; + int optval; + int optlen; + NSOCK *my_sock; + + /* FIXME: This isn't thread safe... but there's no man entry for the _r + * version? 
*/ + + php = gethostbyname(host); + if (php == NULL) + { + return nerr_raise(NERR_IO, "Host not found: %s", hstrerror(h_errno)); + } + hp = *php; + + memset(&serv_addr, 0, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(port); + fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (fd == -1) + return nerr_raise_errno(NERR_IO, "Unable to create socket"); + + flags = fcntl(fd, F_GETFL, 0 ); + if (flags == -1) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Unable to get socket flags"); + } + + if (fcntl(fd, F_SETFL, flags | O_NDELAY) == -1) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Unable to set O_NDELAY"); + } + + x = 0; + while (hp.h_addr_list[x] != NULL) + { + memcpy(&(serv_addr.sin_addr), hp.h_addr_list[x], sizeof(struct in_addr)); + errno = 0; + r = connect(fd, (struct sockaddr *) &(serv_addr), sizeof(struct sockaddr_in)); + if (r == 0 || errno == EINPROGRESS) break; + x++; + } + if (r != 0) + { + if (errno != EINPROGRESS) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Unable to connect to %s:%d", + host, port); + } + tv.tv_sec = conn_timeout; + tv.tv_usec = 0; + + FD_ZERO(&fds); + FD_SET(fd, &fds); + + r = select(fd+1, NULL, &fds, NULL, &tv); + if (r == 0) + { + close(fd); + return nerr_raise(NERR_IO, "Connection to %s:%d failed: Timeout", host, + port); + } + if (r < 0) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Connection to %s:%d failed", host, + port); + } + + optlen = sizeof(optval); + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &optval, &optlen) == -1) + { + close(fd); + return nerr_raise_errno(NERR_IO, + "Unable to getsockopt to determine connection error"); + } + + if (optval) + { + close(fd); + errno = optval; + return nerr_raise_errno(NERR_IO, "Connection to %s:%d failed", host, + port); + } + } + /* Re-enable blocking... we'll use select on read/write for timeouts + * anyways, and if we want non-blocking version in the future we'll + * add a flag or something. 
+ */ + flags = fcntl(fd, F_GETFL, 0 ); + if (flags == -1) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Unable to get socket flags"); + } + + if (fcntl(fd, F_SETFL, flags & ~O_NDELAY) == -1) + { + close(fd); + return nerr_raise_errno(NERR_IO, "Unable to set O_NDELAY"); + } + + my_sock = (NSOCK *) calloc(1, sizeof(NSOCK)); + if (my_sock == NULL) + { + close(fd); + return nerr_raise(NERR_NOMEM, "Unable to allocate memory for NSOCK"); + } + my_sock->fd = fd; + my_sock->remote_ip = ntohl(serv_addr.sin_addr.s_addr); + my_sock->remote_port = port; + my_sock->data_timeout = data_timeout; + my_sock->conn_timeout = conn_timeout; + + *sock = my_sock; + + return STATUS_OK; +} + +NEOERR *net_close(NSOCK **sock) +{ + NEOERR *err; + + if (sock == NULL || *sock == NULL) return STATUS_OK; + err = net_flush(*sock); + close((*sock)->fd); + free((*sock)); + *sock = NULL; + return nerr_pass(err); +} + +/* Low level data interface ... we are implementing a buffered stream + * here, and the fill and flush are designed for that. More over, our + * buffered stream assumes a certain type of protocol design where we + * flush the write buffer before reading... there are possible protocols + * where this would be grossly inefficient, but I don't expect to use + * anything like that */ + +/* Also, an annoyance here... what to do with the EOF case? Currently, + * we're just returing with a ol of 0, which means in most cases when + * calling this we have to check that case as well as standard errors. + * We could raise an NERR_EOF or something, but that seems like + * overkill. We should probably have a ret arg for the case... */ +static NEOERR *net_fill(NSOCK *sock) +{ + NEOERR *err; + struct timeval tv; + fd_set fds; + int r; + + /* Ok, we are assuming a model where one side of the connection is the + * consumer and the other the producer... and then it switches. 
So we + * flush the output buffer (if any) before we read */ + if (sock->ol) + { + err = net_flush(sock); + if (err) return nerr_pass(err); + } + + /* Ok, we want connections to fail if they don't connect in + * conn_timeout... but with higher listen queues, the connection could + * actually connect, but the remote server won't get to it within the + * conn_timeout, we still want it to fail. We do that by using the + * conn_timeout on the first read ... this isn't quite the same as we + * might actually timeout at almost 2x conn_timeout (if we had to wait + * for connect and the first read) but its still better then waiting + * the full data timeout */ + if (sock->conn_timeout) + { + tv.tv_sec = sock->conn_timeout; + sock->conn_timeout = 0; + } + else + { + tv.tv_sec = sock->data_timeout; + } + tv.tv_usec = 0; + + FD_ZERO(&fds); + FD_SET(sock->fd, &fds); + + r = select(sock->fd+1, &fds, NULL, NULL, &tv); + if (r == 0) + { + return nerr_raise(NERR_IO, "read failed: Timeout"); + } + if (r < 0) + { + return nerr_raise_errno(NERR_IO, "select for read failed"); + } + + sock->ibuf[0] = '\0'; + r = read(sock->fd, sock->ibuf, NET_BUFSIZE); + if (r < 0) + { + return nerr_raise_errno(NERR_IO, "read failed"); + } + + sock->ib = 0; + sock->il = r; + + return STATUS_OK; +} + +NEOERR *net_flush(NSOCK *sock) +{ + fd_set fds; + struct timeval tv; + int r; + int x = 0; + + if (sock->conn_timeout) + { + tv.tv_sec = sock->conn_timeout; + } + else + { + tv.tv_sec = sock->data_timeout; + } + tv.tv_usec = 0; + + x = 0; + while (x < sock->ol) + { + FD_ZERO(&fds); + FD_SET(sock->fd, &fds); + + r = select(sock->fd+1, NULL, &fds, NULL, &tv); + if (r == 0) + { + return nerr_raise(NERR_IO, "write failed: Timeout"); + } + if (r < 0) + { + return nerr_raise_errno(NERR_IO, "select for write failed"); + } + + r = write(sock->fd, sock->obuf + x, sock->ol - x); + if (r < 0) + { + return nerr_raise_errno(NERR_IO, "select for write failed"); + } + x += r; + } + sock->ol = 0; + return STATUS_OK; +} + 
+/* hmm, we may need something to know how much we've read here... */ +NEOERR *net_read(NSOCK *sock, UINT8 *buf, int buflen) +{ + NEOERR *err; + int x = 0; + int l; + + x = buflen; + while (x > 0) + { + if (sock->il - sock->ib > 0) + { + if (sock->ib + x <= sock->il) + l = x; + else + l = sock->il - sock->ib; + + memcpy(buf + buflen - x, sock->ibuf + sock->ib, l); + sock->ib += l; + x -= l; + } + else + { + err = net_fill(sock); + if (err) return nerr_pass(err); + if (sock->il == 0) return STATUS_OK; + } + } + return STATUS_OK; +} + +NEOERR *net_read_line(NSOCK *sock, char **buf) +{ + NEOERR *err; + STRING str; + UINT8 *nl; + int l; + + string_init(&str); + + while (1) + { + if (sock->il - sock->ib > 0) + { + nl = memchr(sock->ibuf + sock->ib, '\n', sock->il - sock->ib); + if (nl == NULL) + { + l = sock->il - sock->ib; + err = string_appendn(&str, sock->ibuf + sock->ib, l); + sock->ib += l; + if (err) break; + } + else + { + l = nl - (sock->ibuf + sock->ib); + err = string_appendn(&str, sock->ibuf + sock->ib, l); + sock->ib += l; + if (err) break; + + *buf = str.buf; + return STATUS_OK; + } + } + else + { + err = net_fill(sock); + if (err) break; + if (sock->il == 0) return STATUS_OK; + } + } + string_clear(&str); + return nerr_pass(err); +} + +static NEOERR *_net_read_int(NSOCK *sock, int *i, char end) +{ + NEOERR *err; + int x = 0; + char buf[32]; + + while (x < sizeof(buf)) + { + while (sock->il - sock->ib > 0) + { + buf[x] = sock->ibuf[sock->ib++]; + if (buf[x] == end) break; + x++; + if (x == sizeof(buf)) break; + } + if (buf[x] == end) break; + err = net_fill(sock); + if (err) return nerr_pass(err); + if (sock->il == 0) return STATUS_OK; + } + + if (x == sizeof(buf)) + return nerr_raise(NERR_PARSE, "Format error on stream, expected '%c'", end); + + buf[x] = '\0'; + *i = atoi(buf); + + return STATUS_OK; +} + +NEOERR *net_read_binary(NSOCK *sock, UINT8 **b, int *blen) +{ + NEOERR *err; + UINT8 *data; + char buf[5]; + int l; + + err = _net_read_int(sock, &l, 
':'); + if (err) return nerr_pass(err); + + /* Special case to read a NULL */ + if (l < 0) + { + *b = NULL; + if (blen != NULL) *blen = l; + return STATUS_OK; + } + + data = (UINT8 *) malloc(l + 1); + if (data == NULL) + { + /* We might want to clear the incoming data here... */ + return nerr_raise(NERR_NOMEM, + "Unable to allocate memory for binary data %d" , l); + } + + err = net_read(sock, data, l); + if (err) + { + free(data); + return nerr_pass(err); + } + /* check for comma separator */ + err = net_read(sock, buf, 1); + if (err) + { + free(data); + return nerr_pass(err); + } + if (buf[0] != ',') + { + free(data); + return nerr_raise(NERR_PARSE, "Format error on stream, expected ','"); + } + + *b = data; + if (blen != NULL) *blen = l; + return STATUS_OK; +} + +NEOERR *net_read_str_alloc(NSOCK *sock, char **s, int *len) +{ + NEOERR *err; + int l; + + /* just use the binary read and null terminate the string... */ + err = net_read_binary(sock, (UINT8 **)s, &l); + if (err) return nerr_pass(err); + + if (*s != NULL) + { + (*s)[l] = '\0'; + } + if (len != NULL) *len = l; + return STATUS_OK; +} + +NEOERR *net_read_int(NSOCK *sock, int *i) +{ + return nerr_pass(_net_read_int(sock, i, ',')); +} + +NEOERR *net_write(NSOCK *sock, UINT8 *b, int blen) +{ + NEOERR *err; + int x = 0; + int l; + + x = blen; + while (x > 0) + { + if (sock->ol < NET_BUFSIZE) + { + if (sock->ol + x <= NET_BUFSIZE) + { + l = x; + } + else + { + l = NET_BUFSIZE - sock->ol; + } + + memcpy(sock->obuf + sock->ol, b + blen - x, l); + sock->ol += l; + x -= l; + } + else + { + err = net_flush(sock); + if (err) return nerr_pass(err); + } + } + return STATUS_OK; +} + +NEOERR *net_write_line(NSOCK *sock, char *s) +{ + NEOERR *err; + + err = net_write(sock, s, strlen(s)); + if (err) return nerr_pass(err); + err = net_write(sock, "\n", 1); + if (err) return nerr_pass(err); + return STATUS_OK; +} + +NEOERR *net_write_binary(NSOCK *sock, UINT8 *b, int blen) +{ + NEOERR *err; + char buf[32]; + + if (b == NULL) 
blen = -1; + + snprintf(buf, sizeof(buf), "%d:", blen); + err = net_write(sock, buf, strlen(buf)); + if (err) return nerr_pass(err); + + if (blen > 0) + { + err = net_write(sock, b, blen); + if (err) return nerr_pass(err); + } + + err = net_write(sock, ",", 1); + if (err) return nerr_pass(err); + return STATUS_OK; +} + +NEOERR *net_write_str(NSOCK *sock, char *s) +{ + NEOERR *err; + + if (s == NULL) + err = net_write_binary(sock, s, -1); + else + err = net_write_binary(sock, s, strlen(s)); + return nerr_pass(err); +} + +NEOERR *net_write_int(NSOCK *sock, int i) +{ + char buf[32]; + + snprintf(buf, sizeof(buf), "%d,", i); + return nerr_pass(net_write(sock, buf, strlen(buf))); +} + diff -Nru clearsilver-0.7.0/util/neo_net.h clearsilver-0.7.1/util/neo_net.h --- clearsilver-0.7.0/util/neo_net.h Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/neo_net.h Sun Sep 1 19:00:46 2002 @@ -0,0 +1,54 @@ +/* + * Neotonic ClearSilver CGI Kit + * + * This code is made available under the terms of the + * Neotonic ClearSilver License. 
+ * http://www.neotonic.com/clearsilver/license.hdf + * + * Copyright (C) 2001 by Brandon Long + */ + +#ifndef __NEO_NET_H_ +#define __NEO_NET_H_ 1 + +__BEGIN_DECLS + +#define NET_BUFSIZE 4096 + +typedef struct _neo_sock { + int fd; + int data_timeout; + int conn_timeout; + + UINT32 remote_ip; + int remote_port; + + /* incoming buffer */ + UINT8 ibuf[NET_BUFSIZE]; + int ib; + int il; + + /* outbound buffer */ + UINT8 obuf[NET_BUFSIZE]; + int ol; +} NSOCK; + +NEOERR *net_listen(int port, int *fd); +NEOERR *net_accept(NSOCK **sock, int fd, int data_timeout); +NEOERR *net_connect(NSOCK **sock, char *host, int port, int conn_timeout, int data_timeout); +NEOERR *net_close(NSOCK **sock); +NEOERR *net_read(NSOCK *sock, UINT8 *buf, int buflen); +NEOERR *net_read_line(NSOCK *sock, char **buf); +NEOERR *net_read_binary(NSOCK *sock, UINT8 **b, int *blen); +NEOERR *net_read_str_alloc(NSOCK *sock, char **s, int *len); +NEOERR *net_read_int(NSOCK *sock, int *i); +NEOERR *net_write(NSOCK *sock, UINT8 *b, int blen); +NEOERR *net_write_line(NSOCK *sock, char *s); +NEOERR *net_write_binary(NSOCK *sock, UINT8 *b, int blen); +NEOERR *net_write_str(NSOCK *sock, char *s); +NEOERR *net_write_int(NSOCK *sock, int i); +NEOERR *net_flush(NSOCK *sock); + +__END_DECLS + +#endif /* __NEO_NET_H_ */ diff -Nru clearsilver-0.7.0/util/neo_server.c clearsilver-0.7.1/util/neo_server.c --- clearsilver-0.7.0/util/neo_server.c Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/neo_server.c Fri Sep 6 17:59:59 2002 @@ -0,0 +1,225 @@ + +/* + * Neotonic ClearSilver CGI Kit + * + * This code is made available under the terms of the + * Neotonic ClearSilver License. 
+ * http://www.neotonic.com/clearsilver/license.hdf + * + * Copyright (C) 2001 by Brandon Long + */ + +/* Initial version based on multi-proc based server (like apache 1.x) + * + * Parts are: + * 1) server Init + * 2) sub-proc start + * 3) sub-proc init + * 4) sub-proc process request + * 5) sub-proc cleanup + * 6) server cleanup + * + * Parts 1 & 6 aren't part of the framework, and at this point, I don't + * think I need to worry about 3 & 5 either, but maybe in the future. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "neo_err.h" +#include "neo_net.h" +#include "ulocks.h" +#include "neo_server.h" + +static NEOERR *nserver_child_loop(NSERVER *server, int num) +{ + NEOERR *err = STATUS_OK, *clean_err; + int loop = 0; + NSOCK *child_sock; + + if (server->init_cb) + { + err = server->init_cb(server->data, num); + if (err) return nerr_pass(err); + } + + while (loop++ < server->num_requests) + { + err = fLock(server->accept_lock); + if (err) break; + err = net_accept(&child_sock, server->server_fd, server->data_timeout); + fUnlock(server->accept_lock); + if (err) break; + err = server->req_cb(server->data, num, child_sock); + if (err) + { + net_close(&child_sock); + } + else + { + err = net_close(&child_sock); + } + nerr_log_error(err); + nerr_ignore(&err); + } + ne_warn("nserver child loop handled %d connections", loop-1); + + if (server->clean_cb) + { + clean_err = server->clean_cb(server->data, num); + if (clean_err) + { + nerr_log_error(clean_err); + nerr_ignore(&clean_err); + } + } + + return nerr_pass(err); +} + +static void ignore_pipe(void) +{ + struct sigaction sa; + + + memset(&sa, 0, sizeof(struct sigaction)); + + sa.sa_handler = SIG_IGN; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sigaction(SIGPIPE, &sa, NULL); +} + +/* Handle shutdown by accepting a TERM signal and then passing it to our + * program group */ +static volatile sig_atomic_t ShutdownPending = 0; /* set from the SIGTERM handler */ + +static void sig_term(int sig) +{ + ShutdownPending = 1; 
+} + +static void setup_term(void) +{ + struct sigaction sa; + + + memset(&sa, 0, sizeof(struct sigaction)); + + sa.sa_handler = sig_term; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(SIGTERM, &sa, NULL); +} + +NEOERR *nserver_proc_start(NSERVER *server, BOOL debug) +{ + NEOERR *err; + + if (server->req_cb == NULL) + return nerr_raise(NERR_ASSERT, "nserver requires a request callback"); + + ignore_pipe(); + + setup_term(); + + ShutdownPending = 0; + + err = fFind(&(server->accept_lock), server->lockfile); + if (err && nerr_handle(&err, NERR_NOT_FOUND)) + { + err = fCreate(&(server->accept_lock), server->lockfile); + } + if (err) return nerr_pass(err); + + do + { + err = net_listen(server->port, &(server->server_fd)); + if (err) break; + + if (debug == TRUE) + { + err = nserver_child_loop(server, 0); + break; + } + else + { + /* create children and restart them as necessary */ + pid_t child; + int count, status; + + for (count = 0; count < server->num_children; count++) + { + child = fork(); + if (child == -1) + { + err = nerr_raise_errno(NERR_SYSTEM, "Unable to fork child"); + break; + } + if (!child) + { + err = nserver_child_loop(server, count); + if (err) exit(-1); + exit(0); + } + ne_warn("Starting child pid %d", child); + } + if (count < server->num_children) break; + while (!ShutdownPending) + { + child = wait3(&status, 0, NULL); + if (child == -1) + { + ne_warn("wait3 failed [%d] %s", errno, strerror(errno)); + continue; + } + if (WIFSTOPPED(status)) + { + ne_warn("pid %d stopped on signal %d", child, WSTOPSIG(status)); + continue; + } + if (WIFEXITED(status)) + { + /* at some point, we might do something here with the + * particular exit value */ + ne_warn("pid %d exited, returned %d", child, WEXITSTATUS(status)); + } + else if (WIFSIGNALED(status)) + { + ne_warn("pid %d exited on signal %d", child, WTERMSIG(status)); + } + count++; + + child = fork(); + if (child == -1) + { + err = nerr_raise_errno(NERR_SYSTEM, "Unable to fork child"); + 
break; + } + if (!child) + { + err = nserver_child_loop(server, count); + if (err) exit(-1); + exit(0); + } + ne_warn("Starting child pid %d", child); + } + /* At some point, we might want to actually maintain information + * on our children, and then we can be more specific here in terms + * of making sure they all shutdown... for now, fergitaboutit */ + if (ShutdownPending) + { + killpg(0, SIGTERM); + } + } + } + while (0); + + fDestroy(server->accept_lock); + return nerr_pass(err); +} diff -Nru clearsilver-0.7.0/util/neo_server.h clearsilver-0.7.1/util/neo_server.h --- clearsilver-0.7.0/util/neo_server.h Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/neo_server.h Wed Sep 4 17:10:53 2002 @@ -0,0 +1,44 @@ +/* + * Neotonic ClearSilver License. + * http://www.neotonic.com/clearsilver/license.hdf + * + * Copyright (C) 2001 by Brandon Long + */ + +#ifndef __NEO_SERVER_H_ +#define __NEO_SERVER_H_ 1 + +__BEGIN_DECLS + +/* hmm, this callback might need a mechanism for telling the child to + * end... */ +typedef NEOERR *(*NSERVER_REQ_CB)(void *rock, int num, NSOCK *sock); +typedef NEOERR *(*NSERVER_CB)(void *rock, int num); + +typedef struct _nserver { + /* callbacks */ + NSERVER_CB init_cb; + NSERVER_REQ_CB req_cb; + NSERVER_CB clean_cb; + + void *data; + + int num_children; + int num_requests; + + int port; + int conn_timeout; + int data_timeout; + + char lockfile[_POSIX_PATH_MAX]; + + /* Internal data */ + int accept_lock; + int server_fd; +} NSERVER; + +NEOERR *nserver_proc_start(NSERVER *server, BOOL debug); + +__END_DECLS + +#endif /* __NEO_SERVER_H_ */ diff -Nru clearsilver-0.7.0/util/neo_str.h clearsilver-0.7.1/util/neo_str.h --- clearsilver-0.7.0/util/neo_str.h Thu Apr 25 18:47:18 2002 +++ clearsilver-0.7.1/util/neo_str.h Fri Aug 30 17:39:46 2002 @@ -43,6 +43,10 @@ int max; } STRING_ARRAY; + +/* At some point, we should add the concept of "max len" to these so we + * can't get DoS'd by someone sending us a line without an end point, + * etc. 
*/ void string_init (STRING *str); NEOERR *string_set (STRING *str, char *buf); NEOERR *string_append (STRING *str, char *buf); diff -Nru clearsilver-0.7.0/util/neo_test.c clearsilver-0.7.1/util/neo_test.c --- clearsilver-0.7.0/util/neo_test.c Mon Aug 6 14:28:17 2001 +++ clearsilver-0.7.1/util/neo_test.c Wed Jul 31 16:49:42 2002 @@ -21,8 +21,10 @@ void neot_seed_rand (long int seed) { +#ifndef __WINDOWS_GCC__ ne_warn ("Rand Seed is %ld", seed); srand48(seed); +#endif RandomInit = 1; } @@ -34,7 +36,11 @@ { neot_seed_rand (time(NULL)); } +#ifdef __WINDOWS_GCC__ + r = ((double) rand() / ((double) RAND_MAX + 1.0)) * max; /* scale rand() into [0, 1) first, matching drand48() */ +#else r = drand48() * max; +#endif return r; } diff -Nru clearsilver-0.7.0/util/os_win/mkstemp.c clearsilver-0.7.1/util/os_win/mkstemp.c --- clearsilver-0.7.0/util/os_win/mkstemp.c Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/os_win/mkstemp.c Wed Jul 31 16:49:42 2002 @@ -0,0 +1,6 @@ + +#include + +int mkstemp(char *path) { + return open(mktemp(path), O_RDWR | O_CREAT | O_EXCL, 0600); /* mkstemp must create the file, exclusively, owner-only */ +} diff -Nru clearsilver-0.7.0/util/os_win/regex.c clearsilver-0.7.1/util/os_win/regex.c --- clearsilver-0.7.0/util/os_win/regex.c Wed Dec 31 16:00:00 1969 +++ clearsilver-0.7.1/util/os_win/regex.c Wed Jul 31 16:49:42 2002 @@ -0,0 +1,5515 @@ +/* Extended regular expression matching and search library, version + 0.12. (Implements POSIX draft P10003.2/D11.2, except for + internationalization features.) + + Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#include "../osdep.h" + +#ifdef HAVE_CONFIG_H +#include +#endif + +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include + +/* This is for other GNU distributions with internationalized messages. */ +#if HAVE_LIBINTL_H || defined (_LIBC) +# include +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +#define gettext_noop(String) String +#endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. */ +#ifdef emacs + +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" + +#else /* not emacs */ + +/* If we are not linking with Emacs proper, + we can't use the relocating allocator + even if config.h says that we can. */ +#undef REL_ALLOC + +#if defined (STDC_HEADERS) || defined (_LIBC) +#include +#else +char *malloc (); +char *realloc (); +#endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. + If nothing else has been done, use the method below. */ +#ifdef INHIBIT_STRING_HEADER +#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) +#if !defined (bzero) && !defined (bcopy) +#undef INHIBIT_STRING_HEADER +#endif +#endif +#endif + +/* This is the normal way of making sure we have a bcopy and a bzero. + This is used in most programs--a few other programs avoid this + by defining INHIBIT_STRING_HEADER. 
*/ +#ifndef INHIBIT_STRING_HEADER +#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) +#include +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif +#else +#include +#endif +#endif + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SWITCH_ENUM_BUG +#define SWITCH_ENUM_CAST(x) ((int)(x)) +#else +#define SWITCH_ENUM_CAST(x) (x) +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set. */ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + +#endif /* not emacs */ + +/* Get the interface, including the syntax bits. */ +#include "regex.h" + +/* isalpha etc. are used for the character classes. */ +#include + +/* Jim Meyering writes: + + "... Some ctype macros are valid only for character codes that + isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when + using /bin/cc or gcc but without giving an ansi option). So, all + ctype uses should be through macros like ISPRINT... If + STDC_HEADERS is defined, then autoconf has verified that the ctype + macros don't need to be guarded with references to isascii. ... 
+ Defining IN_CTYPE_DOMAIN to 1 should let any compiler worth its salt + eliminate the && through constant folding." */ + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define IN_CTYPE_DOMAIN(c) 1 +#else +#define IN_CTYPE_DOMAIN(c) isascii(c) +#endif + +#ifdef isblank +#define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c)) +#else +#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) +#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c)) +#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c)) +#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c)) +#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c)) +#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c)) +#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c)) +#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c)) +#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c)) +#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c)) + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. 
On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE free + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#if 0 /* It is a bad idea to declare alloca. We always cast the result. */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +/* No need to do anything to free, after alloca. */ +#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ + +#endif /* not REGEX_MALLOC */ + +/* Define how to allocate the failure stack. 
*/ + +#if defined (REL_ALLOC) && defined (REGEX_MALLOC) + +#define REGEX_ALLOCATE_STACK(size) \ + r_alloc (&failure_stack_ptr, (size)) +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + r_re_alloc (&failure_stack_ptr, (nsize)) +#define REGEX_FREE_STACK(ptr) \ + r_alloc_free (&failure_stack_ptr) + +#else /* not using relocating allocator */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE_STACK malloc +#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +#define REGEX_FREE_STACK free + +#else /* not REGEX_MALLOC */ + +#define REGEX_ALLOCATE_STACK alloca + +#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything. */ +#define REGEX_FREE_STACK(arg) + +#endif /* not REGEX_MALLOC */ +#endif /* not using relocating allocator */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. */ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define RETALLOC_IF(addr, n, t) \ + if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8 /* In bits. */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#undef MAX +#undef MIN +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +#define false 0 +#define true 1 + +static int re_match_2_internal (); + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. 
Zero bytes may appear in the compiled regular expression. */ + +typedef enum +{ + no_op = 0, + + /* Succeed right away--no more backtracking. */ + succeed, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. 
*/ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. 
*/ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +#ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +#endif /* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ + +#define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. 
*/ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include + +/* It is useful to test things that ``must'' be true when debugging. */ +#include + +static int debug = 0; + +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +/* Print the fastmap in human-readable form. 
*/ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + putchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + putchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { + printf ("%d:\t", p - start); + + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + putchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c, last = -100; + register int in_range = 0; + + printf ("/charset [%s", + (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); + + assert (p + *p < pend); + + for (c = 0; c < 256; c++) + if (c / 8 < *p + && (p[1 + (c/8)] & (1 << (c % 8)))) + { + /* Are we starting a range? */ + if (last + 1 == c && ! in_range) + { + putchar ('-'); + in_range = 1; + } + /* Have we broken a range? */ + else if (last + 1 != c && in_range) + { + putchar (last); + in_range = 0; + } + + if (! 
in_range) + putchar (c); + + last = c; + } + + if (in_range) + putchar (last); + + putchar (']'); + + p += 1 + *p; + } + break; + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump to %d", p + mcnt - start); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump to %d", p + mcnt - start); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump to %d", p + mcnt - start); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump to %d", p + mcnt - start); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt to %d", p + mcnt - start); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump to %d", p + mcnt - start); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + break; /* do not fall through into the next opcode's printf */ +#ifdef emacs + case before_dot: + printf ("/before_dot"); 
+ break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + + putchar ('\n'); + } + + printf ("%d:\tend of pattern.\n", p - start); +} + + +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + printf ("syntax: %d\n", bufp->syntax); + /* Perhaps we should print the translate table? 
*/ +} + + +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + unsigned this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + putchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + putchar (string2[this_char]); + } +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? 
*/ + +static const char *re_error_msgid[] = + { + gettext_noop ("Success"), /* REG_NOERROR */ + gettext_noop ("No match"), /* REG_NOMATCH */ + gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ + gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ + gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ + gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ + gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ + gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ + gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ + gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ + gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ + gettext_noop ("Invalid range end"), /* REG_ERANGE */ + gettext_noop ("Memory exhausted"), /* REG_ESPACE */ + gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ + gettext_noop ("Premature end of regular expression"), /* REG_EEND */ + gettext_noop ("Regular expression too big"), /* REG_ESIZE */ + gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ + }; + +/* Avoiding alloca during matching, to placate r_alloc. */ + +/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the + searching and matching functions should not call alloca. On some + systems, alloca is implemented in terms of malloc, and if we're + using the relocating allocator routines, then malloc could cause a + relocation, which might (if the strings being searched are in the + ralloc heap) shift the data out from underneath the regexp + routines. + + Here's another reason to avoid allocation: Emacs + processes input from X in a signal handler; processing X input may + call malloc; if input arrives while a matching routine is calling + malloc, then we're scrod. But Emacs can't just block input while + calling matching routines; then we don't notice interrupts when + they come in. 
So, Emacs blocks input around all regexp calls
+   except the matching calls, which it leaves unprotected, in the
+   faith that they will not malloc.  */
+
+/* Normally, this is fine.  */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+   what config.h may say.  So don't take precautions for it.  */
+#ifdef __GNUC__
+#undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+   and (2) it's not safe for them to use malloc.
+   Note that if REL_ALLOC is defined, matching would not use malloc for the
+   failure stack, but we would still use it for the register vectors;
+   so REL_ALLOC should not affect this.  */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
+#undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE_STACK.  */
+
+
+/* Number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.  */
+#if defined (MATCH_MAY_ALLOCATE)
+/* 4400 was enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  */
+int re_max_failures = 20000;
+#else
+int re_max_failures = 2000;
+#endif
+
+/* One slot of the failure stack: it carries either a pointer or an
+   integer, and the push/pop macros pick the member to use.  */
+union fail_stack_elt
+{
+  unsigned char *pointer;
+  int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+/* The failure stack itself.  INIT_FAIL_STACK sets `size' to the number
+   of slots it allocated and `avail' to zero.  */
+typedef struct
+{
+  fail_stack_elt_t *stack;
+  unsigned size;
+  unsigned avail; /* Offset of next open position.  */
+} fail_stack_type;
+
+/* Emptiness/fullness tests.  The first and third assume a variable named
+   `fail_stack' is in scope; the second assumes a pointer named
+   `fail_stack_ptr'.  */
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+   Do `return -2' if the alloc fails.  */
+
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK() \
+  do { \
+    fail_stack.stack = (fail_stack_elt_t *) \
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+    \
+    if (fail_stack.stack == NULL) \
+      return -2; \
+    \
+    fail_stack.size = INIT_FAILURE_ALLOC; \
+    fail_stack.avail = 0; \
+  } while (0)
+
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+/* Without MATCH_MAY_ALLOCATE nothing is allocated here: initialising only
+   rewinds `avail', and resetting expands to nothing.  */
+#define INIT_FAIL_STACK() \
+  do { \
+    fail_stack.avail = 0; \
+  } while (0)
+
+#define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.
+
+   REGEX_REALLOCATE_STACK requires `destination' be declared.
+
+   The reallocation result is stored straight back into the `stack'
+   member before it is tested, so when the reallocation yields NULL that
+   member is NULL on return and `size' is left unchanged.  */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+   ? 0 \
+   : ((fail_stack).stack = (fail_stack_elt_t *) \
+        REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+          (fail_stack).size * sizeof (fail_stack_elt_t), \
+          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+      \
+      (fail_stack).stack == NULL \
+      ? 0 \
+      : ((fail_stack).size <<= 1, \
+         1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.  */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+  ((FAIL_STACK_FULL () \
+    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+   ? 0 \
+   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
+      1))
+
+/* Push a pointer value onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.
*/ +#define PUSH_FAILURE_POINTER(item) \ + fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) + +/* This pushes an integer-valued item onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_INT(item) \ + fail_stack.stack[fail_stack.avail++].integer = (item) + +/* Push a fail_stack_elt_t value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ELT(item) \ + fail_stack.stack[fail_stack.avail++] = (item) + +/* These three POP... operations complement the three PUSH... operations. + All assume that `fail_stack' is nonempty. */ +#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_INT +#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared. + + Does `return FAILURE_CODE' if runs out of memory. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. 
*/ \ + int this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. */ \ + DEBUG_PRINT1 ("\n"); \ + \ + if (1) \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_POINTER (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_POINTER (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ELT (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_INT (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_INT (highest_active_reg); \ + \ + DEBUG_PRINT2 (" 
Pushing pattern 0x%x: ", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_POINTER (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_POINTER (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. */ +/* We used to use (num_regs - 1), which is the number of registers + this regexp will save; but that was changed to 5 + to avoid stack overflow for a regexp with lots of parens. */ +#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + (((0 \ + ? 0 : highest_active_reg - lowest_active_reg + 1) \ + * NUM_REG_ITEMS) \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the highest and lowest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. 
*/ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + int this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string. */ \ + string_temp = POP_FAILURE_POINTER (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. 
*/ \ + high_reg = (unsigned) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (unsigned) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + if (1) \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ELT (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + else \ + { \ + for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ + { \ + reg_info[this_reg].word.integer = 0; \ + regend[this_reg] = 0; \ + regstart[this_reg] = 0; \ + } \ + highest_active_reg = high_reg; \ + } \ + \ + set_regs_matched_done = 0; \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + + + +/* Structure for per-register (a.k.a. per-group) information. + Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. + + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ + +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. 
*/ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + if (!set_regs_matched_done) \ + { \ + unsigned r; \ + set_regs_matched_done = 1; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + } \ + while (0) + +/* Registers are set to a sentinel when they haven't yet matched. */ +static char reg_unset_dummy; +#define REG_UNSET_VALUE (®_unset_dummy) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + +/* Subroutine declarations and macros for regex_compile. */ + +static void store_op1 (), store_op2 (); +static void insert_op1 (), insert_op2 (); +static boolean at_begline_loc_p (), at_endline_loc_p (); +static boolean group_in_compile_stack (); +static reg_errcode_t compile_range (); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#ifndef PATFETCH +#define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = (unsigned char) translate[c]; \ + } while (0) +#endif + +/* Fetch the next character in the uncompiled pattern, with no + translation. 
*/
+/* Like the translating fetch, this does `return REG_EEND' from the
+   enclosing function when `p' has reached `pend'.  */
+#define PATFETCH_RAW(c) \
+  do {if (p == pend) return REG_EEND; \
+    c = (unsigned char) *p++; \
+  } while (0)
+
+/* Go backwards one character in the pattern.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  But
+   when we use a character as a subscript we must make it unsigned.  */
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+  (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'.
+   They all assume the variables `b' (the append position) and `bufp'
+   are in scope at the point of use.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer.
+   Expands to a bare `while' statement (no do/while wrapper), so take
+   care when using it as the body of an unbraced `if'/`else'.  */
+#define GET_BUFFER_SPACE(n) \
+    while (b - bufp->buffer + (n) > bufp->allocated) \
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.  */
+#define BUF_PUSH(c) \
+  do { \
+    GET_BUFFER_SPACE (1); \
+    *b++ = (unsigned char) (c); \
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
+#define BUF_PUSH_2(c1, c2) \
+  do { \
+    GET_BUFFER_SPACE (2); \
+    *b++ = (unsigned char) (c1); \
+    *b++ = (unsigned char) (c2); \
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.  */
+#define BUF_PUSH_3(c1, c2, c3) \
+  do { \
+    GET_BUFFER_SPACE (3); \
+    *b++ = (unsigned char) (c1); \
+    *b++ = (unsigned char) (c2); \
+    *b++ = (unsigned char) (c3); \
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies.  */
+#define STORE_JUMP(op, loc, to) \
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump.  */
+#define STORE_JUMP2(op, loc, to, arg) \
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.
*/
+#define INSERT_JUMP(op, loc, to) \
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP2(op, loc, to, arg) \
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.  So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer by twice its current size via realloc and
+   reset the pointers that pointed into the old block to point to the
+   correct places in the new one.  If extending the buffer results in it
+   being larger than MAX_BUF_SIZE, then flag memory exhausted.
+
+   Assumes the variables `bufp', `b', `begalt', `fixup_alt_jump',
+   `laststart' and `pending_exact' are in scope.  Does `return REG_ESIZE'
+   when the buffer is already at MAX_BUF_SIZE and `return REG_ESPACE'
+   when realloc fails; the realloc result overwrites bufp->buffer before
+   it is checked.  NOTE(review): both early returns leave the enclosing
+   function directly -- confirm that nothing the caller allocated needs
+   releasing on those paths.  */
+#define EXTEND_BUFFER() \
+  do { \
+    unsigned char *old_buffer = bufp->buffer; \
+    if (bufp->allocated == MAX_BUF_SIZE) \
+      return REG_ESIZE; \
+    bufp->allocated <<= 1; \
+    if (bufp->allocated > MAX_BUF_SIZE) \
+      bufp->allocated = MAX_BUF_SIZE; \
+    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    if (bufp->buffer == NULL) \
+      return REG_ESPACE; \
+    /* If the buffer moved, move all the pointers into it.  */ \
+    if (old_buffer != bufp->buffer) \
+      { \
+        b = (b - old_buffer) + bufp->buffer; \
+        begalt = (begalt - old_buffer) + bufp->buffer; \
+        if (fixup_alt_jump) \
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+        if (laststart) \
+          laststart = (laststart - old_buffer) + bufp->buffer; \
+        if (pending_exact) \
+          pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+      } \
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.
*/ + +/* Since offsets can go either forwards or backwards, this type needs to + be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ +typedef int pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +#define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) + +#ifndef MATCH_MAY_ALLOCATE + +/* If we cannot allocate large objects within re_match_2_internal, + we make the fail stack and register vectors global. + The fail stack, we grow to the maximum size when a regexp + is compiled. 
+ The register vectors, we adjust in size each time we + compile a regexp, according to the number of registers it needs. */ + +static fail_stack_type fail_stack; + +/* Size with which the following vectors are currently allocated. + That is so we can make them bigger as needed, + but never make them smaller. */ +static int regs_allocated_size; + +static const char ** regstart, ** regend; +static const char ** old_regstart, ** old_regend; +static const char **best_regstart, **best_regend; +static register_info_type *reg_info; +static const char **reg_dummy; +static register_info_type *reg_info_dummy; + +/* Make the register vectors big enough for NUM_REGS registers, + but don't make them smaller. */ + +static +regex_grow_registers (num_regs) + int num_regs; +{ + if (num_regs > regs_allocated_size) + { + RETALLOC_IF (regstart, num_regs, const char *); + RETALLOC_IF (regend, num_regs, const char *); + RETALLOC_IF (old_regstart, num_regs, const char *); + RETALLOC_IF (old_regend, num_regs, const char *); + RETALLOC_IF (best_regstart, num_regs, const char *); + RETALLOC_IF (best_regend, num_regs, const char *); + RETALLOC_IF (reg_info, num_regs, register_info_type); + RETALLOC_IF (reg_dummy, num_regs, const char *); + RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); + + regs_allocated_size = num_regs; + } +} + +#endif /* not MATCH_MAY_ALLOCATE */ + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. 
+ + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +/* Return, freeing storage we allocated. */ +#define FREE_STACK_RETURN(value) \ + return (free (compile_stack.stack), value) + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + int size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. */ + register unsigned char c, c1; + + /* A random temporary spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + RE_TRANSLATE_TYPE translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. 
*/ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. */ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + putchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. */ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. 
*/ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. */ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. 
*/ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. 
*/ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. 
*/ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + FREE_STACK_RETURN (REG_ERANGE); + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); + } + + /* See if we're at the beginning of a possible character + class. */ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). 
*/ + if (c == ':' && *p == ']') + { + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ch++) + { + int translated = TRANSLATE (ch); + /* This was split into 3 if's to + avoid an arbitrary limit in some compiler. */ + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch))) + SET_LIST_BIT (translated); + if ( (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch))) + SET_LIST_BIT (translated); + if ( (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (translated); + } + had_char_class = true; + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. 
*/ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. */ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. 
*/ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + FREE_STACK_RETURN (REG_ERPAREN); + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + FREE_STACK_RETURN (REG_ERPAREN); + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? 
bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. 
*/ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. */ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_EBRACE); + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. */ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + FREE_STACK_RETURN (REG_BADBR); + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + FREE_STACK_RETURN (REG_BADRPT); + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. 
When + we're all done, the pattern will look like: + set_number_at + set_number_at + succeed_n + + jump_n + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. */ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. 
*/ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. */ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. --karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + BUF_PUSH (wordbeg); + break; + + case '>': + BUF_PUSH (wordend); + break; + + case 'b': + BUF_PUSH (wordbound); + break; + + case 'B': + BUF_PUSH (notwordbound); + break; + + case '`': + BUF_PUSH (begbuf); + break; + + case '\'': + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + FREE_STACK_RETURN (REG_ESUBREG); + + /* Can't back reference to a subexpression if inside of it. 
*/ + if (group_in_compile_stack (compile_stack, c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. */ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. */ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + FREE_STACK_RETURN (REG_EPAREN); + + /* If we don't want backtracking, force success + the first time we reach the end of the compiled pattern. */ + if (syntax & RE_NO_POSIX_BACKTRACKING) + BUF_PUSH (succeed); + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. 
*/ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: \n"); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + +#ifndef MATCH_MAY_ALLOCATE + /* Initialize the failure stack to the largest possible stack. This + isn't necessary unless we're trying to avoid calling alloca in + the search and match routines. */ + { + int num_regs = bufp->re_nsub + 1; + + /* Since DOUBLE_FAIL_STACK refuses to double only if the current size + is strictly greater than re_max_failures, the largest possible stack + is 2 * re_max_failures failure points. */ + if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) + { + fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +#ifdef emacs + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) xmalloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) xrealloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#else /* not emacs */ + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#endif /* not emacs */ + } + + regex_grow_registers (num_regs); + } +#endif /* not MATCH_MAY_ALLOCATE */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. 
*/ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. */ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + int syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : 0; + + return + /* Before a subexpression? 
*/ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. */ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + RE_TRANSLATE_TYPE translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. 
*/ + range_start = ((const unsigned char *) p)[-2]; + range_end = ((const unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; +#ifdef MATCH_MAY_ALLOCATE + fail_stack_type fail_stack; +#endif +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. */ + unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned long size = bufp->used; + unsigned char *p = pattern; + register unsigned char *pend = pattern + size; + + /* This holds the pointer to the failure stack, when + it is allocated relocatably. 
*/ +#ifdef REL_ALLOC + fail_stack_elt_t *failure_stack_ptr; +#endif + + /* Assume that each path through the pattern can be null unt