Blame - lib/Support/regcomp.c - fp2-dev/platform/external/llvm

blob: 9ae66f3fa5ff290df3b30f4495676bcddcde2653 [file] [log] [blame]

Torok Edwin	ce0c81e	2009-08-30 08:24:09 +0000	[diff] [blame]	1	/*-
				2	* This code is derived from OpenBSD's libc/regex, original license follows:
				3	*
				4	* Copyright (c) 1992, 1993, 1994 Henry Spencer.
				5	* Copyright (c) 1992, 1993, 1994
				6	* The Regents of the University of California. All rights reserved.
				7	*
				8	* This code is derived from software contributed to Berkeley by
				9	* Henry Spencer.
				10	*
				11	* Redistribution and use in source and binary forms, with or without
				12	* modification, are permitted provided that the following conditions
				13	* are met:
				14	* 1. Redistributions of source code must retain the above copyright
				15	* notice, this list of conditions and the following disclaimer.
				16	* 2. Redistributions in binary form must reproduce the above copyright
				17	* notice, this list of conditions and the following disclaimer in the
				18	* documentation and/or other materials provided with the distribution.
				19	* 3. Neither the name of the University nor the names of its contributors
				20	* may be used to endorse or promote products derived from this software
				21	* without specific prior written permission.
				22	*
				23	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
				24	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				25	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				26	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
				27	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				28	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				29	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				30	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				31	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				32	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				33	* SUCH DAMAGE.
				34	*
				35	* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
				36	*/
				37
				38	#include <sys/types.h>
				39	#include <stdio.h>
				40	#include <string.h>
				41	#include <ctype.h>
				42	#include <limits.h>
				43	#include <stdlib.h>
				44	#include "regex_impl.h"
				45
				46	#include "regutils.h"
				47	#include "regex2.h"
				48
				49	#include "regcclass.h"
				50	#include "regcname.h"
				51
				52	/*
				53	* parse structure, passed up and down to avoid global variables and
				54	* other clumsinesses
				55	*/
				56	struct parse {
				57	char next; / next character in RE */
				58	char end; / end of string (-> NUL normally) */
				59	int error; /* has an error been seen? */
				60	sop strip; / malloced strip */
				61	sopno ssize; /* malloced strip size (allocated) */
				62	sopno slen; /* malloced strip length (used) */
				63	int ncsalloc; /* number of csets allocated */
				64	struct re_guts *g;
				65	# define NPAREN 10 /* we need to remember () 1-9 for back refs */
				66	sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
				67	sopno pend[NPAREN]; /* -> ) ([0] unused) */
				68	};
				69
				70	static void p_ere(struct parse *, int);
				71	static void p_ere_exp(struct parse *);
				72	static void p_str(struct parse *);
				73	static void p_bre(struct parse *, int, int);
				74	static int p_simp_re(struct parse *, int);
				75	static int p_count(struct parse *);
				76	static void p_bracket(struct parse *);
				77	static void p_b_term(struct parse , cset );
				78	static void p_b_cclass(struct parse , cset );
				79	static void p_b_eclass(struct parse , cset );
				80	static char p_b_symbol(struct parse *);
				81	static char p_b_coll_elem(struct parse *, int);
				82	static char othercase(int);
				83	static void bothcases(struct parse *, int);
				84	static void ordinary(struct parse *, int);
				85	static void nonnewline(struct parse *);
				86	static void repeat(struct parse *, sopno, int, int);
				87	static int seterr(struct parse *, int);
				88	static cset allocset(struct parse );
				89	static void freeset(struct parse , cset );
				90	static int freezeset(struct parse , cset );
				91	static int firstch(struct parse , cset );
				92	static int nch(struct parse , cset );
				93	static void mcadd(struct parse , cset , const char *);
				94	static void mcinvert(struct parse , cset );
				95	static void mccase(struct parse , cset );
				96	static int isinsets(struct re_guts *, int);
				97	static int samesets(struct re_guts *, int, int);
				98	static void categorize(struct parse , struct re_guts );
				99	static sopno dupl(struct parse *, sopno, sopno);
				100	static void doemit(struct parse *, sop, size_t);
				101	static void doinsert(struct parse *, sop, size_t, sopno);
				102	static void dofwd(struct parse *, sopno, sop);
				103	static void enlarge(struct parse *, sopno);
				104	static void stripsnug(struct parse , struct re_guts );
				105	static void findmust(struct parse , struct re_guts );
				106	static sopno pluscount(struct parse , struct re_guts );
				107
				108	static char nuls[10]; /* place to point scanner in event of error */
				109
				110	/*
				111	* macros for use with parse structure
				112	* BEWARE: these know that the parse structure is named `p' !!!
				113	*/
				114	#define PEEK() (*p->next)
				115	#define PEEK2() (*(p->next+1))
				116	#define MORE() (p->next < p->end)
				117	#define MORE2() (p->next+1 < p->end)
				118	#define SEE(c) (MORE() && PEEK() == (c))
				119	#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
				120	#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
				121	#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
				122	#define NEXT() (p->next++)
				123	#define NEXT2() (p->next += 2)
				124	#define NEXTn(n) (p->next += (n))
				125	#define GETNEXT() (*p->next++)
				126	#define SETERROR(e) seterr(p, (e))
				127	#define REQUIRE(co, e) (void)((co) \|\| SETERROR(e))
				128	#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
				129	#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
				130	#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() \|\| PEEK() != (c), e))
				131	#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
				132	#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
				133	#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
				134	#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
				135	#define HERE() (p->slen)
				136	#define THERE() (p->slen - 1)
				137	#define THERETHERE() (p->slen - 2)
				138	#define DROP(n) (p->slen -= (n))
				139
				140	#ifdef _POSIX2_RE_DUP_MAX
				141	#define DUPMAX _POSIX2_RE_DUP_MAX
				142	#else
				143	#define DUPMAX 255
				144	#endif
				145
				146	#ifndef NDEBUG
				147	static int never = 0; /* for use in asserts; shuts lint up */
				148	#else
				149	#define never 0 /* some <assert.h>s have bugs too */
				150	#endif
				151
				152	/*
				153	- llvm_regcomp - interface for parser and compilation
				154	*/
				155	int /* 0 success, otherwise REG_something */
				156	llvm_regcomp(llvm_regex_t preg, const char pattern, int cflags)
				157	{
				158	struct parse pa;
				159	struct re_guts *g;
				160	struct parse *p = &pa;
				161	int i;
				162	size_t len;
				163	#ifdef REDEBUG
				164	# define GOODFLAGS(f) (f)
				165	#else
				166	# define GOODFLAGS(f) ((f)&~REG_DUMP)
				167	#endif
				168
				169	cflags = GOODFLAGS(cflags);
				170	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
				171	return(REG_INVARG);
				172
				173	if (cflags&REG_PEND) {
				174	if (preg->re_endp < pattern)
				175	return(REG_INVARG);
				176	len = preg->re_endp - pattern;
				177	} else
				178	len = strlen((const char *)pattern);
				179
				180	/* do the mallocs early so failure handling is easy */
				181	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
				182	(NC-1)*sizeof(cat_t));
				183	if (g == NULL)
				184	return(REG_ESPACE);
				185	p->ssize = len/(size_t)2(size_t)3 + (size_t)1; / ugh */
				186	p->strip = (sop *)calloc(p->ssize, sizeof(sop));
				187	p->slen = 0;
				188	if (p->strip == NULL) {
				189	free((char *)g);
				190	return(REG_ESPACE);
				191	}
				192
				193	/* set things up */
				194	p->g = g;
				195	p->next = (char )pattern; / convenience; we do not modify it */
				196	p->end = p->next + len;
				197	p->error = 0;
				198	p->ncsalloc = 0;
				199	for (i = 0; i < NPAREN; i++) {
				200	p->pbegin[i] = 0;
				201	p->pend[i] = 0;
				202	}
				203	g->csetsize = NC;
				204	g->sets = NULL;
				205	g->setbits = NULL;
				206	g->ncsets = 0;
				207	g->cflags = cflags;
				208	g->iflags = 0;
				209	g->nbol = 0;
				210	g->neol = 0;
				211	g->must = NULL;
				212	g->mlen = 0;
				213	g->nsub = 0;
				214	g->ncategories = 1; /* category 0 is "everything else" */
				215	g->categories = &g->catspace[-(CHAR_MIN)];
				216	(void) memset((char )g->catspace, 0, NCsizeof(cat_t));
				217	g->backrefs = 0;
				218
				219	/* do it */
				220	EMIT(OEND, 0);
				221	g->firststate = THERE();
				222	if (cflags&REG_EXTENDED)
				223	p_ere(p, OUT);
				224	else if (cflags&REG_NOSPEC)
				225	p_str(p);
				226	else
				227	p_bre(p, OUT, OUT);
				228	EMIT(OEND, 0);
				229	g->laststate = THERE();
				230
				231	/* tidy up loose ends and fill things in */
				232	categorize(p, g);
				233	stripsnug(p, g);
				234	findmust(p, g);
				235	g->nplus = pluscount(p, g);
				236	g->magic = MAGIC2;
				237	preg->re_nsub = g->nsub;
				238	preg->re_g = g;
				239	preg->re_magic = MAGIC1;
				240	#ifndef REDEBUG
				241	/* not debugging, so can't rely on the assert() in llvm_regexec() */
				242	if (g->iflags&REGEX_BAD)
				243	SETERROR(REG_ASSERT);
				244	#endif
				245
				246	/* win or lose, we're done */
				247	if (p->error != 0) /* lose */
				248	llvm_regfree(preg);
				249	return(p->error);
				250	}
				251
				252	/*
				253	- p_ere - ERE parser top level, concatenation and alternation
				254	*/
				255	static void
				256	p_ere(struct parse p, int stop) / character this ERE should end at */
				257	{
				258	char c;
Benjamin Kramer	5b7c356	2009-09-06 09:29:39 +0000	[diff] [blame]	259	sopno prevback;
				260	sopno prevfwd;
Torok Edwin	ce0c81e	2009-08-30 08:24:09 +0000	[diff] [blame]	261	sopno conc;
				262	int first = 1; /* is this the first alternative? */
				263
				264	for (;;) {
				265	/* do a bunch of concatenated expressions */
				266	conc = HERE();
				267	while (MORE() && (c = PEEK()) != '\|' && c != stop)
				268	p_ere_exp(p);
				269	REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
				270
				271	if (!EAT('\|'))
				272	break; /* NOTE BREAK OUT */
				273
				274	if (first) {
				275	INSERT(OCH_, conc); /* offset is wrong */
				276	prevfwd = conc;
				277	prevback = conc;
				278	first = 0;
				279	}
				280	ASTERN(OOR1, prevback);
				281	prevback = THERE();
				282	AHEAD(prevfwd); /* fix previous offset */
				283	prevfwd = HERE();
				284	EMIT(OOR2, 0); /* offset is very wrong */
				285	}
				286
				287	if (!first) { /* tail-end fixups */
				288	AHEAD(prevfwd);
				289	ASTERN(O_CH, prevback);
				290	}
				291
				292	assert(!MORE() \|\| SEE(stop));
				293	}
				294
				295	/*
				296	- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
				297	*/
				298	static void
				299	p_ere_exp(struct parse *p)
				300	{
				301	char c;
				302	sopno pos;
				303	int count;
				304	int count2;
				305	sopno subno;
				306	int wascaret = 0;
				307
				308	assert(MORE()); /* caller should have ensured this */
				309	c = GETNEXT();
				310
				311	pos = HERE();
				312	switch (c) {
				313	case '(':
				314	REQUIRE(MORE(), REG_EPAREN);
				315	p->g->nsub++;
				316	subno = p->g->nsub;
				317	if (subno < NPAREN)
				318	p->pbegin[subno] = HERE();
				319	EMIT(OLPAREN, subno);
				320	if (!SEE(')'))
				321	p_ere(p, ')');
				322	if (subno < NPAREN) {
				323	p->pend[subno] = HERE();
				324	assert(p->pend[subno] != 0);
				325	}
				326	EMIT(ORPAREN, subno);
				327	MUSTEAT(')', REG_EPAREN);
				328	break;
				329	#ifndef POSIX_MISTAKE
				330	case ')': /* happens only if no current unmatched ( */
				331	/*
				332	* You may ask, why the ifndef? Because I didn't notice
				333	* this until slightly too late for 1003.2, and none of the
				334	* other 1003.2 regular-expression reviewers noticed it at
				335	* all. So an unmatched ) is legal POSIX, at least until
				336	* we can get it fixed.
				337	*/
				338	SETERROR(REG_EPAREN);
				339	break;
				340	#endif
				341	case '^':
				342	EMIT(OBOL, 0);
				343	p->g->iflags \|= USEBOL;
				344	p->g->nbol++;
				345	wascaret = 1;
				346	break;
				347	case '$':
				348	EMIT(OEOL, 0);
				349	p->g->iflags \|= USEEOL;
				350	p->g->neol++;
				351	break;
				352	case '\|':
				353	SETERROR(REG_EMPTY);
				354	break;
				355	case '*':
				356	case '+':
				357	case '?':
				358	SETERROR(REG_BADRPT);
				359	break;
				360	case '.':
				361	if (p->g->cflags&REG_NEWLINE)
				362	nonnewline(p);
				363	else
				364	EMIT(OANY, 0);
				365	break;
				366	case '[':
				367	p_bracket(p);
				368	break;
				369	case '\\':
				370	REQUIRE(MORE(), REG_EESCAPE);
				371	c = GETNEXT();
				372	ordinary(p, c);
				373	break;
				374	case '{': /* okay as ordinary except if digit follows */
				375	REQUIRE(!MORE() \|\| !isdigit((uch)PEEK()), REG_BADRPT);
				376	/* FALLTHROUGH */
				377	default:
				378	ordinary(p, c);
				379	break;
				380	}
				381
				382	if (!MORE())
				383	return;
				384	c = PEEK();
				385	/* we call { a repetition if followed by a digit */
				386	if (!( c == '*' \|\| c == '+' \|\| c == '?' \|\|
				387	(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
				388	return; /* no repetition, we're done */
				389	NEXT();
				390
				391	REQUIRE(!wascaret, REG_BADRPT);
				392	switch (c) {
				393	case '': / implemented as +? */
				394	/* this case does not require the (y\|) trick, noKLUDGE */
				395	INSERT(OPLUS_, pos);
				396	ASTERN(O_PLUS, pos);
				397	INSERT(OQUEST_, pos);
				398	ASTERN(O_QUEST, pos);
				399	break;
				400	case '+':
				401	INSERT(OPLUS_, pos);
				402	ASTERN(O_PLUS, pos);
				403	break;
				404	case '?':
				405	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				406	INSERT(OCH_, pos); /* offset slightly wrong */
				407	ASTERN(OOR1, pos); /* this one's right */
				408	AHEAD(pos); /* fix the OCH_ */
				409	EMIT(OOR2, 0); /* offset very wrong... */
				410	AHEAD(THERE()); /* ...so fix it */
				411	ASTERN(O_CH, THERETHERE());
				412	break;
				413	case '{':
				414	count = p_count(p);
				415	if (EAT(',')) {
				416	if (isdigit((uch)PEEK())) {
				417	count2 = p_count(p);
				418	REQUIRE(count <= count2, REG_BADBR);
				419	} else /* single number with comma */
				420	count2 = INFINITY;
				421	} else /* just a single number */
				422	count2 = count;
				423	repeat(p, pos, count, count2);
				424	if (!EAT('}')) { /* error heuristics */
				425	while (MORE() && PEEK() != '}')
				426	NEXT();
				427	REQUIRE(MORE(), REG_EBRACE);
				428	SETERROR(REG_BADBR);
				429	}
				430	break;
				431	}
				432
				433	if (!MORE())
				434	return;
				435	c = PEEK();
				436	if (!( c == '*' \|\| c == '+' \|\| c == '?' \|\|
				437	(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
				438	return;
				439	SETERROR(REG_BADRPT);
				440	}
				441
				442	/*
				443	- p_str - string (no metacharacters) "parser"
				444	*/
				445	static void
				446	p_str(struct parse *p)
				447	{
				448	REQUIRE(MORE(), REG_EMPTY);
				449	while (MORE())
				450	ordinary(p, GETNEXT());
				451	}
				452
				453	/*
				454	- p_bre - BRE parser top level, anchoring and concatenation
				455	* Giving end1 as OUT essentially eliminates the end1/end2 check.
				456	*
				457	* This implementation is a bit of a kludge, in that a trailing $ is first
				458	* taken as an ordinary character and then revised to be an anchor. The
				459	* only undesirable side effect is that '$' gets included as a character
				460	* category in such cases. This is fairly harmless; not worth fixing.
				461	* The amount of lookahead needed to avoid this kludge is excessive.
				462	*/
				463	static void
				464	p_bre(struct parse *p,
				465	int end1, /* first terminating character */
				466	int end2) /* second terminating character */
				467	{
				468	sopno start = HERE();
				469	int first = 1; /* first subexpression? */
				470	int wasdollar = 0;
				471
				472	if (EAT('^')) {
				473	EMIT(OBOL, 0);
				474	p->g->iflags \|= USEBOL;
				475	p->g->nbol++;
				476	}
				477	while (MORE() && !SEETWO(end1, end2)) {
				478	wasdollar = p_simp_re(p, first);
				479	first = 0;
				480	}
				481	if (wasdollar) { /* oops, that was a trailing anchor */
				482	DROP(1);
				483	EMIT(OEOL, 0);
				484	p->g->iflags \|= USEEOL;
				485	p->g->neol++;
				486	}
				487
				488	REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
				489	}
				490
				491	/*
				492	- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
				493	*/
				494	static int /* was the simple RE an unbackslashed $? */
				495	p_simp_re(struct parse *p,
				496	int starordinary) /* is a leading * an ordinary character? */
				497	{
				498	int c;
				499	int count;
				500	int count2;
				501	sopno pos;
				502	int i;
				503	sopno subno;
				504	# define BACKSL (1<<CHAR_BIT)
				505
				506	pos = HERE(); /* repetion op, if any, covers from here */
				507
				508	assert(MORE()); /* caller should have ensured this */
				509	c = GETNEXT();
				510	if (c == '\\') {
				511	REQUIRE(MORE(), REG_EESCAPE);
				512	c = BACKSL \| GETNEXT();
				513	}
				514	switch (c) {
				515	case '.':
				516	if (p->g->cflags&REG_NEWLINE)
				517	nonnewline(p);
				518	else
				519	EMIT(OANY, 0);
				520	break;
				521	case '[':
				522	p_bracket(p);
				523	break;
				524	case BACKSL\|'{':
				525	SETERROR(REG_BADRPT);
				526	break;
				527	case BACKSL\|'(':
				528	p->g->nsub++;
				529	subno = p->g->nsub;
				530	if (subno < NPAREN)
				531	p->pbegin[subno] = HERE();
				532	EMIT(OLPAREN, subno);
				533	/* the MORE here is an error heuristic */
				534	if (MORE() && !SEETWO('\\', ')'))
				535	p_bre(p, '\\', ')');
				536	if (subno < NPAREN) {
				537	p->pend[subno] = HERE();
				538	assert(p->pend[subno] != 0);
				539	}
				540	EMIT(ORPAREN, subno);
				541	REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
				542	break;
				543	case BACKSL\|')': /* should not get here -- must be user */
				544	case BACKSL\|'}':
				545	SETERROR(REG_EPAREN);
				546	break;
				547	case BACKSL\|'1':
				548	case BACKSL\|'2':
				549	case BACKSL\|'3':
				550	case BACKSL\|'4':
				551	case BACKSL\|'5':
				552	case BACKSL\|'6':
				553	case BACKSL\|'7':
				554	case BACKSL\|'8':
				555	case BACKSL\|'9':
				556	i = (c&~BACKSL) - '0';
				557	assert(i < NPAREN);
				558	if (p->pend[i] != 0) {
				559	assert(i <= p->g->nsub);
				560	EMIT(OBACK_, i);
				561	assert(p->pbegin[i] != 0);
				562	assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
				563	assert(OP(p->strip[p->pend[i]]) == ORPAREN);
				564	(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
				565	EMIT(O_BACK, i);
				566	} else
				567	SETERROR(REG_ESUBREG);
				568	p->g->backrefs = 1;
				569	break;
				570	case '*':
				571	REQUIRE(starordinary, REG_BADRPT);
				572	/* FALLTHROUGH */
				573	default:
				574	ordinary(p, (char)c);
				575	break;
				576	}
				577
				578	if (EAT('')) { / implemented as +? */
				579	/* this case does not require the (y\|) trick, noKLUDGE */
				580	INSERT(OPLUS_, pos);
				581	ASTERN(O_PLUS, pos);
				582	INSERT(OQUEST_, pos);
				583	ASTERN(O_QUEST, pos);
				584	} else if (EATTWO('\\', '{')) {
				585	count = p_count(p);
				586	if (EAT(',')) {
				587	if (MORE() && isdigit((uch)PEEK())) {
				588	count2 = p_count(p);
				589	REQUIRE(count <= count2, REG_BADBR);
				590	} else /* single number with comma */
				591	count2 = INFINITY;
				592	} else /* just a single number */
				593	count2 = count;
				594	repeat(p, pos, count, count2);
				595	if (!EATTWO('\\', '}')) { /* error heuristics */
				596	while (MORE() && !SEETWO('\\', '}'))
				597	NEXT();
				598	REQUIRE(MORE(), REG_EBRACE);
				599	SETERROR(REG_BADBR);
				600	}
				601	} else if (c == '$') /* $ (but not \$) ends it */
				602	return(1);
				603
				604	return(0);
				605	}
				606
				607	/*
				608	- p_count - parse a repetition count
				609	*/
				610	static int /* the value */
				611	p_count(struct parse *p)
				612	{
				613	int count = 0;
				614	int ndigits = 0;
				615
				616	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
				617	count = count*10 + (GETNEXT() - '0');
				618	ndigits++;
				619	}
				620
				621	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
				622	return(count);
				623	}
				624
				625	/*
				626	- p_bracket - parse a bracketed character list
				627	*
				628	* Note a significant property of this code: if the allocset() did SETERROR,
				629	* no set operations are done.
				630	*/
				631	static void
				632	p_bracket(struct parse *p)
				633	{
				634	cset *cs;
				635	int invert = 0;
				636
				637	/* Dept of Truly Sickening Special-Case Kludges */
				638	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
				639	EMIT(OBOW, 0);
				640	NEXTn(6);
				641	return;
				642	}
				643	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
				644	EMIT(OEOW, 0);
				645	NEXTn(6);
				646	return;
				647	}
				648
				649	if ((cs = allocset(p)) == NULL) {
				650	/* allocset did set error status in p */
				651	return;
				652	}
				653
				654	if (EAT('^'))
				655	invert++; /* make note to invert set at end */
				656	if (EAT(']'))
				657	CHadd(cs, ']');
				658	else if (EAT('-'))
				659	CHadd(cs, '-');
				660	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
				661	p_b_term(p, cs);
				662	if (EAT('-'))
				663	CHadd(cs, '-');
				664	MUSTEAT(']', REG_EBRACK);
				665
				666	if (p->error != 0) { /* don't mess things up further */
				667	freeset(p, cs);
				668	return;
				669	}
				670
				671	if (p->g->cflags&REG_ICASE) {
				672	int i;
				673	int ci;
				674
				675	for (i = p->g->csetsize - 1; i >= 0; i--)
				676	if (CHIN(cs, i) && isalpha(i)) {
				677	ci = othercase(i);
				678	if (ci != i)
				679	CHadd(cs, ci);
				680	}
				681	if (cs->multis != NULL)
				682	mccase(p, cs);
				683	}
				684	if (invert) {
				685	int i;
				686
				687	for (i = p->g->csetsize - 1; i >= 0; i--)
				688	if (CHIN(cs, i))
				689	CHsub(cs, i);
				690	else
				691	CHadd(cs, i);
				692	if (p->g->cflags&REG_NEWLINE)
				693	CHsub(cs, '\n');
				694	if (cs->multis != NULL)
				695	mcinvert(p, cs);
				696	}
				697
				698	assert(cs->multis == NULL); /* xxx */
				699
				700	if (nch(p, cs) == 1) { /* optimize singleton sets */
				701	ordinary(p, firstch(p, cs));
				702	freeset(p, cs);
				703	} else
				704	EMIT(OANYOF, freezeset(p, cs));
				705	}
				706
				707	/*
				708	- p_b_term - parse one term of a bracketed character list
				709	*/
				710	static void
				711	p_b_term(struct parse p, cset cs)
				712	{
				713	char c;
				714	char start, finish;
				715	int i;
				716
				717	/* classify what we've got */
				718	switch ((MORE()) ? PEEK() : '\0') {
				719	case '[':
				720	c = (MORE2()) ? PEEK2() : '\0';
				721	break;
				722	case '-':
				723	SETERROR(REG_ERANGE);
				724	return; /* NOTE RETURN */
				725	break;
				726	default:
				727	c = '\0';
				728	break;
				729	}
				730
				731	switch (c) {
				732	case ':': /* character class */
				733	NEXT2();
				734	REQUIRE(MORE(), REG_EBRACK);
				735	c = PEEK();
				736	REQUIRE(c != '-' && c != ']', REG_ECTYPE);
				737	p_b_cclass(p, cs);
				738	REQUIRE(MORE(), REG_EBRACK);
				739	REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
				740	break;
				741	case '=': /* equivalence class */
				742	NEXT2();
				743	REQUIRE(MORE(), REG_EBRACK);
				744	c = PEEK();
				745	REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
				746	p_b_eclass(p, cs);
				747	REQUIRE(MORE(), REG_EBRACK);
				748	REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
				749	break;
				750	default: /* symbol, ordinary character, or range */
				751	/* xxx revision needed for multichar stuff */
				752	start = p_b_symbol(p);
				753	if (SEE('-') && MORE2() && PEEK2() != ']') {
				754	/* range */
				755	NEXT();
				756	if (EAT('-'))
				757	finish = '-';
				758	else
				759	finish = p_b_symbol(p);
				760	} else
				761	finish = start;
				762	/* xxx what about signed chars here... */
				763	REQUIRE(start <= finish, REG_ERANGE);
				764	for (i = start; i <= finish; i++)
				765	CHadd(cs, i);
				766	break;
				767	}
				768	}
				769
				770	/*
				771	- p_b_cclass - parse a character-class name and deal with it
				772	*/
				773	static void
				774	p_b_cclass(struct parse p, cset cs)
				775	{
				776	char *sp = p->next;
				777	struct cclass *cp;
				778	size_t len;
				779	const char *u;
				780	char c;
				781
				782	while (MORE() && isalpha(PEEK()))
				783	NEXT();
				784	len = p->next - sp;
				785	for (cp = cclasses; cp->name != NULL; cp++)
				786	if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
				787	break;
				788	if (cp->name == NULL) {
				789	/* oops, didn't find it */
				790	SETERROR(REG_ECTYPE);
				791	return;
				792	}
				793
				794	u = cp->chars;
				795	while ((c = *u++) != '\0')
				796	CHadd(cs, c);
				797	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
				798	MCadd(p, cs, u);
				799	}
				800
				801	/*
				802	- p_b_eclass - parse an equivalence-class name and deal with it
				803	*
				804	* This implementation is incomplete. xxx
				805	*/
				806	static void
				807	p_b_eclass(struct parse p, cset cs)
				808	{
				809	char c;
				810
				811	c = p_b_coll_elem(p, '=');
				812	CHadd(cs, c);
				813	}
				814
				815	/*
				816	- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
				817	*/
				818	static char /* value of symbol */
				819	p_b_symbol(struct parse *p)
				820	{
				821	char value;
				822
				823	REQUIRE(MORE(), REG_EBRACK);
				824	if (!EATTWO('[', '.'))
				825	return(GETNEXT());
				826
				827	/* collating symbol */
				828	value = p_b_coll_elem(p, '.');
				829	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
				830	return(value);
				831	}
				832
				833	/*
				834	- p_b_coll_elem - parse a collating-element name and look it up
				835	*/
				836	static char /* value of collating element */
				837	p_b_coll_elem(struct parse *p,
				838	int endc) /* name ended by endc,']' */
				839	{
				840	char *sp = p->next;
				841	struct cname *cp;
				842	int len;
				843
				844	while (MORE() && !SEETWO(endc, ']'))
				845	NEXT();
				846	if (!MORE()) {
				847	SETERROR(REG_EBRACK);
				848	return(0);
				849	}
				850	len = p->next - sp;
				851	for (cp = cnames; cp->name != NULL; cp++)
				852	if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
				853	return(cp->code); /* known name */
				854	if (len == 1)
				855	return(sp); / single character */
				856	SETERROR(REG_ECOLLATE); /* neither */
				857	return(0);
				858	}
				859
				860	/*
				861	- othercase - return the case counterpart of an alphabetic
				862	*/
				863	static char /* if no counterpart, return ch */
				864	othercase(int ch)
				865	{
				866	ch = (uch)ch;
				867	assert(isalpha(ch));
				868	if (isupper(ch))
				869	return ((uch)tolower(ch));
				870	else if (islower(ch))
				871	return ((uch)toupper(ch));
				872	else /* peculiar, but could happen */
				873	return(ch);
				874	}
				875
				876	/*
				877	- bothcases - emit a dualcase version of a two-case character
				878	*
				879	* Boy, is this implementation ever a kludge...
				880	*/
				881	static void
				882	bothcases(struct parse *p, int ch)
				883	{
				884	char *oldnext = p->next;
				885	char *oldend = p->end;
				886	char bracket[3];
				887
				888	ch = (uch)ch;
				889	assert(othercase(ch) != ch); /* p_bracket() would recurse */
				890	p->next = bracket;
				891	p->end = bracket+2;
				892	bracket[0] = ch;
				893	bracket[1] = ']';
				894	bracket[2] = '\0';
				895	p_bracket(p);
				896	assert(p->next == bracket+2);
				897	p->next = oldnext;
				898	p->end = oldend;
				899	}
				900
				901	/*
				902	- ordinary - emit an ordinary character
				903	*/
				904	static void
				905	ordinary(struct parse *p, int ch)
				906	{
				907	cat_t *cap = p->g->categories;
				908
				909	if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
				910	bothcases(p, ch);
				911	else {
				912	EMIT(OCHAR, (uch)ch);
				913	if (cap[ch] == 0)
				914	cap[ch] = p->g->ncategories++;
				915	}
				916	}
				917
				918	/*
				919	- nonnewline - emit REG_NEWLINE version of OANY
				920	*
				921	* Boy, is this implementation ever a kludge...
				922	*/
				923	static void
				924	nonnewline(struct parse *p)
				925	{
				926	char *oldnext = p->next;
				927	char *oldend = p->end;
				928	char bracket[4];
				929
				930	p->next = bracket;
				931	p->end = bracket+3;
				932	bracket[0] = '^';
				933	bracket[1] = '\n';
				934	bracket[2] = ']';
				935	bracket[3] = '\0';
				936	p_bracket(p);
				937	assert(p->next == bracket+3);
				938	p->next = oldnext;
				939	p->end = oldend;
				940	}
				941
				942	/*
				943	- repeat - generate code for a bounded repetition, recursively if needed
				944	*/
				945	static void
				946	repeat(struct parse *p,
				947	sopno start, /* operand from here to end of strip */
				948	int from, /* repeated from this number */
				949	int to) /* to this number of times (maybe INFINITY) */
				950	{
				951	sopno finish = HERE();
				952	# define N 2
				953	# define INF 3
				954	# define REP(f, t) ((f)*8 + (t))
				955	# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
				956	sopno copy;
				957
				958	if (p->error != 0) /* head off possible runaway recursion */
				959	return;
				960
				961	assert(from <= to);
				962
				963	switch (REP(MAP(from), MAP(to))) {
				964	case REP(0, 0): /* must be user doing this */
				965	DROP(finish-start); /* drop the operand */
				966	break;
				967	case REP(0, 1): /* as x{1,1}? */
				968	case REP(0, N): /* as x{1,n}? */
				969	case REP(0, INF): /* as x{1,}? */
				970	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				971	INSERT(OCH_, start); /* offset is wrong... */
				972	repeat(p, start+1, 1, to);
				973	ASTERN(OOR1, start);
				974	AHEAD(start); /* ... fix it */
				975	EMIT(OOR2, 0);
				976	AHEAD(THERE());
				977	ASTERN(O_CH, THERETHERE());
				978	break;
				979	case REP(1, 1): /* trivial case */
				980	/* done */
				981	break;
				982	case REP(1, N): /* as x?x{1,n-1} */
				983	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				984	INSERT(OCH_, start);
				985	ASTERN(OOR1, start);
				986	AHEAD(start);
				987	EMIT(OOR2, 0); /* offset very wrong... */
				988	AHEAD(THERE()); /* ...so fix it */
				989	ASTERN(O_CH, THERETHERE());
				990	copy = dupl(p, start+1, finish+1);
				991	assert(copy == finish+4);
				992	repeat(p, copy, 1, to-1);
				993	break;
				994	case REP(1, INF): /* as x+ */
				995	INSERT(OPLUS_, start);
				996	ASTERN(O_PLUS, start);
				997	break;
				998	case REP(N, N): /* as xx{m-1,n-1} */
				999	copy = dupl(p, start, finish);
				1000	repeat(p, copy, from-1, to-1);
				1001	break;
				1002	case REP(N, INF): /* as xx{n-1,INF} */
				1003	copy = dupl(p, start, finish);
				1004	repeat(p, copy, from-1, to);
				1005	break;
				1006	default: /* "can't happen" */
				1007	SETERROR(REG_ASSERT); /* just in case */
				1008	break;
				1009	}
				1010	}
				1011
				1012	/*
				1013	- seterr - set an error condition
				1014	*/
				1015	static int /* useless but makes type checking happy */
				1016	seterr(struct parse *p, int e)
				1017	{
				1018	if (p->error == 0) /* keep earliest error condition */
				1019	p->error = e;
				1020	p->next = nuls; /* try to bring things to a halt */
				1021	p->end = nuls;
				1022	return(0); /* make the return value well-defined */
				1023	}
				1024
				1025	/*
				1026	- allocset - allocate a set of characters for []
				1027	*/
				1028	static cset *
				1029	allocset(struct parse *p)
				1030	{
				1031	int no = p->g->ncsets++;
				1032	size_t nc;
				1033	size_t nbytes;
				1034	cset *cs;
				1035	size_t css = (size_t)p->g->csetsize;
				1036	int i;
				1037
				1038	if (no >= p->ncsalloc) { /* need another column of space */
				1039	void *ptr;
				1040
				1041	p->ncsalloc += CHAR_BIT;
				1042	nc = p->ncsalloc;
				1043	assert(nc % CHAR_BIT == 0);
				1044	nbytes = nc / CHAR_BIT * css;
				1045
				1046	ptr = (cset )realloc((char )p->g->sets, nc * sizeof(cset));
				1047	if (ptr == NULL)
				1048	goto nomem;
				1049	p->g->sets = ptr;
				1050
				1051	ptr = (uch )realloc((char )p->g->setbits, nbytes);
				1052	if (ptr == NULL)
				1053	goto nomem;
				1054	p->g->setbits = ptr;
				1055
				1056	for (i = 0; i < no; i++)
				1057	p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
				1058
				1059	(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
				1060	}
				1061	/* XXX should not happen */
				1062	if (p->g->sets == NULL \|\| p->g->setbits == NULL)
				1063	goto nomem;
				1064
				1065	cs = &p->g->sets[no];
				1066	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
				1067	cs->mask = 1 << ((no) % CHAR_BIT);
				1068	cs->hash = 0;
				1069	cs->smultis = 0;
				1070	cs->multis = NULL;
				1071
				1072	return(cs);
				1073	nomem:
				1074	free(p->g->sets);
				1075	p->g->sets = NULL;
				1076	free(p->g->setbits);
				1077	p->g->setbits = NULL;
				1078
				1079	SETERROR(REG_ESPACE);
				1080	/* caller's responsibility not to do set ops */
				1081	return(NULL);
				1082	}
				1083
				1084	/*
				1085	- freeset - free a now-unused set
				1086	*/
				1087	static void
				1088	freeset(struct parse p, cset cs)
				1089	{
				1090	size_t i;
				1091	cset *top = &p->g->sets[p->g->ncsets];
				1092	size_t css = (size_t)p->g->csetsize;
				1093
				1094	for (i = 0; i < css; i++)
				1095	CHsub(cs, i);
				1096	if (cs == top-1) /* recover only the easy case */
				1097	p->g->ncsets--;
				1098	}
				1099
				1100	/*
				1101	- freezeset - final processing on a set of characters
				1102	*
				1103	* The main task here is merging identical sets. This is usually a waste
				1104	* of time (although the hash code minimizes the overhead), but can win
				1105	* big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
				1106	* is done using addition rather than xor -- all ASCII [aA] sets xor to
				1107	* the same value!
				1108	*/
				1109	static int /* set number */
				1110	freezeset(struct parse p, cset cs)
				1111	{
				1112	uch h = cs->hash;
				1113	size_t i;
				1114	cset *top = &p->g->sets[p->g->ncsets];
				1115	cset *cs2;
				1116	size_t css = (size_t)p->g->csetsize;
				1117
				1118	/* look for an earlier one which is the same */
				1119	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
				1120	if (cs2->hash == h && cs2 != cs) {
				1121	/* maybe */
				1122	for (i = 0; i < css; i++)
				1123	if (!!CHIN(cs2, i) != !!CHIN(cs, i))
				1124	break; /* no */
				1125	if (i == css)
				1126	break; /* yes */
				1127	}
				1128
				1129	if (cs2 < top) { /* found one */
				1130	freeset(p, cs);
				1131	cs = cs2;
				1132	}
				1133
				1134	return((int)(cs - p->g->sets));
				1135	}
				1136
				1137	/*
				1138	- firstch - return first character in a set (which must have at least one)
				1139	*/
				1140	static int /* character; there is no "none" value */
				1141	firstch(struct parse p, cset cs)
				1142	{
				1143	size_t i;
				1144	size_t css = (size_t)p->g->csetsize;
				1145
				1146	for (i = 0; i < css; i++)
				1147	if (CHIN(cs, i))
				1148	return((char)i);
				1149	assert(never);
				1150	return(0); /* arbitrary */
				1151	}
				1152
				1153	/*
				1154	- nch - number of characters in a set
				1155	*/
				1156	static int
				1157	nch(struct parse p, cset cs)
				1158	{
				1159	size_t i;
				1160	size_t css = (size_t)p->g->csetsize;
				1161	int n = 0;
				1162
				1163	for (i = 0; i < css; i++)
				1164	if (CHIN(cs, i))
				1165	n++;
				1166	return(n);
				1167	}
				1168
				1169	/*
				1170	- mcadd - add a collating element to a cset
				1171	*/
				1172	static void
				1173	mcadd( struct parse p, cset cs, const char *cp)
				1174	{
				1175	size_t oldend = cs->smultis;
				1176	void *np;
				1177
				1178	cs->smultis += strlen(cp) + 1;
				1179	np = realloc(cs->multis, cs->smultis);
				1180	if (np == NULL) {
				1181	if (cs->multis)
				1182	free(cs->multis);
				1183	cs->multis = NULL;
				1184	SETERROR(REG_ESPACE);
				1185	return;
				1186	}
				1187	cs->multis = np;
				1188
				1189	llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
				1190	}
				1191
				1192	/*
				1193	- mcinvert - invert the list of collating elements in a cset
				1194	*
				1195	* This would have to know the set of possibilities. Implementation
				1196	* is deferred.
				1197	*/
				1198	/* ARGSUSED */
				1199	static void
				1200	mcinvert(struct parse p, cset cs)
				1201	{
				1202	assert(cs->multis == NULL); /* xxx */
				1203	}
				1204
				1205	/*
				1206	- mccase - add case counterparts of the list of collating elements in a cset
				1207	*
				1208	* This would have to know the set of possibilities. Implementation
				1209	* is deferred.
				1210	*/
				1211	/* ARGSUSED */
				1212	static void
				1213	mccase(struct parse p, cset cs)
				1214	{
				1215	assert(cs->multis == NULL); /* xxx */
				1216	}
				1217
				1218	/*
				1219	- isinsets - is this character in any sets?
				1220	*/
				1221	static int /* predicate */
				1222	isinsets(struct re_guts *g, int c)
				1223	{
				1224	uch *col;
				1225	int i;
				1226	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
				1227	unsigned uc = (uch)c;
				1228
				1229	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
				1230	if (col[uc] != 0)
				1231	return(1);
				1232	return(0);
				1233	}
				1234
				1235	/*
				1236	- samesets - are these two characters in exactly the same sets?
				1237	*/
				1238	static int /* predicate */
				1239	samesets(struct re_guts *g, int c1, int c2)
				1240	{
				1241	uch *col;
				1242	int i;
				1243	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
				1244	unsigned uc1 = (uch)c1;
				1245	unsigned uc2 = (uch)c2;
				1246
				1247	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
				1248	if (col[uc1] != col[uc2])
				1249	return(0);
				1250	return(1);
				1251	}
				1252
				1253	/*
				1254	- categorize - sort out character categories
				1255	*/
				1256	static void
				1257	categorize(struct parse p, struct re_guts g)
				1258	{
				1259	cat_t *cats = g->categories;
				1260	int c;
				1261	int c2;
				1262	cat_t cat;
				1263
				1264	/* avoid making error situations worse */
				1265	if (p->error != 0)
				1266	return;
				1267
				1268	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
				1269	if (cats[c] == 0 && isinsets(g, c)) {
				1270	cat = g->ncategories++;
				1271	cats[c] = cat;
				1272	for (c2 = c+1; c2 <= CHAR_MAX; c2++)
				1273	if (cats[c2] == 0 && samesets(g, c, c2))
				1274	cats[c2] = cat;
				1275	}
				1276	}
				1277
				1278	/*
				1279	- dupl - emit a duplicate of a bunch of sops
				1280	*/
				1281	static sopno /* start of duplicate */
				1282	dupl(struct parse *p,
				1283	sopno start, /* from here */
				1284	sopno finish) /* to this less one */
				1285	{
				1286	sopno ret = HERE();
				1287	sopno len = finish - start;
				1288
				1289	assert(finish >= start);
				1290	if (len == 0)
				1291	return(ret);
				1292	enlarge(p, p->ssize + len); /* this many unexpected additions */
				1293	assert(p->ssize >= p->slen + len);
				1294	(void) memmove((char *)(p->strip + p->slen),
				1295	(char )(p->strip + start), (size_t)lensizeof(sop));
				1296	p->slen += len;
				1297	return(ret);
				1298	}
				1299
				1300	/*
				1301	- doemit - emit a strip operator
				1302	*
				1303	* It might seem better to implement this as a macro with a function as
				1304	* hard-case backup, but it's just too big and messy unless there are
				1305	* some changes to the data structures. Maybe later.
				1306	*/
				1307	static void
				1308	doemit(struct parse *p, sop op, size_t opnd)
				1309	{
				1310	/* avoid making error situations worse */
				1311	if (p->error != 0)
				1312	return;
				1313
				1314	/* deal with oversize operands ("can't happen", more or less) */
				1315	assert(opnd < 1<<OPSHIFT);
				1316
				1317	/* deal with undersized strip */
				1318	if (p->slen >= p->ssize)
				1319	enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
				1320	assert(p->slen < p->ssize);
				1321
				1322	/* finally, it's all reduced to the easy case */
				1323	p->strip[p->slen++] = SOP(op, opnd);
				1324	}
				1325
				1326	/*
				1327	- doinsert - insert a sop into the strip
				1328	*/
				1329	static void
				1330	doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
				1331	{
				1332	sopno sn;
				1333	sop s;
				1334	int i;
				1335
				1336	/* avoid making error situations worse */
				1337	if (p->error != 0)
				1338	return;
				1339
				1340	sn = HERE();
				1341	EMIT(op, opnd); /* do checks, ensure space */
				1342	assert(HERE() == sn+1);
				1343	s = p->strip[sn];
				1344
				1345	/* adjust paren pointers */
				1346	assert(pos > 0);
				1347	for (i = 1; i < NPAREN; i++) {
				1348	if (p->pbegin[i] >= pos) {
				1349	p->pbegin[i]++;
				1350	}
				1351	if (p->pend[i] >= pos) {
				1352	p->pend[i]++;
				1353	}
				1354	}
				1355
				1356	memmove((char )&p->strip[pos+1], (char )&p->strip[pos],
				1357	(HERE()-pos-1)*sizeof(sop));
				1358	p->strip[pos] = s;
				1359	}
				1360
				1361	/*
				1362	- dofwd - complete a forward reference
				1363	*/
				1364	static void
				1365	dofwd(struct parse *p, sopno pos, sop value)
				1366	{
				1367	/* avoid making error situations worse */
				1368	if (p->error != 0)
				1369	return;
				1370
				1371	assert(value < 1<<OPSHIFT);
				1372	p->strip[pos] = OP(p->strip[pos]) \| value;
				1373	}
				1374
				1375	/*
				1376	- enlarge - enlarge the strip
				1377	*/
				1378	static void
				1379	enlarge(struct parse *p, sopno size)
				1380	{
				1381	sop *sp;
				1382
				1383	if (p->ssize >= size)
				1384	return;
				1385
				1386	sp = (sop )realloc(p->strip, sizesizeof(sop));
				1387	if (sp == NULL) {
				1388	SETERROR(REG_ESPACE);
				1389	return;
				1390	}
				1391	p->strip = sp;
				1392	p->ssize = size;
				1393	}
				1394
				1395	/*
				1396	- stripsnug - compact the strip
				1397	*/
				1398	static void
				1399	stripsnug(struct parse p, struct re_guts g)
				1400	{
				1401	g->nstates = p->slen;
				1402	g->strip = (sop )realloc((char )p->strip, p->slen * sizeof(sop));
				1403	if (g->strip == NULL) {
				1404	SETERROR(REG_ESPACE);
				1405	g->strip = p->strip;
				1406	}
				1407	}
				1408
				1409	/*
				1410	- findmust - fill in must and mlen with longest mandatory literal string
				1411	*
				1412	* This algorithm could do fancy things like analyzing the operands of \|
				1413	* for common subsequences. Someday. This code is simple and finds most
				1414	* of the interesting cases.
				1415	*
				1416	* Note that must and mlen got initialized during setup.
				1417	*/
				1418	static void
				1419	findmust(struct parse p, struct re_guts g)
				1420	{
				1421	sop *scan;
Daniel Dunbar	12420d3	2009-08-30 21:13:58 +0000	[diff] [blame]	1422	sop start = 0; / start initialized in the default case, after that */
				1423	sop newstart = 0; / newstart was initialized in the OCHAR case */
Torok Edwin	ce0c81e	2009-08-30 08:24:09 +0000	[diff] [blame]	1424	sopno newlen;
				1425	sop s;
				1426	char *cp;
				1427	sopno i;
				1428
				1429	/* avoid making error situations worse */
				1430	if (p->error != 0)
				1431	return;
				1432
				1433	/* find the longest OCHAR sequence in strip */
				1434	newlen = 0;
				1435	scan = g->strip + 1;
				1436	do {
				1437	s = *scan++;
				1438	switch (OP(s)) {
				1439	case OCHAR: /* sequence member */
				1440	if (newlen == 0) /* new sequence */
				1441	newstart = scan - 1;
				1442	newlen++;
				1443	break;
				1444	case OPLUS_: /* things that don't break one */
				1445	case OLPAREN:
				1446	case ORPAREN:
				1447	break;
				1448	case OQUEST_: /* things that must be skipped */
				1449	case OCH_:
				1450	scan--;
				1451	do {
				1452	scan += OPND(s);
				1453	s = *scan;
				1454	/* assert() interferes w debug printouts */
				1455	if (OP(s) != O_QUEST && OP(s) != O_CH &&
				1456	OP(s) != OOR2) {
				1457	g->iflags \|= REGEX_BAD;
				1458	return;
				1459	}
				1460	} while (OP(s) != O_QUEST && OP(s) != O_CH);
				1461	/* fallthrough */
				1462	default: /* things that break a sequence */
				1463	if (newlen > g->mlen) { /* ends one */
				1464	start = newstart;
				1465	g->mlen = newlen;
				1466	}
				1467	newlen = 0;
				1468	break;
				1469	}
				1470	} while (OP(s) != OEND);
				1471
				1472	if (g->mlen == 0) /* there isn't one */
				1473	return;
				1474
				1475	/* turn it into a character string */
				1476	g->must = malloc((size_t)g->mlen + 1);
				1477	if (g->must == NULL) { /* argh; just forget it */
				1478	g->mlen = 0;
				1479	return;
				1480	}
				1481	cp = g->must;
				1482	scan = start;
				1483	for (i = g->mlen; i > 0; i--) {
				1484	while (OP(s = *scan++) != OCHAR)
				1485	continue;
				1486	assert(cp < g->must + g->mlen);
				1487	*cp++ = (char)OPND(s);
				1488	}
				1489	assert(cp == g->must + g->mlen);
				1490	cp++ = '\0'; / just on general principles */
				1491	}
				1492
				1493	/*
				1494	- pluscount - count + nesting
				1495	*/
				1496	static sopno /* nesting depth */
				1497	pluscount(struct parse p, struct re_guts g)
				1498	{
				1499	sop *scan;
				1500	sop s;
				1501	sopno plusnest = 0;
				1502	sopno maxnest = 0;
				1503
				1504	if (p->error != 0)
				1505	return(0); /* there may not be an OEND */
				1506
				1507	scan = g->strip + 1;
				1508	do {
				1509	s = *scan++;
				1510	switch (OP(s)) {
				1511	case OPLUS_:
				1512	plusnest++;
				1513	break;
				1514	case O_PLUS:
				1515	if (plusnest > maxnest)
				1516	maxnest = plusnest;
				1517	plusnest--;
				1518	break;
				1519	}
				1520	} while (OP(s) != OEND);
				1521	if (plusnest != 0)
				1522	g->iflags \|= REGEX_BAD;
				1523	return(maxnest);
				1524	}