Blame - llvm/lib/Support/regcomp.c - toolchain/llvm-project

blob: cd018d5dc5bc4d3628dcacbff4d72b1cb0cfc53f [file] [log] [blame]

Torok Edwin	e14d4cd	2009-08-30 08:24:09 +0000	[diff] [blame]	1	/*-
				2	* This code is derived from OpenBSD's libc/regex, original license follows:
				3	*
				4	* Copyright (c) 1992, 1993, 1994 Henry Spencer.
				5	* Copyright (c) 1992, 1993, 1994
				6	* The Regents of the University of California. All rights reserved.
				7	*
				8	* This code is derived from software contributed to Berkeley by
				9	* Henry Spencer.
				10	*
				11	* Redistribution and use in source and binary forms, with or without
				12	* modification, are permitted provided that the following conditions
				13	* are met:
				14	* 1. Redistributions of source code must retain the above copyright
				15	* notice, this list of conditions and the following disclaimer.
				16	* 2. Redistributions in binary form must reproduce the above copyright
				17	* notice, this list of conditions and the following disclaimer in the
				18	* documentation and/or other materials provided with the distribution.
				19	* 3. Neither the name of the University nor the names of its contributors
				20	* may be used to endorse or promote products derived from this software
				21	* without specific prior written permission.
				22	*
				23	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
				24	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				25	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				26	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
				27	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				28	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				29	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				30	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				31	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				32	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				33	* SUCH DAMAGE.
				34	*
				35	* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
				36	*/
				37
				38	#include <sys/types.h>
				39	#include <stdio.h>
				40	#include <string.h>
				41	#include <ctype.h>
				42	#include <limits.h>
				43	#include <stdlib.h>
				44	#include "regex_impl.h"
				45
				46	#include "regutils.h"
				47	#include "regex2.h"
				48
				49	#include "regcclass.h"
				50	#include "regcname.h"
				51
				52	/*
				53	* parse structure, passed up and down to avoid global variables and
				54	* other clumsinesses
				55	*/
				56	struct parse {
				57	char next; / next character in RE */
				58	char end; / end of string (-> NUL normally) */
				59	int error; /* has an error been seen? */
				60	sop strip; / malloced strip */
				61	sopno ssize; /* malloced strip size (allocated) */
				62	sopno slen; /* malloced strip length (used) */
				63	int ncsalloc; /* number of csets allocated */
				64	struct re_guts *g;
				65	# define NPAREN 10 /* we need to remember () 1-9 for back refs */
				66	sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
				67	sopno pend[NPAREN]; /* -> ) ([0] unused) */
				68	};
				69
				70	static void p_ere(struct parse *, int);
				71	static void p_ere_exp(struct parse *);
				72	static void p_str(struct parse *);
				73	static void p_bre(struct parse *, int, int);
				74	static int p_simp_re(struct parse *, int);
				75	static int p_count(struct parse *);
				76	static void p_bracket(struct parse *);
				77	static void p_b_term(struct parse , cset );
				78	static void p_b_cclass(struct parse , cset );
				79	static void p_b_eclass(struct parse , cset );
				80	static char p_b_symbol(struct parse *);
				81	static char p_b_coll_elem(struct parse *, int);
				82	static char othercase(int);
				83	static void bothcases(struct parse *, int);
				84	static void ordinary(struct parse *, int);
				85	static void nonnewline(struct parse *);
				86	static void repeat(struct parse *, sopno, int, int);
				87	static int seterr(struct parse *, int);
				88	static cset allocset(struct parse );
				89	static void freeset(struct parse , cset );
				90	static int freezeset(struct parse , cset );
				91	static int firstch(struct parse , cset );
				92	static int nch(struct parse , cset );
				93	static void mcadd(struct parse , cset , const char *);
				94	static void mcinvert(struct parse , cset );
				95	static void mccase(struct parse , cset );
				96	static int isinsets(struct re_guts *, int);
				97	static int samesets(struct re_guts *, int, int);
				98	static void categorize(struct parse , struct re_guts );
				99	static sopno dupl(struct parse *, sopno, sopno);
				100	static void doemit(struct parse *, sop, size_t);
				101	static void doinsert(struct parse *, sop, size_t, sopno);
				102	static void dofwd(struct parse *, sopno, sop);
				103	static void enlarge(struct parse *, sopno);
				104	static void stripsnug(struct parse , struct re_guts );
				105	static void findmust(struct parse , struct re_guts );
				106	static sopno pluscount(struct parse , struct re_guts );
				107
				108	static char nuls[10]; /* place to point scanner in event of error */
				109
				110	/*
				111	* macros for use with parse structure
				112	* BEWARE: these know that the parse structure is named `p' !!!
				113	*/
				114	#define PEEK() (*p->next)
				115	#define PEEK2() (*(p->next+1))
				116	#define MORE() (p->next < p->end)
				117	#define MORE2() (p->next+1 < p->end)
				118	#define SEE(c) (MORE() && PEEK() == (c))
				119	#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
				120	#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
				121	#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
				122	#define NEXT() (p->next++)
				123	#define NEXT2() (p->next += 2)
				124	#define NEXTn(n) (p->next += (n))
				125	#define GETNEXT() (*p->next++)
				126	#define SETERROR(e) seterr(p, (e))
				127	#define REQUIRE(co, e) (void)((co) \|\| SETERROR(e))
				128	#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
				129	#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
				130	#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() \|\| PEEK() != (c), e))
				131	#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
				132	#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
				133	#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
				134	#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
				135	#define HERE() (p->slen)
				136	#define THERE() (p->slen - 1)
				137	#define THERETHERE() (p->slen - 2)
				138	#define DROP(n) (p->slen -= (n))
				139
				140	#ifdef _POSIX2_RE_DUP_MAX
				141	#define DUPMAX _POSIX2_RE_DUP_MAX
				142	#else
				143	#define DUPMAX 255
				144	#endif
Benjamin Kramer	2b37efa	2009-09-06 12:26:28 +0000	[diff] [blame]	145	#define INFINITY (DUPMAX + 1)
Torok Edwin	e14d4cd	2009-08-30 08:24:09 +0000	[diff] [blame]	146
				147	#ifndef NDEBUG
				148	static int never = 0; /* for use in asserts; shuts lint up */
				149	#else
				150	#define never 0 /* some <assert.h>s have bugs too */
				151	#endif
				152
				153	/*
				154	- llvm_regcomp - interface for parser and compilation
				155	*/
				156	int /* 0 success, otherwise REG_something */
				157	llvm_regcomp(llvm_regex_t preg, const char pattern, int cflags)
				158	{
				159	struct parse pa;
				160	struct re_guts *g;
				161	struct parse *p = &pa;
				162	int i;
				163	size_t len;
				164	#ifdef REDEBUG
				165	# define GOODFLAGS(f) (f)
				166	#else
				167	# define GOODFLAGS(f) ((f)&~REG_DUMP)
				168	#endif
				169
				170	cflags = GOODFLAGS(cflags);
				171	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
				172	return(REG_INVARG);
				173
				174	if (cflags&REG_PEND) {
				175	if (preg->re_endp < pattern)
				176	return(REG_INVARG);
				177	len = preg->re_endp - pattern;
				178	} else
				179	len = strlen((const char *)pattern);
				180
				181	/* do the mallocs early so failure handling is easy */
				182	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
				183	(NC-1)*sizeof(cat_t));
				184	if (g == NULL)
				185	return(REG_ESPACE);
				186	p->ssize = len/(size_t)2(size_t)3 + (size_t)1; / ugh */
				187	p->strip = (sop *)calloc(p->ssize, sizeof(sop));
				188	p->slen = 0;
				189	if (p->strip == NULL) {
				190	free((char *)g);
				191	return(REG_ESPACE);
				192	}
				193
				194	/* set things up */
				195	p->g = g;
				196	p->next = (char )pattern; / convenience; we do not modify it */
				197	p->end = p->next + len;
				198	p->error = 0;
				199	p->ncsalloc = 0;
				200	for (i = 0; i < NPAREN; i++) {
				201	p->pbegin[i] = 0;
				202	p->pend[i] = 0;
				203	}
				204	g->csetsize = NC;
				205	g->sets = NULL;
				206	g->setbits = NULL;
				207	g->ncsets = 0;
				208	g->cflags = cflags;
				209	g->iflags = 0;
				210	g->nbol = 0;
				211	g->neol = 0;
				212	g->must = NULL;
				213	g->mlen = 0;
				214	g->nsub = 0;
				215	g->ncategories = 1; /* category 0 is "everything else" */
				216	g->categories = &g->catspace[-(CHAR_MIN)];
				217	(void) memset((char )g->catspace, 0, NCsizeof(cat_t));
				218	g->backrefs = 0;
				219
				220	/* do it */
				221	EMIT(OEND, 0);
				222	g->firststate = THERE();
				223	if (cflags&REG_EXTENDED)
				224	p_ere(p, OUT);
				225	else if (cflags&REG_NOSPEC)
				226	p_str(p);
				227	else
				228	p_bre(p, OUT, OUT);
				229	EMIT(OEND, 0);
				230	g->laststate = THERE();
				231
				232	/* tidy up loose ends and fill things in */
				233	categorize(p, g);
				234	stripsnug(p, g);
				235	findmust(p, g);
				236	g->nplus = pluscount(p, g);
				237	g->magic = MAGIC2;
				238	preg->re_nsub = g->nsub;
				239	preg->re_g = g;
				240	preg->re_magic = MAGIC1;
				241	#ifndef REDEBUG
				242	/* not debugging, so can't rely on the assert() in llvm_regexec() */
				243	if (g->iflags&REGEX_BAD)
				244	SETERROR(REG_ASSERT);
				245	#endif
				246
				247	/* win or lose, we're done */
				248	if (p->error != 0) /* lose */
				249	llvm_regfree(preg);
				250	return(p->error);
				251	}
				252
				253	/*
				254	- p_ere - ERE parser top level, concatenation and alternation
				255	*/
				256	static void
				257	p_ere(struct parse p, int stop) / character this ERE should end at */
				258	{
				259	char c;
Daniel Dunbar	6e4ed8c	2009-09-08 16:14:54 +0000	[diff] [blame]	260	sopno prevback = 0;
				261	sopno prevfwd = 0;
Torok Edwin	e14d4cd	2009-08-30 08:24:09 +0000	[diff] [blame]	262	sopno conc;
				263	int first = 1; /* is this the first alternative? */
				264
				265	for (;;) {
				266	/* do a bunch of concatenated expressions */
				267	conc = HERE();
				268	while (MORE() && (c = PEEK()) != '\|' && c != stop)
				269	p_ere_exp(p);
				270	REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
				271
				272	if (!EAT('\|'))
				273	break; /* NOTE BREAK OUT */
				274
				275	if (first) {
				276	INSERT(OCH_, conc); /* offset is wrong */
				277	prevfwd = conc;
				278	prevback = conc;
				279	first = 0;
				280	}
				281	ASTERN(OOR1, prevback);
				282	prevback = THERE();
				283	AHEAD(prevfwd); /* fix previous offset */
				284	prevfwd = HERE();
				285	EMIT(OOR2, 0); /* offset is very wrong */
				286	}
				287
				288	if (!first) { /* tail-end fixups */
				289	AHEAD(prevfwd);
				290	ASTERN(O_CH, prevback);
				291	}
				292
				293	assert(!MORE() \|\| SEE(stop));
				294	}
				295
				296	/*
				297	- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
				298	*/
				299	static void
				300	p_ere_exp(struct parse *p)
				301	{
				302	char c;
				303	sopno pos;
				304	int count;
				305	int count2;
				306	sopno subno;
				307	int wascaret = 0;
				308
				309	assert(MORE()); /* caller should have ensured this */
				310	c = GETNEXT();
				311
				312	pos = HERE();
				313	switch (c) {
				314	case '(':
				315	REQUIRE(MORE(), REG_EPAREN);
				316	p->g->nsub++;
				317	subno = p->g->nsub;
				318	if (subno < NPAREN)
				319	p->pbegin[subno] = HERE();
				320	EMIT(OLPAREN, subno);
				321	if (!SEE(')'))
				322	p_ere(p, ')');
				323	if (subno < NPAREN) {
				324	p->pend[subno] = HERE();
				325	assert(p->pend[subno] != 0);
				326	}
				327	EMIT(ORPAREN, subno);
				328	MUSTEAT(')', REG_EPAREN);
				329	break;
				330	#ifndef POSIX_MISTAKE
				331	case ')': /* happens only if no current unmatched ( */
				332	/*
				333	* You may ask, why the ifndef? Because I didn't notice
				334	* this until slightly too late for 1003.2, and none of the
				335	* other 1003.2 regular-expression reviewers noticed it at
				336	* all. So an unmatched ) is legal POSIX, at least until
				337	* we can get it fixed.
				338	*/
				339	SETERROR(REG_EPAREN);
				340	break;
				341	#endif
				342	case '^':
				343	EMIT(OBOL, 0);
				344	p->g->iflags \|= USEBOL;
				345	p->g->nbol++;
				346	wascaret = 1;
				347	break;
				348	case '$':
				349	EMIT(OEOL, 0);
				350	p->g->iflags \|= USEEOL;
				351	p->g->neol++;
				352	break;
				353	case '\|':
				354	SETERROR(REG_EMPTY);
				355	break;
				356	case '*':
				357	case '+':
				358	case '?':
				359	SETERROR(REG_BADRPT);
				360	break;
				361	case '.':
				362	if (p->g->cflags&REG_NEWLINE)
				363	nonnewline(p);
				364	else
				365	EMIT(OANY, 0);
				366	break;
				367	case '[':
				368	p_bracket(p);
				369	break;
				370	case '\\':
				371	REQUIRE(MORE(), REG_EESCAPE);
				372	c = GETNEXT();
				373	ordinary(p, c);
				374	break;
				375	case '{': /* okay as ordinary except if digit follows */
				376	REQUIRE(!MORE() \|\| !isdigit((uch)PEEK()), REG_BADRPT);
				377	/* FALLTHROUGH */
				378	default:
				379	ordinary(p, c);
				380	break;
				381	}
				382
				383	if (!MORE())
				384	return;
				385	c = PEEK();
				386	/* we call { a repetition if followed by a digit */
				387	if (!( c == '*' \|\| c == '+' \|\| c == '?' \|\|
				388	(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
				389	return; /* no repetition, we're done */
				390	NEXT();
				391
				392	REQUIRE(!wascaret, REG_BADRPT);
				393	switch (c) {
				394	case '': / implemented as +? */
				395	/* this case does not require the (y\|) trick, noKLUDGE */
				396	INSERT(OPLUS_, pos);
				397	ASTERN(O_PLUS, pos);
				398	INSERT(OQUEST_, pos);
				399	ASTERN(O_QUEST, pos);
				400	break;
				401	case '+':
				402	INSERT(OPLUS_, pos);
				403	ASTERN(O_PLUS, pos);
				404	break;
				405	case '?':
				406	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				407	INSERT(OCH_, pos); /* offset slightly wrong */
				408	ASTERN(OOR1, pos); /* this one's right */
				409	AHEAD(pos); /* fix the OCH_ */
				410	EMIT(OOR2, 0); /* offset very wrong... */
				411	AHEAD(THERE()); /* ...so fix it */
				412	ASTERN(O_CH, THERETHERE());
				413	break;
				414	case '{':
				415	count = p_count(p);
				416	if (EAT(',')) {
				417	if (isdigit((uch)PEEK())) {
				418	count2 = p_count(p);
				419	REQUIRE(count <= count2, REG_BADBR);
				420	} else /* single number with comma */
				421	count2 = INFINITY;
				422	} else /* just a single number */
				423	count2 = count;
				424	repeat(p, pos, count, count2);
				425	if (!EAT('}')) { /* error heuristics */
				426	while (MORE() && PEEK() != '}')
				427	NEXT();
				428	REQUIRE(MORE(), REG_EBRACE);
				429	SETERROR(REG_BADBR);
				430	}
				431	break;
				432	}
				433
				434	if (!MORE())
				435	return;
				436	c = PEEK();
				437	if (!( c == '*' \|\| c == '+' \|\| c == '?' \|\|
				438	(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
				439	return;
				440	SETERROR(REG_BADRPT);
				441	}
				442
				443	/*
				444	- p_str - string (no metacharacters) "parser"
				445	*/
				446	static void
				447	p_str(struct parse *p)
				448	{
				449	REQUIRE(MORE(), REG_EMPTY);
				450	while (MORE())
				451	ordinary(p, GETNEXT());
				452	}
				453
				454	/*
				455	- p_bre - BRE parser top level, anchoring and concatenation
				456	* Giving end1 as OUT essentially eliminates the end1/end2 check.
				457	*
				458	* This implementation is a bit of a kludge, in that a trailing $ is first
				459	* taken as an ordinary character and then revised to be an anchor. The
				460	* only undesirable side effect is that '$' gets included as a character
				461	* category in such cases. This is fairly harmless; not worth fixing.
				462	* The amount of lookahead needed to avoid this kludge is excessive.
				463	*/
				464	static void
				465	p_bre(struct parse *p,
				466	int end1, /* first terminating character */
				467	int end2) /* second terminating character */
				468	{
				469	sopno start = HERE();
				470	int first = 1; /* first subexpression? */
				471	int wasdollar = 0;
				472
				473	if (EAT('^')) {
				474	EMIT(OBOL, 0);
				475	p->g->iflags \|= USEBOL;
				476	p->g->nbol++;
				477	}
				478	while (MORE() && !SEETWO(end1, end2)) {
				479	wasdollar = p_simp_re(p, first);
				480	first = 0;
				481	}
				482	if (wasdollar) { /* oops, that was a trailing anchor */
				483	DROP(1);
				484	EMIT(OEOL, 0);
				485	p->g->iflags \|= USEEOL;
				486	p->g->neol++;
				487	}
				488
				489	REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
				490	}
				491
				492	/*
				493	- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
				494	*/
				495	static int /* was the simple RE an unbackslashed $? */
				496	p_simp_re(struct parse *p,
				497	int starordinary) /* is a leading * an ordinary character? */
				498	{
				499	int c;
				500	int count;
				501	int count2;
				502	sopno pos;
				503	int i;
				504	sopno subno;
				505	# define BACKSL (1<<CHAR_BIT)
				506
				507	pos = HERE(); /* repetion op, if any, covers from here */
				508
				509	assert(MORE()); /* caller should have ensured this */
				510	c = GETNEXT();
				511	if (c == '\\') {
				512	REQUIRE(MORE(), REG_EESCAPE);
				513	c = BACKSL \| GETNEXT();
				514	}
				515	switch (c) {
				516	case '.':
				517	if (p->g->cflags&REG_NEWLINE)
				518	nonnewline(p);
				519	else
				520	EMIT(OANY, 0);
				521	break;
				522	case '[':
				523	p_bracket(p);
				524	break;
				525	case BACKSL\|'{':
				526	SETERROR(REG_BADRPT);
				527	break;
				528	case BACKSL\|'(':
				529	p->g->nsub++;
				530	subno = p->g->nsub;
				531	if (subno < NPAREN)
				532	p->pbegin[subno] = HERE();
				533	EMIT(OLPAREN, subno);
				534	/* the MORE here is an error heuristic */
				535	if (MORE() && !SEETWO('\\', ')'))
				536	p_bre(p, '\\', ')');
				537	if (subno < NPAREN) {
				538	p->pend[subno] = HERE();
				539	assert(p->pend[subno] != 0);
				540	}
				541	EMIT(ORPAREN, subno);
				542	REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
				543	break;
				544	case BACKSL\|')': /* should not get here -- must be user */
				545	case BACKSL\|'}':
				546	SETERROR(REG_EPAREN);
				547	break;
				548	case BACKSL\|'1':
				549	case BACKSL\|'2':
				550	case BACKSL\|'3':
				551	case BACKSL\|'4':
				552	case BACKSL\|'5':
				553	case BACKSL\|'6':
				554	case BACKSL\|'7':
				555	case BACKSL\|'8':
				556	case BACKSL\|'9':
				557	i = (c&~BACKSL) - '0';
				558	assert(i < NPAREN);
				559	if (p->pend[i] != 0) {
				560	assert(i <= p->g->nsub);
				561	EMIT(OBACK_, i);
				562	assert(p->pbegin[i] != 0);
				563	assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
				564	assert(OP(p->strip[p->pend[i]]) == ORPAREN);
				565	(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
				566	EMIT(O_BACK, i);
				567	} else
				568	SETERROR(REG_ESUBREG);
				569	p->g->backrefs = 1;
				570	break;
				571	case '*':
				572	REQUIRE(starordinary, REG_BADRPT);
				573	/* FALLTHROUGH */
				574	default:
				575	ordinary(p, (char)c);
				576	break;
				577	}
				578
				579	if (EAT('')) { / implemented as +? */
				580	/* this case does not require the (y\|) trick, noKLUDGE */
				581	INSERT(OPLUS_, pos);
				582	ASTERN(O_PLUS, pos);
				583	INSERT(OQUEST_, pos);
				584	ASTERN(O_QUEST, pos);
				585	} else if (EATTWO('\\', '{')) {
				586	count = p_count(p);
				587	if (EAT(',')) {
				588	if (MORE() && isdigit((uch)PEEK())) {
				589	count2 = p_count(p);
				590	REQUIRE(count <= count2, REG_BADBR);
				591	} else /* single number with comma */
				592	count2 = INFINITY;
				593	} else /* just a single number */
				594	count2 = count;
				595	repeat(p, pos, count, count2);
				596	if (!EATTWO('\\', '}')) { /* error heuristics */
				597	while (MORE() && !SEETWO('\\', '}'))
				598	NEXT();
				599	REQUIRE(MORE(), REG_EBRACE);
				600	SETERROR(REG_BADBR);
				601	}
				602	} else if (c == '$') /* $ (but not \$) ends it */
				603	return(1);
				604
				605	return(0);
				606	}
				607
				608	/*
				609	- p_count - parse a repetition count
				610	*/
				611	static int /* the value */
				612	p_count(struct parse *p)
				613	{
				614	int count = 0;
				615	int ndigits = 0;
				616
				617	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
				618	count = count*10 + (GETNEXT() - '0');
				619	ndigits++;
				620	}
				621
				622	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
				623	return(count);
				624	}
				625
				626	/*
				627	- p_bracket - parse a bracketed character list
				628	*
				629	* Note a significant property of this code: if the allocset() did SETERROR,
				630	* no set operations are done.
				631	*/
				632	static void
				633	p_bracket(struct parse *p)
				634	{
				635	cset *cs;
				636	int invert = 0;
				637
				638	/* Dept of Truly Sickening Special-Case Kludges */
				639	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
				640	EMIT(OBOW, 0);
				641	NEXTn(6);
				642	return;
				643	}
				644	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
				645	EMIT(OEOW, 0);
				646	NEXTn(6);
				647	return;
				648	}
				649
				650	if ((cs = allocset(p)) == NULL) {
				651	/* allocset did set error status in p */
				652	return;
				653	}
				654
				655	if (EAT('^'))
				656	invert++; /* make note to invert set at end */
				657	if (EAT(']'))
				658	CHadd(cs, ']');
				659	else if (EAT('-'))
				660	CHadd(cs, '-');
				661	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
				662	p_b_term(p, cs);
				663	if (EAT('-'))
				664	CHadd(cs, '-');
				665	MUSTEAT(']', REG_EBRACK);
				666
				667	if (p->error != 0) { /* don't mess things up further */
				668	freeset(p, cs);
				669	return;
				670	}
				671
				672	if (p->g->cflags&REG_ICASE) {
				673	int i;
				674	int ci;
				675
				676	for (i = p->g->csetsize - 1; i >= 0; i--)
				677	if (CHIN(cs, i) && isalpha(i)) {
				678	ci = othercase(i);
				679	if (ci != i)
				680	CHadd(cs, ci);
				681	}
				682	if (cs->multis != NULL)
				683	mccase(p, cs);
				684	}
				685	if (invert) {
				686	int i;
				687
				688	for (i = p->g->csetsize - 1; i >= 0; i--)
				689	if (CHIN(cs, i))
				690	CHsub(cs, i);
				691	else
				692	CHadd(cs, i);
				693	if (p->g->cflags&REG_NEWLINE)
				694	CHsub(cs, '\n');
				695	if (cs->multis != NULL)
				696	mcinvert(p, cs);
				697	}
				698
				699	assert(cs->multis == NULL); /* xxx */
				700
				701	if (nch(p, cs) == 1) { /* optimize singleton sets */
				702	ordinary(p, firstch(p, cs));
				703	freeset(p, cs);
				704	} else
				705	EMIT(OANYOF, freezeset(p, cs));
				706	}
				707
				708	/*
				709	- p_b_term - parse one term of a bracketed character list
				710	*/
				711	static void
				712	p_b_term(struct parse p, cset cs)
				713	{
				714	char c;
				715	char start, finish;
				716	int i;
				717
				718	/* classify what we've got */
				719	switch ((MORE()) ? PEEK() : '\0') {
				720	case '[':
				721	c = (MORE2()) ? PEEK2() : '\0';
				722	break;
				723	case '-':
				724	SETERROR(REG_ERANGE);
				725	return; /* NOTE RETURN */
				726	break;
				727	default:
				728	c = '\0';
				729	break;
				730	}
				731
				732	switch (c) {
				733	case ':': /* character class */
				734	NEXT2();
				735	REQUIRE(MORE(), REG_EBRACK);
				736	c = PEEK();
				737	REQUIRE(c != '-' && c != ']', REG_ECTYPE);
				738	p_b_cclass(p, cs);
				739	REQUIRE(MORE(), REG_EBRACK);
				740	REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
				741	break;
				742	case '=': /* equivalence class */
				743	NEXT2();
				744	REQUIRE(MORE(), REG_EBRACK);
				745	c = PEEK();
				746	REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
				747	p_b_eclass(p, cs);
				748	REQUIRE(MORE(), REG_EBRACK);
				749	REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
				750	break;
				751	default: /* symbol, ordinary character, or range */
				752	/* xxx revision needed for multichar stuff */
				753	start = p_b_symbol(p);
				754	if (SEE('-') && MORE2() && PEEK2() != ']') {
				755	/* range */
				756	NEXT();
				757	if (EAT('-'))
				758	finish = '-';
				759	else
				760	finish = p_b_symbol(p);
				761	} else
				762	finish = start;
				763	/* xxx what about signed chars here... */
				764	REQUIRE(start <= finish, REG_ERANGE);
				765	for (i = start; i <= finish; i++)
				766	CHadd(cs, i);
				767	break;
				768	}
				769	}
				770
				771	/*
				772	- p_b_cclass - parse a character-class name and deal with it
				773	*/
				774	static void
				775	p_b_cclass(struct parse p, cset cs)
				776	{
				777	char *sp = p->next;
				778	struct cclass *cp;
				779	size_t len;
				780	const char *u;
				781	char c;
				782
				783	while (MORE() && isalpha(PEEK()))
				784	NEXT();
				785	len = p->next - sp;
				786	for (cp = cclasses; cp->name != NULL; cp++)
				787	if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
				788	break;
				789	if (cp->name == NULL) {
				790	/* oops, didn't find it */
				791	SETERROR(REG_ECTYPE);
				792	return;
				793	}
				794
				795	u = cp->chars;
				796	while ((c = *u++) != '\0')
				797	CHadd(cs, c);
				798	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
				799	MCadd(p, cs, u);
				800	}
				801
				802	/*
				803	- p_b_eclass - parse an equivalence-class name and deal with it
				804	*
				805	* This implementation is incomplete. xxx
				806	*/
				807	static void
				808	p_b_eclass(struct parse p, cset cs)
				809	{
				810	char c;
				811
				812	c = p_b_coll_elem(p, '=');
				813	CHadd(cs, c);
				814	}
				815
				816	/*
				817	- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
				818	*/
				819	static char /* value of symbol */
				820	p_b_symbol(struct parse *p)
				821	{
				822	char value;
				823
				824	REQUIRE(MORE(), REG_EBRACK);
				825	if (!EATTWO('[', '.'))
				826	return(GETNEXT());
				827
				828	/* collating symbol */
				829	value = p_b_coll_elem(p, '.');
				830	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
				831	return(value);
				832	}
				833
				834	/*
				835	- p_b_coll_elem - parse a collating-element name and look it up
				836	*/
				837	static char /* value of collating element */
				838	p_b_coll_elem(struct parse *p,
				839	int endc) /* name ended by endc,']' */
				840	{
				841	char *sp = p->next;
				842	struct cname *cp;
				843	int len;
				844
				845	while (MORE() && !SEETWO(endc, ']'))
				846	NEXT();
				847	if (!MORE()) {
				848	SETERROR(REG_EBRACK);
				849	return(0);
				850	}
				851	len = p->next - sp;
				852	for (cp = cnames; cp->name != NULL; cp++)
				853	if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
				854	return(cp->code); /* known name */
				855	if (len == 1)
				856	return(sp); / single character */
				857	SETERROR(REG_ECOLLATE); /* neither */
				858	return(0);
				859	}
				860
				861	/*
				862	- othercase - return the case counterpart of an alphabetic
				863	*/
				864	static char /* if no counterpart, return ch */
				865	othercase(int ch)
				866	{
				867	ch = (uch)ch;
				868	assert(isalpha(ch));
				869	if (isupper(ch))
				870	return ((uch)tolower(ch));
				871	else if (islower(ch))
				872	return ((uch)toupper(ch));
				873	else /* peculiar, but could happen */
				874	return(ch);
				875	}
				876
				877	/*
				878	- bothcases - emit a dualcase version of a two-case character
				879	*
				880	* Boy, is this implementation ever a kludge...
				881	*/
				882	static void
				883	bothcases(struct parse *p, int ch)
				884	{
				885	char *oldnext = p->next;
				886	char *oldend = p->end;
				887	char bracket[3];
				888
				889	ch = (uch)ch;
				890	assert(othercase(ch) != ch); /* p_bracket() would recurse */
				891	p->next = bracket;
				892	p->end = bracket+2;
				893	bracket[0] = ch;
				894	bracket[1] = ']';
				895	bracket[2] = '\0';
				896	p_bracket(p);
				897	assert(p->next == bracket+2);
				898	p->next = oldnext;
				899	p->end = oldend;
				900	}
				901
				902	/*
				903	- ordinary - emit an ordinary character
				904	*/
				905	static void
				906	ordinary(struct parse *p, int ch)
				907	{
				908	cat_t *cap = p->g->categories;
				909
				910	if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
				911	bothcases(p, ch);
				912	else {
				913	EMIT(OCHAR, (uch)ch);
				914	if (cap[ch] == 0)
				915	cap[ch] = p->g->ncategories++;
				916	}
				917	}
				918
				919	/*
				920	- nonnewline - emit REG_NEWLINE version of OANY
				921	*
				922	* Boy, is this implementation ever a kludge...
				923	*/
				924	static void
				925	nonnewline(struct parse *p)
				926	{
				927	char *oldnext = p->next;
				928	char *oldend = p->end;
				929	char bracket[4];
				930
				931	p->next = bracket;
				932	p->end = bracket+3;
				933	bracket[0] = '^';
				934	bracket[1] = '\n';
				935	bracket[2] = ']';
				936	bracket[3] = '\0';
				937	p_bracket(p);
				938	assert(p->next == bracket+3);
				939	p->next = oldnext;
				940	p->end = oldend;
				941	}
				942
				943	/*
				944	- repeat - generate code for a bounded repetition, recursively if needed
				945	*/
				946	static void
				947	repeat(struct parse *p,
				948	sopno start, /* operand from here to end of strip */
				949	int from, /* repeated from this number */
				950	int to) /* to this number of times (maybe INFINITY) */
				951	{
				952	sopno finish = HERE();
				953	# define N 2
				954	# define INF 3
				955	# define REP(f, t) ((f)*8 + (t))
				956	# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
				957	sopno copy;
				958
				959	if (p->error != 0) /* head off possible runaway recursion */
				960	return;
				961
				962	assert(from <= to);
				963
				964	switch (REP(MAP(from), MAP(to))) {
				965	case REP(0, 0): /* must be user doing this */
				966	DROP(finish-start); /* drop the operand */
				967	break;
				968	case REP(0, 1): /* as x{1,1}? */
				969	case REP(0, N): /* as x{1,n}? */
				970	case REP(0, INF): /* as x{1,}? */
				971	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				972	INSERT(OCH_, start); /* offset is wrong... */
				973	repeat(p, start+1, 1, to);
				974	ASTERN(OOR1, start);
				975	AHEAD(start); /* ... fix it */
				976	EMIT(OOR2, 0);
				977	AHEAD(THERE());
				978	ASTERN(O_CH, THERETHERE());
				979	break;
				980	case REP(1, 1): /* trivial case */
				981	/* done */
				982	break;
				983	case REP(1, N): /* as x?x{1,n-1} */
				984	/* KLUDGE: emit y? as (y\|) until subtle bug gets fixed */
				985	INSERT(OCH_, start);
				986	ASTERN(OOR1, start);
				987	AHEAD(start);
				988	EMIT(OOR2, 0); /* offset very wrong... */
				989	AHEAD(THERE()); /* ...so fix it */
				990	ASTERN(O_CH, THERETHERE());
				991	copy = dupl(p, start+1, finish+1);
				992	assert(copy == finish+4);
				993	repeat(p, copy, 1, to-1);
				994	break;
				995	case REP(1, INF): /* as x+ */
				996	INSERT(OPLUS_, start);
				997	ASTERN(O_PLUS, start);
				998	break;
				999	case REP(N, N): /* as xx{m-1,n-1} */
				1000	copy = dupl(p, start, finish);
				1001	repeat(p, copy, from-1, to-1);
				1002	break;
				1003	case REP(N, INF): /* as xx{n-1,INF} */
				1004	copy = dupl(p, start, finish);
				1005	repeat(p, copy, from-1, to);
				1006	break;
				1007	default: /* "can't happen" */
				1008	SETERROR(REG_ASSERT); /* just in case */
				1009	break;
				1010	}
				1011	}
				1012
				1013	/*
				1014	- seterr - set an error condition
				1015	*/
				1016	static int /* useless but makes type checking happy */
				1017	seterr(struct parse *p, int e)
				1018	{
				1019	if (p->error == 0) /* keep earliest error condition */
				1020	p->error = e;
				1021	p->next = nuls; /* try to bring things to a halt */
				1022	p->end = nuls;
				1023	return(0); /* make the return value well-defined */
				1024	}
				1025
				1026	/*
				1027	- allocset - allocate a set of characters for []
				1028	*/
				1029	static cset *
				1030	allocset(struct parse *p)
				1031	{
				1032	int no = p->g->ncsets++;
				1033	size_t nc;
				1034	size_t nbytes;
				1035	cset *cs;
				1036	size_t css = (size_t)p->g->csetsize;
				1037	int i;
				1038
				1039	if (no >= p->ncsalloc) { /* need another column of space */
				1040	void *ptr;
				1041
				1042	p->ncsalloc += CHAR_BIT;
				1043	nc = p->ncsalloc;
				1044	assert(nc % CHAR_BIT == 0);
				1045	nbytes = nc / CHAR_BIT * css;
				1046
				1047	ptr = (cset )realloc((char )p->g->sets, nc * sizeof(cset));
				1048	if (ptr == NULL)
				1049	goto nomem;
				1050	p->g->sets = ptr;
				1051
				1052	ptr = (uch )realloc((char )p->g->setbits, nbytes);
				1053	if (ptr == NULL)
				1054	goto nomem;
				1055	p->g->setbits = ptr;
				1056
				1057	for (i = 0; i < no; i++)
				1058	p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
				1059
				1060	(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
				1061	}
				1062	/* XXX should not happen */
				1063	if (p->g->sets == NULL \|\| p->g->setbits == NULL)
				1064	goto nomem;
				1065
				1066	cs = &p->g->sets[no];
				1067	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
				1068	cs->mask = 1 << ((no) % CHAR_BIT);
				1069	cs->hash = 0;
				1070	cs->smultis = 0;
				1071	cs->multis = NULL;
				1072
				1073	return(cs);
				1074	nomem:
				1075	free(p->g->sets);
				1076	p->g->sets = NULL;
				1077	free(p->g->setbits);
				1078	p->g->setbits = NULL;
				1079
				1080	SETERROR(REG_ESPACE);
				1081	/* caller's responsibility not to do set ops */
				1082	return(NULL);
				1083	}
				1084
				1085	/*
				1086	- freeset - free a now-unused set
				1087	*/
				1088	static void
				1089	freeset(struct parse p, cset cs)
				1090	{
				1091	size_t i;
				1092	cset *top = &p->g->sets[p->g->ncsets];
				1093	size_t css = (size_t)p->g->csetsize;
				1094
				1095	for (i = 0; i < css; i++)
				1096	CHsub(cs, i);
				1097	if (cs == top-1) /* recover only the easy case */
				1098	p->g->ncsets--;
				1099	}
				1100
				1101	/*
				1102	- freezeset - final processing on a set of characters
				1103	*
				1104	* The main task here is merging identical sets. This is usually a waste
				1105	* of time (although the hash code minimizes the overhead), but can win
				1106	* big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
				1107	* is done using addition rather than xor -- all ASCII [aA] sets xor to
				1108	* the same value!
				1109	*/
				1110	static int /* set number */
				1111	freezeset(struct parse p, cset cs)
				1112	{
				1113	uch h = cs->hash;
				1114	size_t i;
				1115	cset *top = &p->g->sets[p->g->ncsets];
				1116	cset *cs2;
				1117	size_t css = (size_t)p->g->csetsize;
				1118
				1119	/* look for an earlier one which is the same */
				1120	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
				1121	if (cs2->hash == h && cs2 != cs) {
				1122	/* maybe */
				1123	for (i = 0; i < css; i++)
				1124	if (!!CHIN(cs2, i) != !!CHIN(cs, i))
				1125	break; /* no */
				1126	if (i == css)
				1127	break; /* yes */
				1128	}
				1129
				1130	if (cs2 < top) { /* found one */
				1131	freeset(p, cs);
				1132	cs = cs2;
				1133	}
				1134
				1135	return((int)(cs - p->g->sets));
				1136	}
				1137
				1138	/*
				1139	- firstch - return first character in a set (which must have at least one)
				1140	*/
				1141	static int /* character; there is no "none" value */
				1142	firstch(struct parse p, cset cs)
				1143	{
				1144	size_t i;
				1145	size_t css = (size_t)p->g->csetsize;
				1146
				1147	for (i = 0; i < css; i++)
				1148	if (CHIN(cs, i))
				1149	return((char)i);
				1150	assert(never);
				1151	return(0); /* arbitrary */
				1152	}
				1153
				1154	/*
				1155	- nch - number of characters in a set
				1156	*/
				1157	static int
				1158	nch(struct parse p, cset cs)
				1159	{
				1160	size_t i;
				1161	size_t css = (size_t)p->g->csetsize;
				1162	int n = 0;
				1163
				1164	for (i = 0; i < css; i++)
				1165	if (CHIN(cs, i))
				1166	n++;
				1167	return(n);
				1168	}
				1169
				1170	/*
				1171	- mcadd - add a collating element to a cset
				1172	*/
				1173	static void
				1174	mcadd( struct parse p, cset cs, const char *cp)
				1175	{
				1176	size_t oldend = cs->smultis;
				1177	void *np;
				1178
				1179	cs->smultis += strlen(cp) + 1;
				1180	np = realloc(cs->multis, cs->smultis);
				1181	if (np == NULL) {
				1182	if (cs->multis)
				1183	free(cs->multis);
				1184	cs->multis = NULL;
				1185	SETERROR(REG_ESPACE);
				1186	return;
				1187	}
				1188	cs->multis = np;
				1189
				1190	llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
				1191	}
				1192
				1193	/*
				1194	- mcinvert - invert the list of collating elements in a cset
				1195	*
				1196	* This would have to know the set of possibilities. Implementation
				1197	* is deferred.
				1198	*/
				1199	/* ARGSUSED */
				1200	static void
				1201	mcinvert(struct parse p, cset cs)
				1202	{
				1203	assert(cs->multis == NULL); /* xxx */
				1204	}
				1205
				1206	/*
				1207	- mccase - add case counterparts of the list of collating elements in a cset
				1208	*
				1209	* This would have to know the set of possibilities. Implementation
				1210	* is deferred.
				1211	*/
				1212	/* ARGSUSED */
				1213	static void
				1214	mccase(struct parse p, cset cs)
				1215	{
				1216	assert(cs->multis == NULL); /* xxx */
				1217	}
				1218
				1219	/*
				1220	- isinsets - is this character in any sets?
				1221	*/
				1222	static int /* predicate */
				1223	isinsets(struct re_guts *g, int c)
				1224	{
				1225	uch *col;
				1226	int i;
				1227	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
				1228	unsigned uc = (uch)c;
				1229
				1230	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
				1231	if (col[uc] != 0)
				1232	return(1);
				1233	return(0);
				1234	}
				1235
				1236	/*
				1237	- samesets - are these two characters in exactly the same sets?
				1238	*/
				1239	static int /* predicate */
				1240	samesets(struct re_guts *g, int c1, int c2)
				1241	{
				1242	uch *col;
				1243	int i;
				1244	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
				1245	unsigned uc1 = (uch)c1;
				1246	unsigned uc2 = (uch)c2;
				1247
				1248	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
				1249	if (col[uc1] != col[uc2])
				1250	return(0);
				1251	return(1);
				1252	}
				1253
				1254	/*
				1255	- categorize - sort out character categories
				1256	*/
				1257	static void
				1258	categorize(struct parse p, struct re_guts g)
				1259	{
				1260	cat_t *cats = g->categories;
				1261	int c;
				1262	int c2;
				1263	cat_t cat;
				1264
				1265	/* avoid making error situations worse */
				1266	if (p->error != 0)
				1267	return;
				1268
				1269	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
				1270	if (cats[c] == 0 && isinsets(g, c)) {
				1271	cat = g->ncategories++;
				1272	cats[c] = cat;
				1273	for (c2 = c+1; c2 <= CHAR_MAX; c2++)
				1274	if (cats[c2] == 0 && samesets(g, c, c2))
				1275	cats[c2] = cat;
				1276	}
				1277	}
				1278
				1279	/*
				1280	- dupl - emit a duplicate of a bunch of sops
				1281	*/
				1282	static sopno /* start of duplicate */
				1283	dupl(struct parse *p,
				1284	sopno start, /* from here */
				1285	sopno finish) /* to this less one */
				1286	{
				1287	sopno ret = HERE();
				1288	sopno len = finish - start;
				1289
				1290	assert(finish >= start);
				1291	if (len == 0)
				1292	return(ret);
				1293	enlarge(p, p->ssize + len); /* this many unexpected additions */
				1294	assert(p->ssize >= p->slen + len);
				1295	(void) memmove((char *)(p->strip + p->slen),
				1296	(char )(p->strip + start), (size_t)lensizeof(sop));
				1297	p->slen += len;
				1298	return(ret);
				1299	}
				1300
				1301	/*
				1302	- doemit - emit a strip operator
				1303	*
				1304	* It might seem better to implement this as a macro with a function as
				1305	* hard-case backup, but it's just too big and messy unless there are
				1306	* some changes to the data structures. Maybe later.
				1307	*/
				1308	static void
				1309	doemit(struct parse *p, sop op, size_t opnd)
				1310	{
				1311	/* avoid making error situations worse */
				1312	if (p->error != 0)
				1313	return;
				1314
				1315	/* deal with oversize operands ("can't happen", more or less) */
				1316	assert(opnd < 1<<OPSHIFT);
				1317
				1318	/* deal with undersized strip */
				1319	if (p->slen >= p->ssize)
				1320	enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
				1321	assert(p->slen < p->ssize);
				1322
				1323	/* finally, it's all reduced to the easy case */
				1324	p->strip[p->slen++] = SOP(op, opnd);
				1325	}
				1326
				1327	/*
				1328	- doinsert - insert a sop into the strip
				1329	*/
				1330	static void
				1331	doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
				1332	{
				1333	sopno sn;
				1334	sop s;
				1335	int i;
				1336
				1337	/* avoid making error situations worse */
				1338	if (p->error != 0)
				1339	return;
				1340
				1341	sn = HERE();
				1342	EMIT(op, opnd); /* do checks, ensure space */
				1343	assert(HERE() == sn+1);
				1344	s = p->strip[sn];
				1345
				1346	/* adjust paren pointers */
				1347	assert(pos > 0);
				1348	for (i = 1; i < NPAREN; i++) {
				1349	if (p->pbegin[i] >= pos) {
				1350	p->pbegin[i]++;
				1351	}
				1352	if (p->pend[i] >= pos) {
				1353	p->pend[i]++;
				1354	}
				1355	}
				1356
				1357	memmove((char )&p->strip[pos+1], (char )&p->strip[pos],
				1358	(HERE()-pos-1)*sizeof(sop));
				1359	p->strip[pos] = s;
				1360	}
				1361
				1362	/*
				1363	- dofwd - complete a forward reference
				1364	*/
				1365	static void
				1366	dofwd(struct parse *p, sopno pos, sop value)
				1367	{
				1368	/* avoid making error situations worse */
				1369	if (p->error != 0)
				1370	return;
				1371
				1372	assert(value < 1<<OPSHIFT);
				1373	p->strip[pos] = OP(p->strip[pos]) \| value;
				1374	}
				1375
				1376	/*
				1377	- enlarge - enlarge the strip
				1378	*/
				1379	static void
				1380	enlarge(struct parse *p, sopno size)
				1381	{
				1382	sop *sp;
				1383
				1384	if (p->ssize >= size)
				1385	return;
				1386
				1387	sp = (sop )realloc(p->strip, sizesizeof(sop));
				1388	if (sp == NULL) {
				1389	SETERROR(REG_ESPACE);
				1390	return;
				1391	}
				1392	p->strip = sp;
				1393	p->ssize = size;
				1394	}
				1395
				1396	/*
				1397	- stripsnug - compact the strip
				1398	*/
				1399	static void
				1400	stripsnug(struct parse p, struct re_guts g)
				1401	{
				1402	g->nstates = p->slen;
				1403	g->strip = (sop )realloc((char )p->strip, p->slen * sizeof(sop));
				1404	if (g->strip == NULL) {
				1405	SETERROR(REG_ESPACE);
				1406	g->strip = p->strip;
				1407	}
				1408	}
				1409
				1410	/*
				1411	- findmust - fill in must and mlen with longest mandatory literal string
				1412	*
				1413	* This algorithm could do fancy things like analyzing the operands of \|
				1414	* for common subsequences. Someday. This code is simple and finds most
				1415	* of the interesting cases.
				1416	*
				1417	* Note that must and mlen got initialized during setup.
				1418	*/
				1419	static void
				1420	findmust(struct parse p, struct re_guts g)
				1421	{
				1422	sop *scan;
Daniel Dunbar	bf75ffd	2009-08-30 21:13:58 +0000	[diff] [blame]	1423	sop start = 0; / start initialized in the default case, after that */
				1424	sop newstart = 0; / newstart was initialized in the OCHAR case */
Torok Edwin	e14d4cd	2009-08-30 08:24:09 +0000	[diff] [blame]	1425	sopno newlen;
				1426	sop s;
				1427	char *cp;
				1428	sopno i;
				1429
				1430	/* avoid making error situations worse */
				1431	if (p->error != 0)
				1432	return;
				1433
				1434	/* find the longest OCHAR sequence in strip */
				1435	newlen = 0;
				1436	scan = g->strip + 1;
				1437	do {
				1438	s = *scan++;
				1439	switch (OP(s)) {
				1440	case OCHAR: /* sequence member */
				1441	if (newlen == 0) /* new sequence */
				1442	newstart = scan - 1;
				1443	newlen++;
				1444	break;
				1445	case OPLUS_: /* things that don't break one */
				1446	case OLPAREN:
				1447	case ORPAREN:
				1448	break;
				1449	case OQUEST_: /* things that must be skipped */
				1450	case OCH_:
				1451	scan--;
				1452	do {
				1453	scan += OPND(s);
				1454	s = *scan;
				1455	/* assert() interferes w debug printouts */
				1456	if (OP(s) != O_QUEST && OP(s) != O_CH &&
				1457	OP(s) != OOR2) {
				1458	g->iflags \|= REGEX_BAD;
				1459	return;
				1460	}
				1461	} while (OP(s) != O_QUEST && OP(s) != O_CH);
				1462	/* fallthrough */
				1463	default: /* things that break a sequence */
				1464	if (newlen > g->mlen) { /* ends one */
				1465	start = newstart;
				1466	g->mlen = newlen;
				1467	}
				1468	newlen = 0;
				1469	break;
				1470	}
				1471	} while (OP(s) != OEND);
				1472
				1473	if (g->mlen == 0) /* there isn't one */
				1474	return;
				1475
				1476	/* turn it into a character string */
				1477	g->must = malloc((size_t)g->mlen + 1);
				1478	if (g->must == NULL) { /* argh; just forget it */
				1479	g->mlen = 0;
				1480	return;
				1481	}
				1482	cp = g->must;
				1483	scan = start;
				1484	for (i = g->mlen; i > 0; i--) {
				1485	while (OP(s = *scan++) != OCHAR)
				1486	continue;
				1487	assert(cp < g->must + g->mlen);
				1488	*cp++ = (char)OPND(s);
				1489	}
				1490	assert(cp == g->must + g->mlen);
				1491	cp++ = '\0'; / just on general principles */
				1492	}
				1493
				1494	/*
				1495	- pluscount - count + nesting
				1496	*/
				1497	static sopno /* nesting depth */
				1498	pluscount(struct parse p, struct re_guts g)
				1499	{
				1500	sop *scan;
				1501	sop s;
				1502	sopno plusnest = 0;
				1503	sopno maxnest = 0;
				1504
				1505	if (p->error != 0)
				1506	return(0); /* there may not be an OEND */
				1507
				1508	scan = g->strip + 1;
				1509	do {
				1510	s = *scan++;
				1511	switch (OP(s)) {
				1512	case OPLUS_:
				1513	plusnest++;
				1514	break;
				1515	case O_PLUS:
				1516	if (plusnest > maxnest)
				1517	maxnest = plusnest;
				1518	plusnest--;
				1519	break;
				1520	}
				1521	} while (OP(s) != OEND);
				1522	if (plusnest != 0)
				1523	g->iflags \|= REGEX_BAD;
				1524	return(maxnest);
				1525	}