I wrote:
> However, in a different line of thought, I realized that the
> memory allocation logic could use some polishing.  It gives out
> ten arcs per NFA state initially, and then adds ten more at a time.
> However, that's not very bright when you look at the actual usage
> patterns, because most states have only one or two out-arcs,
> but some have lots and lots.

Hold the phone ... after a bit I started to wonder why Spencer made
arc allocation be per-state at all, rather than using one big pool
of arcs.  Maybe there's some locality-of-reference argument to be
made for that, but I doubt he was worrying about that back in the
90s.  Besides, the regex compiler spends a lot of time iterating
over in-chains and color-chains, not just out-chains; it's hard
to see why trying to privilege the latter case would help much.

What I suspect, based on this old comment in regguts.h:
 * Having a "from" pointer within each arc may seem redundant, but it
 * saves a lot of hassle.
is that Henry did it like this initially to save having a "from"
pointer in each arc, and never re-thought the allocation mechanism
after he gave up on that idea.

So I rearranged things to allocate arcs out of a common pool, and for
good measure made the state allocation code do the same thing.  I was
pretty much blown away by the results: not only is the average-case
space usage about half what it is on HEAD, but the worst-case drops
by well more than a factor of ten.  I'd previously found, by raising
REG_MAX_COMPILE_SPACE, that the regexes in the second corpus that
trigger "regex too complex" errors all need 300 to 360 MB to compile
with our HEAD code.  With the new patch attached, they compile
successfully in a dozen or so MB.  (Yesterday's patch really did
nothing at all for these worst-case regexes, BTW.)

I also see about a 10% speedup overall, which I'm pretty sure is
down to needing fewer interactions with malloc() (this is partially
a function of having batched the state allocations, of course).
So even if there is a locality-of-reference loss, it's swamped by
fewer mallocs and less total space used.

                        regards, tom lane

diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 69929eddfc..df3351b3d0 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -59,7 +59,10 @@ newnfa(struct vars *v,
 
 	nfa->states = NULL;
 	nfa->slast = NULL;
-	nfa->free = NULL;
+	nfa->freestates = NULL;
+	nfa->freearcs = NULL;
+	nfa->lastsb = NULL;
+	nfa->lastab = NULL;
 	nfa->nstates = 0;
 	nfa->cm = cm;
 	nfa->v = v;
@@ -99,23 +102,27 @@ newnfa(struct vars *v,
 static void
 freenfa(struct nfa *nfa)
 {
-	struct state *s;
+	struct statebatch *sb;
+	struct statebatch *sbnext;
+	struct arcbatch *ab;
+	struct arcbatch *abnext;
 
-	while ((s = nfa->states) != NULL)
+	for (sb = nfa->lastsb; sb != NULL; sb = sbnext)
 	{
-		s->nins = s->nouts = 0; /* don't worry about arcs */
-		freestate(nfa, s);
+		sbnext = sb->next;
+		nfa->v->spaceused -= STATEBATCHSIZE(sb->nstates);
+		FREE(sb);
 	}
-	while ((s = nfa->free) != NULL)
+	nfa->lastsb = NULL;
+	for (ab = nfa->lastab; ab != NULL; ab = abnext)
 	{
-		nfa->free = s->next;
-		destroystate(nfa, s);
+		abnext = ab->next;
+		nfa->v->spaceused -= ARCBATCHSIZE(ab->narcs);
+		FREE(ab);
 	}
+	nfa->lastab = NULL;
 
-	nfa->slast = NULL;
 	nfa->nstates = -1;
-	nfa->pre = NULL;
-	nfa->post = NULL;
 	FREE(nfa);
 }
 
@@ -138,29 +145,45 @@ newstate(struct nfa *nfa)
 		return NULL;
 	}
 
-	if (nfa->free != NULL)
-	{
-		s = nfa->free;
-		nfa->free = s->next;
-	}
-	else
+	/* if none at hand, get more */
+	if (nfa->freestates == NULL)
 	{
+		struct statebatch *newSb;
+		size_t		nstates;
+		size_t		i;
+
 		if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
 		{
 			NERR(REG_ETOOBIG);
 			return NULL;
 		}
-		s = (struct state *) MALLOC(sizeof(struct state));
-		if (s == NULL)
+		nstates = (nfa->lastsb != NULL) ? nfa->lastsb->nstates * 2 : FIRSTSBSIZE;
+		if (nstates > MAXSBSIZE)
+			nstates = MAXSBSIZE;
+		newSb = (struct statebatch *) MALLOC(STATEBATCHSIZE(nstates));
+		if (newSb == NULL)
 		{
 			NERR(REG_ESPACE);
 			return NULL;
 		}
-		nfa->v->spaceused += sizeof(struct state);
-		s->oas.next = NULL;
-		s->free = NULL;
-		s->noas = 0;
+		nfa->v->spaceused += STATEBATCHSIZE(nstates);
+		newSb->nstates = nstates;
+		newSb->next = nfa->lastsb;
+		nfa->lastsb = newSb;
+
+		for (i = 0; i < nstates - 1; i++)
+		{
+			newSb->s[i].no = FREESTATE;
+			newSb->s[i].next = &newSb->s[i + 1];
+		}
+		newSb->s[i].no = FREESTATE;
+		newSb->s[i].next = NULL;
+		nfa->freestates = &newSb->s[0];
 	}
+	assert(nfa->freestates != NULL);
+
+	s = nfa->freestates;
+	nfa->freestates = s->next;
 
 	assert(nfa->nstates >= 0);
 	s->no = nfa->nstates++;
@@ -240,32 +263,8 @@ freestate(struct nfa *nfa,
 		nfa->states = s->next;
 	}
 	s->prev = NULL;
-	s->next = nfa->free;		/* don't delete it, put it on the free list */
-	nfa->free = s;
-}
-
-/*
- * destroystate - really get rid of an already-freed state
- */
-static void
-destroystate(struct nfa *nfa,
-			 struct state *s)
-{
-	struct arcbatch *ab;
-	struct arcbatch *abnext;
-
-	assert(s->no == FREESTATE);
-	for (ab = s->oas.next; ab != NULL; ab = abnext)
-	{
-		abnext = ab->next;
-		FREE(ab);
-		nfa->v->spaceused -= sizeof(struct arcbatch);
-	}
-	s->ins = NULL;
-	s->outs = NULL;
-	s->next = NULL;
-	FREE(s);
-	nfa->v->spaceused -= sizeof(struct state);
+	s->next = nfa->freestates;	/* don't delete it, put it on the free list */
+	nfa->freestates = s;
 }
 
 /*
@@ -334,8 +333,7 @@ createarc(struct nfa *nfa,
 {
 	struct arc *a;
 
-	/* the arc is physically allocated within its from-state */
-	a = allocarc(nfa, from);
+	a = allocarc(nfa);
 	if (NISERR())
 		return;
 	assert(a != NULL);
@@ -369,55 +367,52 @@ createarc(struct nfa *nfa,
 }
 
 /*
- * allocarc - allocate a new out-arc within a state
+ * allocarc - allocate a new arc within an NFA
  */
 static struct arc *				/* NULL for failure */
-allocarc(struct nfa *nfa,
-		 struct state *s)
+allocarc(struct nfa *nfa)
 {
 	struct arc *a;
 
-	/* shortcut */
-	if (s->free == NULL && s->noas < ABSIZE)
-	{
-		a = &s->oas.a[s->noas];
-		s->noas++;
-		return a;
-	}
-
 	/* if none at hand, get more */
-	if (s->free == NULL)
+	if (nfa->freearcs == NULL)
 	{
 		struct arcbatch *newAb;
-		int			i;
+		size_t		narcs;
+		size_t		i;
 
 		if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
 		{
 			NERR(REG_ETOOBIG);
 			return NULL;
 		}
-		newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
+		narcs = (nfa->lastab != NULL) ? nfa->lastab->narcs * 2 : FIRSTABSIZE;
+		if (narcs > MAXABSIZE)
+			narcs = MAXABSIZE;
+		newAb = (struct arcbatch *) MALLOC(ARCBATCHSIZE(narcs));
 		if (newAb == NULL)
 		{
 			NERR(REG_ESPACE);
 			return NULL;
 		}
-		nfa->v->spaceused += sizeof(struct arcbatch);
-		newAb->next = s->oas.next;
-		s->oas.next = newAb;
+		nfa->v->spaceused += ARCBATCHSIZE(narcs);
+		newAb->narcs = narcs;
+		newAb->next = nfa->lastab;
+		nfa->lastab = newAb;
 
-		for (i = 0; i < ABSIZE; i++)
+		for (i = 0; i < narcs - 1; i++)
 		{
 			newAb->a[i].type = 0;
 			newAb->a[i].freechain = &newAb->a[i + 1];
 		}
-		newAb->a[ABSIZE - 1].freechain = NULL;
-		s->free = &newAb->a[0];
+		newAb->a[i].type = 0;
+		newAb->a[i].freechain = NULL;
+		nfa->freearcs = &newAb->a[0];
 	}
-	assert(s->free != NULL);
+	assert(nfa->freearcs != NULL);
 
-	a = s->free;
-	s->free = a->freechain;
+	a = nfa->freearcs;
+	nfa->freearcs = a->freechain;
 	return a;
 }
 
@@ -478,7 +473,7 @@ freearc(struct nfa *nfa,
 	}
 	to->nins--;
 
-	/* clean up and place on from-state's free list */
+	/* clean up and place on NFA's free list */
 	victim->type = 0;
 	victim->from = NULL;		/* precautions... */
 	victim->to = NULL;
@@ -486,8 +481,8 @@ freearc(struct nfa *nfa,
 	victim->inchainRev = NULL;
 	victim->outchain = NULL;
 	victim->outchainRev = NULL;
-	victim->freechain = from->free;
-	from->free = victim;
+	victim->freechain = nfa->freearcs;
+	nfa->freearcs = victim;
 }
 
 /*
@@ -3413,7 +3408,6 @@ dumparc(struct arc *a,
 		FILE *f)
 {
 	struct arc *aa;
-	struct arcbatch *ab;
 
 	fprintf(f, "\t");
 	switch (a->type)
@@ -3451,16 +3445,11 @@ dumparc(struct arc *a,
 	}
 	if (a->from != s)
 		fprintf(f, "?%d?", a->from->no);
-	for (ab = &a->from->oas; ab != NULL; ab = ab->next)
-	{
-		for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
-			if (aa == a)
-				break;			/* NOTE BREAK OUT */
-		if (aa < &ab->a[ABSIZE])	/* propagate break */
+	for (aa = a->from->outs; aa != NULL; aa = aa->outchain)
+		if (aa == a)
 			break;				/* NOTE BREAK OUT */
-	}
-	if (ab == NULL)
-		fprintf(f, "?!?");		/* not in allocated space */
+	if (aa == NULL)
+		fprintf(f, "?!?");		/* missing from out-chain */
 	fprintf(f, "->");
 	if (a->to == NULL)
 	{
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index d3540fdd0f..a51555cfde 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -127,10 +127,9 @@ static struct state *newstate(struct nfa *);
 static struct state *newfstate(struct nfa *, int flag);
 static void dropstate(struct nfa *, struct state *);
 static void freestate(struct nfa *, struct state *);
-static void destroystate(struct nfa *, struct state *);
 static void newarc(struct nfa *, int, color, struct state *, struct state *);
 static void createarc(struct nfa *, int, color, struct state *, struct state *);
-static struct arc *allocarc(struct nfa *, struct state *);
+static struct arc *allocarc(struct nfa *);
 static void freearc(struct nfa *, struct arc *);
 static void changearctarget(struct arc *, struct state *);
 static int	hasnonemptyout(struct state *);
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 0e76a828f8..f3e7752171 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -284,9 +284,6 @@ struct cvec
 
 /*
  * definitions for NFA internal representation
- *
- * Having a "from" pointer within each arc may seem redundant, but it
- * saves a lot of hassle.
  */
 struct state;
 
@@ -294,7 +291,7 @@ struct arc
 {
 	int			type;			/* 0 if free, else an NFA arc type code */
 	color		co;				/* color the arc matches (possibly RAINBOW) */
-	struct state *from;			/* where it's from (and contained within) */
+	struct state *from;			/* where it's from */
 	struct state *to;			/* where it's to */
 	struct arc *outchain;		/* link in *from's outs chain or free chain */
 	struct arc *outchainRev;	/* back-link in *from's outs chain */
@@ -308,28 +305,39 @@ struct arc
 
 struct arcbatch
 {								/* for bulk allocation of arcs */
-	struct arcbatch *next;
-#define  ABSIZE  10
-	struct arc	a[ABSIZE];
+	struct arcbatch *next;		/* chain link */
+	size_t		narcs;			/* number of arcs allocated in this arcbatch */
+	struct arc	a[FLEXIBLE_ARRAY_MEMBER];
 };
+#define  ARCBATCHSIZE(n)  ((n) * sizeof(struct arc) + offsetof(struct arcbatch, a))
+#define  FIRSTABSIZE	64
+#define  MAXABSIZE		1024
 
 struct state
 {
-	int			no;
+	int			no;				/* state number, zero and up; or FREESTATE */
 #define  FREESTATE	 (-1)
 	char		flag;			/* marks special states */
 	int			nins;			/* number of inarcs */
-	struct arc *ins;			/* chain of inarcs */
 	int			nouts;			/* number of outarcs */
+	struct arc *ins;			/* chain of inarcs */
 	struct arc *outs;			/* chain of outarcs */
-	struct arc *free;			/* chain of free arcs */
 	struct state *tmp;			/* temporary for traversal algorithms */
-	struct state *next;			/* chain for traversing all */
-	struct state *prev;			/* back chain */
-	struct arcbatch oas;		/* first arcbatch, avoid malloc in easy case */
-	int			noas;			/* number of arcs used in first arcbatch */
+	struct state *next;			/* chain for traversing all live states */
+	/* the "next" field is also used to chain free states together */
+	struct state *prev;			/* back-link in chain of all live states */
 };
 
+struct statebatch
+{								/* for bulk allocation of states */
+	struct statebatch *next;	/* chain link */
+	size_t		nstates;		/* number of states allocated in this batch */
+	struct state s[FLEXIBLE_ARRAY_MEMBER];
+};
+#define  STATEBATCHSIZE(n)  ((n) * sizeof(struct state) + offsetof(struct statebatch, s))
+#define  FIRSTSBSIZE	32
+#define  MAXSBSIZE		1024
+
 struct nfa
 {
 	struct state *pre;			/* pre-initial state */
@@ -337,9 +345,12 @@ struct nfa
 	struct state *final;		/* final state */
 	struct state *post;			/* post-final state */
 	int			nstates;		/* for numbering states */
-	struct state *states;		/* state-chain header */
+	struct state *states;		/* chain of live states */
 	struct state *slast;		/* tail of the chain */
-	struct state *free;			/* free list */
+	struct state *freestates;	/* chain of free states */
+	struct arc *freearcs;		/* chain of free arcs */
+	struct statebatch *lastsb;	/* chain of statebatches */
+	struct arcbatch *lastab;	/* chain of arcbatches */
 	struct colormap *cm;		/* the color map */
 	color		bos[2];			/* colors, if any, assigned to BOS and BOL */
 	color		eos[2];			/* colors, if any, assigned to EOS and EOL */
@@ -387,7 +398,7 @@ struct cnfa
 {
 	int			nstates;		/* number of states */
 	int			ncolors;		/* number of colors (max color in use + 1) */
-	int			flags;
+	int			flags;			/* bitmask of the following flags: */
 #define  HASLACONS	01			/* uses lookaround constraints */
 #define  MATCHALL	02			/* matches all strings of a range of lengths */
 	int			pre;			/* setup state number */
@@ -422,10 +433,12 @@ struct cnfa
  * transient data is generally not large enough to notice compared to those.
  * Note that we do not charge anything for the final output data structures
  * (the compacted NFA and the colormap).
+ * The scaling here is based on an empirical measurement that very large
+ * NFAs tend to have about 4 arcs/state.
  */
 #ifndef REG_MAX_COMPILE_SPACE
 #define REG_MAX_COMPILE_SPACE  \
-	(100000 * sizeof(struct state) + 100000 * sizeof(struct arcbatch))
+	(500000 * (sizeof(struct state) + 4 * sizeof(struct arc)))
 #endif
 
 /*

Reply via email to