blob: a085c33e3f71adffd52945cc768daf0e836834fc [file] [log] [blame]
/*
* minilex.c
*
* High efficiency lexical state parser
*
* Copyright (C)2011-2013 Andy Green <andy@warmcat.com>
*
* Licensed under LGPL2
*
* Usage: gcc minilex.c -o minilex && ./minilex > lextable.h
*
* Run it twice to test parsing on the generated table on stderr
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* set of parsable strings */
const char *set[] = {
"GET ",
"Host:",
"Connection:",
"Sec-WebSocket-Key1:",
"Sec-WebSocket-Key2:",
"Sec-WebSocket-Protocol:",
"Upgrade:",
"Origin:",
"Sec-WebSocket-Draft:",
"\x0d\x0a",
"Sec-WebSocket-Key:",
"Sec-WebSocket-Version:",
"Sec-WebSocket-Origin:",
"Sec-WebSocket-Extensions:",
"Sec-WebSocket-Accept:",
"Sec-WebSocket-Nonce:",
"HTTP/1.1 ",
"Accept:",
"If-Modified-Since:",
"Accept-Encoding:",
"Accept-Language:",
"Pragma:",
"Cache-Control:",
"Authorization:",
"Cookie:",
"Content-Type:",
"Date:",
"Range:",
"Referer:"
};
unsigned char lextable[] = {
#include "lextable.h"
};
#define PARALLEL 30
struct state {
char c[PARALLEL];
int state[PARALLEL];
int count;
int bytepos;
};
struct state state[1000];
int next = 1;
int lextable_decode(int pos, char c)
{
while (1) {
if (!lextable[pos + 1]) /* terminal marker */
return pos;
if ((lextable[pos] & 0x7f) == c) /* goto */
return pos + (lextable[pos + 1] << 1);
if (lextable[pos] & 0x80) /* fail */
return -1;
pos += 2;
}
}
int main(void)
{
int n = 0;
int m = 0;
int prev;
char c;
int walk;
int saw;
int y;
while (n < sizeof(set) / sizeof(set[0])) {
m = 0;
walk = 0;
prev = 0;
while (set[n][m]) {
saw = 0;
for (y = 0; y < state[walk].count; y++)
if (state[walk].c[y] == set[n][m]) {
/* exists -- go forward */
walk = state[walk].state[y];
saw = 1;
break;
}
if (saw)
goto again;
/* something we didn't see before */
state[walk].c[state[walk].count] = set[n][m];
state[walk].state[state[walk].count] = next;
state[walk].count++;
walk = next++;
again:
m++;
}
state[walk].c[0] = n++;
state[walk].state[0] = 0; /* terminal marker */
state[walk].count = 1;
}
walk = 0;
for (n = 0; n < next; n++) {
state[n].bytepos = walk;
walk += (2 * state[n].count);
}
walk = 0;
for (n = 0; n < next; n++) {
for (m = 0; m < state[n].count; m++) {
if (!m)
fprintf(stdout, "/* pos %3d: state %3d */ ",
walk, n);
else
fprintf(stdout, " ");
y = state[n].c[m];
saw = state[n].state[m];
if (m == state[n].count - 1)
y |= 0x80; /* last option */
if (saw == 0) // c is a terminal then
fprintf(stdout, " 0x%02X, 0x00 "
"/* - terminal marker %2d - */, \n",
y, y - 0x80);
else { /* c is a character and we need a byte delta */
if ((state[saw].bytepos - walk) / 2 > 0xff) {
fprintf(stdout,
"Tried to jump > 510 bytes ahead\n");
return 1;
}
prev = y &0x7f;
if (prev < 32 || prev > 126)
prev = '.';
fprintf(stdout, " 0x%02X /* '%c' */, 0x%02X "
"/* (to pos %3d state %3d) */,\n",
y, prev,
(state[saw].bytepos - walk) / 2,
state[saw].bytepos, saw);
}
walk += 2;
}
}
fprintf(stdout, "/* total size %d bytes */\n", walk);
/*
* Test parser... real parser code is the same
*/
for (n = 0; n < sizeof(set) / sizeof(set[0]); n++) {
walk = 0;
m = 0;
fprintf(stderr, "Trying '%s'\n", set[n]);
while (set[n][m]) {
walk = lextable_decode(walk, set[n][m]);
if (walk < 0) {
fprintf(stderr, "failed\n");
break;
}
if (lextable[walk + 1] == 0) {
fprintf(stderr, "decode: %d\n",
lextable[walk] & 0x7f);
break;
}
m++;
}
}
return 0;
}