Merge pull request #173 from Arkshine/feature/compiler-concatenation

Add string literal concatenation and stringizing operator
This commit is contained in:
Vincent Herbet 2015-01-22 20:00:32 +01:00
commit 5ca0e5617d
2 changed files with 158 additions and 29 deletions

View File

@ -36,8 +36,9 @@
#endif #endif
/* flags for litchar() */ /* flags for litchar() */
#define RAWMODE 1 #define RAWMODE 0x1
#define UTF8MODE 2 #define UTF8MODE 0x2
#define ISPACKED 0x4
static cell litchar(const unsigned char **lptr,int flags); static cell litchar(const unsigned char **lptr,int flags);
static void substallpatterns(unsigned char *line,int buffersize); static void substallpatterns(unsigned char *line,int buffersize);
@ -1493,6 +1494,7 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char
const unsigned char *p,*s,*e; const unsigned char *p,*s,*e;
unsigned char *args[10]; unsigned char *args[10];
int match,arg,len,argsnum=0; int match,arg,len,argsnum=0;
int stringize;
memset(args,0,sizeof args); memset(args,0,sizeof args);
@ -1590,11 +1592,18 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char
if (match) { if (match) {
/* calculate the length of the substituted string */ /* calculate the length of the substituted string */
for (e=(unsigned char*)substitution,len=0; *e!='\0'; e++) { for (e=(unsigned char*)substitution,len=0; *e!='\0'; e++) {
if (*e=='#' && *(e+1)=='%' && isdigit(*(e+2))) {
stringize=1;
e++; /* skip '#' */
} else {
stringize=0;
} /* if */
if (*e=='%' && isdigit(*(e+1)) && argsnum) { if (*e=='%' && isdigit(*(e+1)) && argsnum) {
arg=*(e+1)-'0'; arg=*(e+1)-'0';
assert(arg>=0 && arg<=9); assert(arg>=0 && arg<=9);
assert(stringize == 0 || stringize == 1);
if (args[arg]!=NULL) { if (args[arg]!=NULL) {
len+=strlen((char*)args[arg]); len+=strlen((char*)args[arg])+2*stringize;
e++; /* skip %, digit is skipped later */ e++; /* skip %, digit is skipped later */
} else { } else {
len++; len++;
@ -1610,12 +1619,22 @@ static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char
/* substitute pattern */ /* substitute pattern */
strdel((char*)line,(int)(s-line)); strdel((char*)line,(int)(s-line));
for (e=(unsigned char*)substitution,s=line; *e!='\0'; e++) { for (e=(unsigned char*)substitution,s=line; *e!='\0'; e++) {
if (*e=='#' && *(e+1)=='%' && isdigit(*(e+2))) {
stringize=1;
e++; /* skip '#' */
} else {
stringize=0;
} /* if */
if (*e=='%' && isdigit(*(e+1))) { if (*e=='%' && isdigit(*(e+1))) {
arg=*(e+1)-'0'; arg=*(e+1)-'0';
assert(arg>=0 && arg<=9); assert(arg>=0 && arg<=9);
if (args[arg]!=NULL) { if (args[arg]!=NULL) {
if (stringize)
strins((char*)s++,"\"",1);
strins((char*)s,(char*)args[arg],strlen((char*)args[arg])); strins((char*)s,(char*)args[arg],strlen((char*)args[arg]));
s+=strlen((char*)args[arg]); s+=strlen((char*)args[arg]);
if (stringize)
strins((char*)s++,"\"",1);
e++; /* skip %, digit is skipped later */ e++; /* skip %, digit is skipped later */
} else { } else {
strins((char*)s,(char*)e,1); strins((char*)s,(char*)e,1);
@ -1703,6 +1722,61 @@ static void substallpatterns(unsigned char *line,int buffersize)
} }
#endif #endif
/* scanplus
 *
 * Determines whether the next non-blank character after "lptr" is a '+'.
 * The rest of the active line is inspected first; when that holds only
 * white space, further lines are read from the current input file into a
 * private, heap-allocated buffer (so the caller's line is never written to).
 * Those lines are passed through stripcom() but are not preprocessed.
 *
 * Before returning from the look-ahead, the function puts back:
 *  - the read position of the input file (pc_getpossrc / pc_resetsrc)
 *  - the comment parsing state ("icomment"), which stripcom() may change
 *
 * Returns 1 when a '+' was found and 0 when it was not; 0 is also returned
 * when there is no input file, the file is at EOF, or the look-ahead buffer
 * cannot be allocated.
 */
static int scanplus(const unsigned char *lptr)
{
  static void *savedpos=NULL;
  unsigned char *linebuf;
  const unsigned char *cur;
  short savedcomment;
  int result=0;

  /* step 1: examine what is left of the active line */
  for (cur=lptr; *cur!='\0' && *cur<=' '; cur++)
    /* skip white space */;
  if (*cur=='+')
    return 1;
  if (*cur!='\0')
    return 0;           /* some other token comes first */

  /* step 2: the active line is exhausted, peek into the following lines of
   * the current file; this is pointless without a file or at its end
   */
  if (inpf==NULL || pc_eofsrc(inpf))
    return 0;
  linebuf=(unsigned char*)malloc((sLINEMAX+1)*sizeof(unsigned char));
  if (linebuf==NULL)
    return 0;

  /* remember the state that must be put back afterwards */
  savedpos=pc_getpossrc(inpf);
  savedcomment=icomment;

  /* read raw lines (comments stripped, no preprocessing) until one holds
   * something other than white space
   */
  while (pc_readsrc(inpf,linebuf,sLINEMAX)!=NULL) {
    stripcom(linebuf);
    for (cur=linebuf; *cur!='\0' && *cur<=' '; cur++)
      /* skip white space */;
    if (*cur=='\0')
      continue;         /* blank line, keep looking */
    result= (*cur=='+');
    break;              /* first significant character decides */
  } /* while */

  /* release the buffer and rewind to where we started */
  free(linebuf);
  pc_resetsrc(inpf,savedpos);
  icomment=savedcomment;
  return result;
}
/* preprocess /* preprocess
* *
* Reads a line by readline() into "pline" and performs basic preprocessing: * Reads a line by readline() into "pline" and performs basic preprocessing:
@ -1974,38 +2048,92 @@ SC_FUNC int lex(cell *lexvalue,char **lexsym)
error(220); error(220);
} /* if */ } /* if */
} /* if */ } /* if */
} else if (*lptr=='\"' || (*lptr==sc_ctrlchar && *(lptr+1)=='\"')) } else if (*lptr=='\"' /* unpacked string literal */
{ /* unpacked string literal */ || (*lptr==sc_ctrlchar && *(lptr+1)=='\"') /* unpacked raw string */
|| (*lptr=='!' && *(lptr+1)=='\"') /* packed string */
|| (*lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"') /* packed raw string */
|| (*lptr==sc_ctrlchar && *(lptr+1)=='!' && *(lptr+2)=='\"')) /* packed raw string */
{
int stringflags,segmentflags;
char *cat;
if (sLiteralQueueDisabled) { if (sLiteralQueueDisabled) {
_lextok=tPENDING_STRING; _lextok=tPENDING_STRING;
return _lextok; return _lextok;
} }
_lextok=tSTRING; _lextok=tSTRING;
stringflags= (*lptr==sc_ctrlchar) ? RAWMODE : 0;
*lexvalue=_lexval=litidx; *lexvalue=_lexval=litidx;
lptr+=1; /* skip double quote */ _lexstr[0]='\0';
if ((stringflags & RAWMODE)!=0) stringflags=-1; /* to mark the first segment */
lptr+=1; /* skip "escape" character too */ for ( ;; ) {
lptr=sc_packstr ? packedstring(lptr,stringflags) : unpackedstring(lptr,stringflags); if (*lptr=='!')
if (*lptr=='\"') segmentflags= (*(lptr+1)==sc_ctrlchar) ? RAWMODE | ISPACKED: ISPACKED;
lptr+=1; /* skip final quote */ else if (*lptr==sc_ctrlchar)
segmentflags= (*(lptr+1)=='!') ? RAWMODE | ISPACKED : RAWMODE;
else
segmentflags=0;
if ((segmentflags & ISPACKED)!=0)
lptr+=1; /* skip '!' character */
if ((segmentflags & RAWMODE)!=0)
lptr+=1; /* skip "escape" character too */
assert(*lptr=='\"');
lptr+=1; /* skip double quote too */
if (stringflags==-1)
stringflags=segmentflags;
else if (stringflags!=segmentflags)
error(238); /* mixing packed/unpacked/raw strings in concatenation */
cat=strchr(_lexstr,'\0');
assert(cat!=NULL);
while (*lptr!='"' && *lptr!='\0' && (cat-_lexstr)<sLINEMAX) {
if (*lptr!='\a') { /* ignore '\a' (which was inserted at a line concatenation) */
*cat++=*lptr;
if (*lptr==sc_ctrlchar && *(lptr+1)!='\0')
*cat++=*++lptr; /* skip escape character plus the escaped character */
} /* if */
lptr++;
} /* while */
*cat='\0'; /* terminate string */
if (*lptr=='\"')
lptr+=1; /* skip final quote */
else
error(37); /* invalid (non-terminated) string */
/* see whether a plus sign is following the string */
if (!scanplus(lptr))
break; /* no concatenation of string literals */
/* there is a plus sign, go on parsing (this time with full preprocessing) */
while (*lptr<=' ') {
if (*lptr=='\0') {
preprocess(); /* preprocess resets "lptr" */
assert(freading && lptr!=term_expr);
} else {
lptr++;
} /* if */
} /* while */
assert(freading && lptr[0]=='+');
lptr+=1;
while (*lptr<=' ') {
if (*lptr=='\0') {
preprocess(); /* preprocess resets "lptr" */
assert(freading && lptr!=term_expr);
} else {
lptr++;
} /* if */
} /* while */
if (!freading || !(*lptr=='\"'
|| (*lptr==sc_ctrlchar && *(lptr+1)=='\"')
|| (*lptr=='!' && *(lptr+1)=='\"')
|| (*lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"')
|| (*lptr==sc_ctrlchar && *(lptr+1)=='!' && *(lptr+2)=='\"')))
{
error(37); /* invalid string concatenation */
break;
} /* if */
} /* for */
if (sc_packstr)
stringflags ^= ISPACKED; /* invert packed/unpacked parameters */
if ((stringflags & ISPACKED)!=0)
packedstring((unsigned char*)_lexstr,stringflags);
else else
error(37); /* invalid (non-terminated) string */ unpackedstring((unsigned char*)_lexstr,stringflags);
} else if ((*lptr=='!' && *(lptr+1)=='\"')
|| (*lptr=='!' && *(lptr+1)==sc_ctrlchar && *(lptr+2)=='\"')
|| (*lptr==sc_ctrlchar && *(lptr+1)=='!' && *(lptr+2)=='\"'))
{ /* packed string literal */
_lextok=tSTRING;
stringflags= (*lptr==sc_ctrlchar || *(lptr+1)==sc_ctrlchar) ? RAWMODE : 0;
*lexvalue=_lexval=litidx;
lptr+=2; /* skip exclamation point and double quote */
if ((stringflags & RAWMODE)!=0)
lptr+=1; /* skip "escape" character too */
lptr=sc_packstr ? unpackedstring(lptr,stringflags) : packedstring(lptr,stringflags);
if (*lptr=='\"')
lptr+=1; /* skip final quote */
else
error(37); /* invalid (non-terminated) string */
} else if (*lptr=='\'') { /* character literal */ } else if (*lptr=='\'') { /* character literal */
lptr+=1; /* skip quote */ lptr+=1; /* skip quote */
_lextok=tNUMBER; _lextok=tNUMBER;

View File

@ -395,11 +395,12 @@ SC_FUNC int scan_utf8(FILE *fp,const char *filename)
#if defined NO_UTF8 #if defined NO_UTF8
return 0; return 0;
#else #else
void *resetpos=pc_getpossrc(fp); static void *resetpos=NULL;
int utf8=TRUE; int utf8=TRUE;
int firstchar=TRUE,bom_found=FALSE; int firstchar=TRUE,bom_found=FALSE;
const unsigned char *ptr; const unsigned char *ptr;
resetpos=pc_getpossrc(fp);
while (utf8 && pc_readsrc(fp,pline,sLINEMAX)!=NULL) { while (utf8 && pc_readsrc(fp,pline,sLINEMAX)!=NULL) {
ptr=pline; ptr=pline;
if (firstchar) { if (firstchar) {