diff mbox series

[v2,7/8] input: Allow MB_LEN_MAX calls to pungetc

Message ID 9a1c18b16b066510266ed9f14ec954840221e7c1.1714276539.git.herbert@gondor.apana.org.au (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Headers show
Series Add multi-byte support | expand

Commit Message

Herbert Xu April 28, 2024, 3:57 a.m. UTC
In order to parse multi-byte characters which may be up to MB_LEN_MAX
bytes long, allow enough calls to pungetc to undo a single multi-byte
character.

Also add a function pungetn to do multiple pungetc calls in a row.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/input.c | 58 ++++++++++++++++++++++++++++++++++-------------------
 src/input.h | 11 +++++-----
 2 files changed, 42 insertions(+), 27 deletions(-)
diff mbox series

Patch

diff --git a/src/input.c b/src/input.c
index fb9858f..c7805ad 100644
--- a/src/input.c
+++ b/src/input.c
@@ -56,7 +56,7 @@ 
 #include "main.h"
 #include "myhistedit.h"
 
-#define IBUFSIZ (BUFSIZ + 1)
+#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1)
 
 
 MKINIT struct parsefile basepf;	/* top level input file */
@@ -83,13 +83,16 @@  INIT {
 }
 
 RESET {
+	int c;
+
 	/* clear input buffer */
 	popallfiles();
-	basepf.unget = 0;
-	while (basepf.lastc[0] != '\n' &&
-	       basepf.lastc[0] != PEOF &&
-	       !int_pending())
-		pgetc();
+	/* c = last char consumed (nextc[-unget] is the next one to be read) */
+	c = PEOF;
+	if (basepf.nextc - basebuf > basepf.unget)
+		c = basepf.nextc[-basepf.unget - 1];
+	while (c != '\n' && c != PEOF && !int_pending())
+		c = pgetc();
 }
 
 FORKRESET {
@@ -131,17 +134,20 @@  static int __pgetc(void)
 {
 	int c;
 
-	if (parsefile->unget)
-		return parsefile->lastc[--parsefile->unget];
+	if (parsefile->unget) {
+		long unget = -(long)(unsigned)parsefile->unget--;
+
+		if (parsefile->nleft < 0)
+			return preadbuffer();
+
+		return parsefile->nextc[unget];
+	}
 
 	if (--parsefile->nleft >= 0)
 		c = (signed char)*parsefile->nextc++;
 	else
 		c = preadbuffer();
 
-	parsefile->lastc[1] = parsefile->lastc[0];
-	parsefile->lastc[0] = c;
-
 	return c;
 }
 
@@ -176,9 +182,16 @@  static int stdin_clear_nonblock(void)
 static int
 preadfd(void)
 {
+	char *buf = parsefile->buf;
+	int unget;
 	int nr;
-	char *buf =  parsefile->buf;
-	parsefile->nextc = buf;
+
+	unget = parsefile->nextc - buf;
+	if (unget > PUNGETC_MAX)
+		unget = PUNGETC_MAX;
+
+	memmove(buf, parsefile->nextc - unget, unget);
+	parsefile->nextc = buf += unget;
 
 retry:
 #ifndef SMALL
@@ -196,8 +209,8 @@  retry:
 			nr = 0;
 		else {
 			nr = el_len;
-			if (nr > IBUFSIZ - 1)
-				nr = IBUFSIZ - 1;
+			if (nr > BUFSIZ)
+				nr = BUFSIZ;
 			memcpy(buf, rl_cp, nr);
 			if (nr != el_len) {
 				el_len -= nr;
@@ -209,9 +222,9 @@  retry:
 	} else
 #endif
 	if (parsefile->fd)
-		nr = read(parsefile->fd, buf, IBUFSIZ - 1);
+		nr = read(parsefile->fd, buf, BUFSIZ);
 	else {
-		unsigned len = IBUFSIZ - 1;
+		unsigned len = BUFSIZ;
 
 		nr = 0;
 
@@ -348,6 +361,11 @@  done:
 	return (signed char)*parsefile->nextc++;
 }
 
+void pungetn(int n)
+{
+	parsefile->unget += n;
+}
+
 /*
- * Undo a call to pgetc.  Only two characters may be pushed back.
+ * Undo a call to pgetc.  Up to PUNGETC_MAX characters may be pushed back.
  * PEOF may be pushed back.
@@ -356,7 +374,7 @@  done:
 void
 pungetc(void)
 {
-	parsefile->unget++;
+	pungetn(1);
 }
 
 /*
@@ -383,7 +401,6 @@  pushstring(char *s, void *ap)
 	sp->prevnleft = parsefile->nleft;
 	sp->unget = parsefile->unget;
 	sp->spfree = parsefile->spfree;
-	memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc));
 	sp->ap = (struct alias *)ap;
 	if (ap) {
 		((struct alias *)ap)->flag |= ALIASINUSE;
@@ -413,7 +430,6 @@  static void popstring(void)
 	parsefile->nextc = sp->prevstring;
 	parsefile->nleft = sp->prevnleft;
 	parsefile->unget = sp->unget;
-	memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc));
 /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/
 	parsefile->strpush = sp->prev;
 	parsefile->spfree = sp;
@@ -457,7 +473,7 @@  setinputfd(int fd, int push)
 	}
 	parsefile->fd = fd;
 	if (parsefile->buf == NULL)
-		parsefile->buf = ckmalloc(IBUFSIZ);
+		parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ);
 	input_set_lleft(parsefile, parsefile->nleft = 0);
 	plinno = 1;
 }
diff --git a/src/input.h b/src/input.h
index 1ff5773..5b4a045 100644
--- a/src/input.h
+++ b/src/input.h
@@ -34,12 +34,16 @@ 
  *	@(#)input.h	8.2 (Berkeley) 5/4/95
  */
 
+#include <limits.h>
+
 #ifdef SMALL
 #define IS_DEFINED_SMALL 1
 #else
 #define IS_DEFINED_SMALL 0
 #endif
 
+#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16)
+
 /* PEOF (the end of file marker) is defined in syntax.h */
 
 enum {
@@ -59,9 +63,6 @@  struct strpush {
 	/* Delay freeing so we can stop nested aliases. */
 	struct strpush *spfree;
 
-	/* Remember last two characters for pungetc. */
-	int lastc[2];
-
 	/* Number of outstanding calls to pungetc. */
 	int unget;
 };
@@ -87,9 +88,6 @@  struct parsefile {
 	/* Delay freeing so we can stop nested aliases. */
 	struct strpush *spfree;
 
-	/* Remember last two characters for pungetc. */
-	int lastc[2];
-
 	/* Number of outstanding calls to pungetc. */
 	int unget;
 };
@@ -106,6 +104,7 @@  extern struct parsefile *parsefile;
 int pgetc(void);
 int pgetc2(void);
 void pungetc(void);
+void pungetn(int);
 void pushstring(char *, void *);
 int setinputfile(const char *, int);
 void setinputstring(char *);