GNU bug report logs -
#17081
[PATCH] dfa: avoid undefined behavior
Previous Next
Reported by: Paul Eggert <eggert <at> CS.UCLA.EDU>
Date: Mon, 24 Mar 2014 06:07:01 UTC
Severity: normal
Tags: patch
Done: Paul Eggert <eggert <at> cs.ucla.edu>
Bug is archived. No further changes may be made.
Full log
View this message in rfc822 format
Il 24/03/2014 07:04, Paul Eggert ha scritto:
> * src/dfa.c (FETCH_WC, addtok_wc): Don't rely on undefined behavior
> when converting an out-of-range value to 'int'.
> (FETCH_WC, prepare_wc_buf): Don't rely on conversion state after
> mbrtowc returns a special value, as it's undefined for (size_t) -1.
> (prepare_wc_buf): Simplify test for valid character.
> ---
> src/dfa.c | 42 ++++++++++++++++++++++++------------------
> 1 file changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/src/dfa.c b/src/dfa.c
> index 92ac1b9..0a2b8b8 100644
> --- a/src/dfa.c
> +++ b/src/dfa.c
> @@ -807,7 +807,7 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */
> static int cur_mb_len = 1; /* Length of the multibyte representation of
> wctok. */
> /* These variables are used only if (MB_CUR_MAX > 1). */
> -static mbstate_t mbs; /* Mbstate for mbrlen. */
> +static mbstate_t mbs; /* mbstate for mbrtowc. */
> static wchar_t wctok; /* Wide character representation of the current
> multibyte character. */
> static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec.
> @@ -844,15 +844,18 @@ static unsigned char const *buf_end; /* reference to end in dfaexec. */
> else \
> { \
> wchar_t _wc; \
> - cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
> - if (cur_mb_len <= 0) \
> + size_t nbytes = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
> + bool valid_char = 1 <= nbytes && nbytes < (size_t) -2; \
I find these conditionals complicated to follow. In addition, a return
value of nbytes == 0 is valid, so I believe you should have simply
bool valid_char = nbytes < (size_t) -2;
or better:
> + if (! valid_char) \
if (nbytes >= (size_t) -2)
> { \
> + memset (&mbs, 0, sizeof mbs); \
> cur_mb_len = 1; \
> --lexleft; \
> (wc) = (c) = to_uchar (*lexptr++); \
> } \
> else \
> { \
> + cur_mb_len = nbytes; \
> lexptr += cur_mb_len; \
> lexleft -= cur_mb_len; \
> (wc) = _wc; \
> @@ -1685,16 +1688,19 @@ static void
> addtok_wc (wint_t wc)
> {
> unsigned char buf[MB_LEN_MAX];
> - mbstate_t s;
> + mbstate_t s = { 0 };
> int i;
> - memset (&s, 0, sizeof s);
> - cur_mb_len = wcrtomb ((char *) buf, wc, &s);
> + size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
>
> - /* This is merely stop-gap. When cur_mb_len is 0 or negative,
> - buf[0] is undefined, yet skipping the addtok_mb call altogether
> - can result in heap corruption. */
> - if (cur_mb_len <= 0)
> - buf[0] = 0;
> + if (stored_bytes != (size_t) -1)
> + cur_mb_len = stored_bytes;
> + else
> + {
> + /* This is merely stop-gap. buf[0] is undefined, yet skipping
> + the addtok_mb call altogether can corrupt the heap. */
> + cur_mb_len = 1;
> + buf[0] = 0;
> + }
>
> addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
> for (i = 1; i < cur_mb_len; i++)
> @@ -3328,13 +3334,13 @@ prepare_wc_buf (const char *begin, const char *end)
> {
> if (remain_bytes == 0)
> {
> - remain_bytes
> + size_t nbytes
> = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
> - if (remain_bytes < 1
> - || remain_bytes == (size_t) -1
> - || remain_bytes == (size_t) -2
> - || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
> + if (! (1 <= nbytes && nbytes < (size_t) -2)
if (nbytes >= (size_t) -2)
> + || (nbytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
> {
> + if ((size_t) -2 <= nbytes)
if (nbytes >= (size_t) -2)
> + memset (&mbs, 0, sizeof mbs);
> remain_bytes = 0;
> inputwcs[i] = (wchar_t) begin[i];
> mblen_buf[i] = 0;
> @@ -3343,8 +3349,8 @@ prepare_wc_buf (const char *begin, const char *end)
> }
> else
> {
> - mblen_buf[i] = remain_bytes;
> - remain_bytes--;
> + mblen_buf[i] = nbytes;
> + remain_bytes = nbytes - 1;
> }
> }
> else
>
This bug report was last modified 11 years and 107 days ago.
Previous Next
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.