diffutils/lib/mcel-casecmp.c
Paul Eggert 12bcf0bd50 diff: simplify multi-byte code (mbcel -> mcel)
* lib/Makefile.am: Adjust to file renamings and additions.
* lib/mbcel.c, lib/mbcel.h: Split into two APIs, replacing with ...
* lib/mcel.c, lib/mcel.h, lib/ucore.c, lib/ucore.h: ... these new files.
* lib/mcel.h: Simplify by assuming ucore.h is included.
Check that bytes have 8 bits.
(MCEL_LEN_MAX, mcel_t, MCEL_INLINE, MCEL_ENCODING_ERROR_SHIFT)
(mcel_scan, mcel_scant, mcel_scanz, mcel_casecmp):
Rename from MBCEL_LEN_MAX, mbcel_t, MBCEL_INLINE,
MBCEL_ENCODING_ERROR_SHIFT, mbcel_scan, mbcel_scanz, mbcel_scant,
mbcel_casecmp.
(mcel_t): New member c, replacing old members ch and err.
All uses changed.
(MBCEL_UCHAR_FITS, MBCEL_UCHAR_EASILY_FITS): Remove.
All uses removed.  No longer needed now 8-bit bytes are assumed.
(MCEL_ENCODING_ERROR_SHIFT): Check that it matches UCORE_ERR_MIN.
(mcel_isbasic): New function.  Use it where appropriate.
(mbcel_cmp, mbcel_casecmp): Remove; replaced by ucore_cmp,
ucore_tocmp.  All uses changed.
* lib/mcel-casecmp.c: Rename from lib/mbcel-strcasecmp.c.
Include mcel.h instead of mbcel.h.
(mcel_casecmp): Rename from mbcel_strcasecmp.  All uses changed.
Assert that UCHAR_MAX <= INT_MAX, as POSIX requires,
and simplify code accordingly.  Use mcel rather than mbcel.
* lib/ucore.h: Include verify.h.
(ucore_t): New type.
(UCORE_CHAR_MAX, UCORE_ERR_MIN, UCORE_ERR_MAX, UCORE_C32_SAFE):
New constants.  Check that information is not lost by encoding
errors as integers; this is a weaker test than CHAR_BIT == 8.
(ucore_iserr, ucore_is, ucore_to): New functions.
(ucore_cmp, ucore_tocmp): New functions, replacing the old
mbcel_cmp, mbcel_casecmp.  All uses changed.
* src/dir.c, src/io.c, src/side.c: Use mcel rather than mbcel.
* src/io.c (same_ch_err): Remove.  All uses replaced by ucore_cmp.
2023-08-15 10:41:54 -07:00

61 lines
1.6 KiB
C

/* Case-insensitive string comparison function.
Copyright 2023 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Written by Paul Eggert. */
#include <config.h>
/* Specification. */
#include <mcel.h>
#include <ctype.h>
#include <stdlib.h>
/* Compare the NUL-terminated strings S1 and S2, ignoring case.
   Return zero if no difference is found before the end of S1,
   and a nonzero value at the first position where the strings
   still differ after case mapping.

   In a locale where MB_CUR_MAX is 1 the strings are compared byte
   by byte via tolower, and the result is the difference of the two
   lowercased bytes taken as unsigned char.  Otherwise the strings
   are scanned one mcel_scanz unit at a time and the result is
   whatever ucore_tocmp (c32tolower, ...) yields; those helpers are
   declared outside this file.  */
int
mcel_casecmp (char const *s1, char const *s2)
{
  /* Both branches walk the strings in lockstep and stop at the first
     mismatch, so the full extent of S1 or S2 is examined only when
     needed: when two strings differ, the difference is typically
     early.  */
  if (MB_CUR_MAX == 1)
    {
      /* Guarantees that subtracting two unsigned char values as int
         cannot overflow.  */
      static_assert (UCHAR_MAX <= INT_MAX);

      for (char const *a = s1, *b = s2; ; a++, b++)
        {
          unsigned char ca = *a;
          unsigned char cb = *b;
          int diff = ca - cb;

          /* Fold case only when the raw bytes differ; identical bytes
             are trivially equal without calling tolower.  */
          if (_GL_UNLIKELY (diff))
            {
              ca = tolower (ca);
              cb = tolower (cb);
              diff = ca - cb;
            }

          /* Stop on a mismatch, or when S1's byte is NUL.  If DIFF is
             zero here, the bytes are equal and the return value is 0.  */
          if (_GL_UNLIKELY (diff | !ca))
            return diff;
        }
    }
  else
    {
      char const *a = s1;
      char const *b = s2;

      for (;;)
        {
          /* Scan one unit from each string and step past it.  */
          mcel_t ga = mcel_scanz (a);
          a += ga.len;
          mcel_t gb = mcel_scanz (b);
          b += gb.len;

          int diff = ucore_tocmp (c32tolower, ga.c, gb.c);

          /* Stop on a mismatch, or when S1's scanned value is zero.  */
          if (_GL_UNLIKELY (diff | !ga.c))
            return diff;
        }
    }
}