#!/bin/sh
# Test multi-byte delimiter handling in paste
# Copyright (C) 2026 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ paste printf
test "$LOCALE_FR_UTF8" != none || skip_ 'French UTF-8 locale not available'
LC_ALL=$LOCALE_FR_UTF8
export LC_ALL
# UTF-8 test: 2-byte character (e.g., cent sign)
delim_cent=$(env printf '\xc2\xa2')
# UTF-8 test: 3-byte character (e.g., euro sign)
delim_euro=$(env printf '\xe2\x82\xac')
# UTF-8 test: 4-byte character (e.g., emoji: U+1F600)
delim_emoji=$(env printf '\xf0\x9f\x98\x80')
printf '1\n2\n' > f1 || framework_failure_
printf 'a\nb\n' > f2 || framework_failure_
# Test parallel mode with multi-byte delimiters
for delim in "$delim_cent" "$delim_euro" "$delim_emoji"; do
paste -d "$delim" f1 f2 > out || fail=1
printf "1${delim}a\n2${delim}b\n" > exp || framework_failure_
compare exp out || fail=1
done
# Test serial mode with multi-byte delimiters
printf '1\n2\n3\n' > f3 || framework_failure_
for delim in "$delim_cent" "$delim_euro"; do
paste -s -d "$delim" f3 > out || fail=1
printf "1${delim}2${delim}3\n" > exp || framework_failure_
compare exp out || fail=1
done
# Test multiple multi-byte delimiters cycling
printf 'a\nb\nc\n' > f4 || framework_failure_
printf '1\n2\n3\n' > f5 || framework_failure_
printf 'x\ny\nz\n' > f6 || framework_failure_
paste -d "${delim_cent}${delim_euro}" f4 f5 f6 > out || fail=1
printf "a${delim_cent}1${delim_euro}x\n" > exp || framework_failure_
printf "b${delim_cent}2${delim_euro}y\n" >> exp || framework_failure_
printf "c${delim_cent}3${delim_euro}z\n" >> exp || framework_failure_
compare exp out || fail=1
# Test multi-byte delimiters mixed with empty delimiter (\0)
paste -s -d "${delim_euro}\\0" f3 > out || fail=1
printf "1${delim_euro}23\n" > exp || framework_failure_
compare exp out || fail=1
# Test invalid UTF-8 sequences are still passed through
delims_invalid=$(bad_unicode)
delim_invalid=$(env printf '%s' "$delims_invalid" | cut -b1)
paste -d "$delims_invalid" f1 f2 > out || fail=1
printf "1${delim_invalid}a\n2${delim_invalid}b\n" > exp || framework_failure_
compare exp out || fail=1
# Test that \ is treated like
# (unknown escapes pass through the escaped character)
paste -d "\\${delim_euro}" f1 f2 > out || fail=1
paste -d "$delim_euro" f1 f2 > exp || fail=1
compare exp out || fail=1
# Test GB18030 encoding if available
export LC_ALL=zh_CN.gb18030
if test "$(locale charmap 2>/dev/null | sed 's/gb/GB/')" = GB18030; then
# GB18030 2-byte character (e.g., 0xA2 0xE3 is a valid GB18030 char)
delim_gb18030=$(env printf '\xa2\xe3')
paste -d "$delim_gb18030" f1 f2 > out || fail=1
printf "1${delim_gb18030}a\n2${delim_gb18030}b\n" > exp || framework_failure_
compare exp out || fail=1
paste -s -d "$delim_gb18030" f3 > out || fail=1
printf "1${delim_gb18030}2${delim_gb18030}3\n" > exp || framework_failure_
compare exp out || fail=1
# Note 0xFF is invalid in GB18030, but we support all single byte delimiters
delim_ff=$(env printf '\xff')
paste -d "$delim_ff" f1 f2 > out || fail=1
printf "1${delim_ff}a\n2${delim_ff}b\n" > exp || framework_failure_
compare exp out || fail=1
fi
Exit $fail