tar: bug fix and compatibility improvements

1. A fix for a bug in the unarchive() function (causing metadata
loss when used on large tar files).  This bug is due to existing
code continuing to check for h->type == HARDLINK and SOFTLINK
(near end of function), when the entire header block has already
been overwritten by a call to eread() prior to the h->type checks.

2. Long (>=256 byte) file name compatibility: 'L' style long file
names were extremely simple to add, requiring a (net) addition of
a handful lines of code, and this patch supports both extracting
and creating L-style tar archives. Pax 'x' style long names are
more complex to parse, and this patch only supports extracting pax
'x' tars, but not creating them.

3. Command line argument compatibility improvements: 'c', 'x', 't'
args are accepted without needing a hyphen in front.  The '-p'
flag is also accepted but is no-op (as it is the normal behaviour
of sbase tar anyway, and allows sbase tar to be used in scripts
specifying this flag).  Directory tree member extraction is also
supported by this patch.

4. Handle tar archives with "." (current directory) entries.  Some
archives contain "." or "./" entries, causing error reports when
the current code tries to remove() the current dir.  I have added a
check in unarchive() to not perform remove() on encountering these
entries.
This commit is contained in:
Xan Phung 2025-05-04 16:19:02 +10:00 committed by Roberto E. Vargas Caballero
parent cfe9424ba9
commit 5aa63dbe05

212
tar.c
View File

@ -6,6 +6,7 @@
#include <sys/sysmacros.h>
#endif
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
@ -19,7 +20,7 @@
#include "fs.h"
#include "util.h"
#define BLKSIZ 512
#define BLKSIZ (sizeof (struct header)) /* must equal 512 bytes */
enum Type {
REG = '0',
@ -50,6 +51,7 @@ struct header {
char major[8];
char minor[8];
char prefix[155];
char padding[12];
};
static struct dirtime {
@ -169,6 +171,17 @@ ewrite(int fd, const void *buf, size_t n)
return r;
}
static unsigned
chksum(struct header *h)
{
unsigned sum, i;
memset(h->chksum, ' ', sizeof(h->chksum));
for (i = 0, sum = 0, assert(BLKSIZ == 512); i < BLKSIZ; i++)
sum += *((unsigned char *)h + i);
return sum;
}
static void
putoctal(char *dst, unsigned num, int size)
{
@ -179,14 +192,16 @@ putoctal(char *dst, unsigned num, int size)
static int
archive(const char *path)
{
char b[BLKSIZ];
const char *base, *p;
static const struct header blank = {
"././@LongLink", "0000000" , "0000000", "0000000", "00000000000",
"00000000000" , " ", AREG , "" , "ustar", "00",
};
char b[BLKSIZ + BLKSIZ], *p;
struct header *h = (struct header *)b;
struct group *gr;
struct header *h;
struct passwd *pw;
struct stat st;
size_t chksum, i, nlen, plen;
ssize_t l, r;
ssize_t l, n, r;
int fd = -1;
if (lstat(path, &st) < 0) {
@ -196,47 +211,37 @@ archive(const char *path)
weprintf("ignoring %s\n", path);
return 0;
}
pw = getpwuid(st.st_uid);
gr = getgrgid(st.st_gid);
h = (struct header *)b;
memset(b, 0, sizeof(b));
*h = blank;
n = strlcpy(h->name, path, sizeof(h->name));
if (n >= sizeof(h->name)) {
*++h = blank;
h->type = 'L';
putoctal(h->size, n, sizeof(h->size));
putoctal(h->chksum, chksum(h), sizeof(h->chksum));
plen = 0;
base = path;
if ((nlen = strlen(base)) >= sizeof(h->name)) {
/*
* Cover case where path name is too long (in which case we
* need to split it to prefix and name).
*/
if ((base = strrchr(path, '/')) == NULL)
goto too_long;
for (p = base++; p > path && *p == '/'; --p)
;
nlen -= base - path;
plen = p - path + 1;
if (nlen >= sizeof(h->name) || plen >= sizeof(h->prefix))
goto too_long;
ewrite(tarfd, (char *)h, BLKSIZ);
for (p = (char *)path; n > 0; n -= BLKSIZ, p += BLKSIZ) {
if (n < BLKSIZ) {
p = memcpy(h--, p, n);
memset(p + n, 0, BLKSIZ - n);
}
ewrite(tarfd, p, BLKSIZ);
}
}
memcpy(h->name, base, nlen);
memcpy(h->prefix, path, plen);
putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode));
putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid));
putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid));
putoctal(h->size, 0, sizeof(h->size));
putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime));
memcpy( h->magic, "ustar", sizeof(h->magic));
memcpy( h->version, "00", sizeof(h->version));
estrlcpy(h->uname, pw ? pw->pw_name : "", sizeof(h->uname));
estrlcpy(h->gname, gr ? gr->gr_name : "", sizeof(h->gname));
if (S_ISREG(st.st_mode)) {
h->type = REG;
putoctal(h->size, (unsigned)st.st_size, sizeof(h->size));
putoctal(h->size, st.st_size, sizeof(h->size));
fd = open(path, O_RDONLY);
if (fd < 0)
eprintf("open %s:", path);
@ -255,10 +260,7 @@ archive(const char *path)
h->type = FIFO;
}
memset(h->chksum, ' ', sizeof(h->chksum));
for (i = 0, chksum = 0; i < sizeof(*h); i++)
chksum += (unsigned char)b[i];
putoctal(h->chksum, chksum, sizeof(h->chksum));
putoctal(h->chksum, chksum(h), sizeof(h->chksum));
ewrite(tarfd, b, BLKSIZ);
if (fd != -1) {
@ -271,24 +273,21 @@ archive(const char *path)
}
return 0;
too_long:
eprintf("filename too long: %s\n", path);
}
static int
unarchive(char *fname, ssize_t l, char b[BLKSIZ])
{
struct header *h = (struct header *)b;
struct timespec times[2];
char lname[101], *tmp, *p;
long mode, major, minor, type, mtime, uid, gid;
struct header *h = (struct header *)b;
int fd = -1;
struct timespec times[2];
int fd = -1, lnk = h->type == SYMLINK;
if (!mflag && ((mtime = strtol(h->mtime, &p, 8)) < 0 || *p != '\0'))
eprintf("strtol %s: invalid number\n", h->mtime);
if (remove(fname) < 0 && errno != ENOENT)
weprintf("remove %s:", fname);
if (strcmp(fname, ".") && strcmp(fname, "./") && remove(fname) < 0)
if (errno != ENOENT) weprintf("remove %s:", fname);
tmp = estrdup(fname);
mkdirp(dirname(tmp), 0777, 0777);
@ -308,10 +307,9 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
case SYMLINK:
snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname),
h->linkname);
if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0)
eprintf("%s %s -> %s:",
(h->type == HARDLINK) ? "link" : "symlink",
fname, lname);
if ((lnk ? symlink:link)(lname, fname) < 0)
eprintf("%s %s -> %s:", lnk ? "symlink":"link", fname, lname);
lnk++;
break;
case DIRECTORY:
if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0')
@ -354,14 +352,14 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
close(fd);
}
if (h->type == HARDLINK)
if (lnk == 1)
return 0;
times[0].tv_sec = times[1].tv_sec = mtime;
times[0].tv_nsec = times[1].tv_nsec = 0;
if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 0)
weprintf("utimensat %s:", fname);
if (h->type == SYMLINK) {
if (lnk) {
if (!getuid() && lchown(fname, uid, gid))
weprintf("lchown %s:", fname);
} else {
@ -435,9 +433,9 @@ sanitize(struct header *h)
static void
chktar(struct header *h)
{
char tmp[8], *err, *p = (char *)h;
const char *reason;
long s1, s2, i;
char tmp[sizeof h->chksum], *err = "";
long sum, i;
if (h->prefix[0] == '\0' && h->name[0] == '\0') {
reason = "empty filename";
@ -448,23 +446,19 @@ chktar(struct header *h)
goto bad;
}
memcpy(tmp, h->chksum, sizeof(tmp));
for (i = 0; i < sizeof(tmp) && tmp[i] == ' '; i++);
for (; i < sizeof(tmp); i++)
if (tmp[i] == ' ')
for (i = sizeof(tmp)-1; i > 0 && tmp[i] == ' '; i--) {
tmp[i] = '\0';
s1 = strtol(tmp, &err, 8);
if (s1 < 0 || *err != '\0') {
}
sum = strtol(tmp, &err, 8);
if (sum < 0 || sum >= BLKSIZ*256 || *err != '\0') {
reason = "invalid checksum";
goto bad;
}
memset(h->chksum, ' ', sizeof(h->chksum));
for (i = 0, s2 = 0; i < sizeof(*h); i++)
s2 += (unsigned char)p[i];
if (s1 != s2) {
if (sum != chksum(h)) {
reason = "incorrect checksum";
goto bad;
}
memcpy(h->chksum, tmp, sizeof(h->chksum));
memcpy(h->chksum, tmp, sizeof(tmp));
return;
bad:
eprintf("malformed tar archive: %s\n", reason);
@ -473,45 +467,70 @@ bad:
static void
xt(int argc, char *argv[], int mode)
{
char b[BLKSIZ], fname[256 + 1], *p;
long size, l;
char b[BLKSIZ], fname[l = PATH_MAX + 1], *p, *q = NULL;
int i, m, n;
int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print;
struct timespec times[2];
struct header *h = (struct header *)b;
struct dirtime *dirtime;
long size;
int i, n;
int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print;
while (eread(tarfd, b, BLKSIZ) > 0 && (h->name[0] || h->prefix[0])) {
chktar(h);
sanitize(h), n = 0;
/* small dance around non-null terminated fields */
if (h->prefix[0])
n = snprintf(fname, sizeof(fname), "%.*s/",
(int)sizeof(h->prefix), h->prefix);
snprintf(fname + n, sizeof(fname) - n, "%.*s",
(int)sizeof(h->name), h->name);
sanitize(h);
if ((size = strtol(h->size, &p, 8)) < 0 || *p != '\0')
eprintf("strtol %s: invalid number\n", h->size);
eprintf("strtol %s: invalid size\n", h->size);
if (argc) {
/* only extract the given files */
for (i = 0; i < argc; i++)
if (!strcmp(argv[i], fname))
/* Long file path is read direcly into fname*/
if (h->type == 'L' || h->type == 'x' || h->type == 'g') {
/* Read header only up to size of fname buffer */
for (q = fname; q < fname+size; q += BLKSIZ) {
if (q + BLKSIZ >= fname + l)
eprintf("name exceeds buffer: %s\n", fname);
eread(tarfd, q, BLKSIZ);
}
/* Convert pax x header with 'path=' field into L header */
if (h->type == 'x') for (q = fname; q < fname+size-16; q += n) {
if ((n = strtol(q, &p, 10)) < 0 || *p != ' ')
eprintf("strtol %.*s: invalid number\n", p+1-q, q);
if (n && strncmp(p+1, "path=", 5) == 0) {
memmove(fname, p+6, size = q+n - p-6 - 1);
h->type = 'L';
break;
}
}
fname[size] = '\0';
/* Non L-like header (eg. pax 'g') is skipped by setting q=null */
if (h->type != 'L')
q = NULL;
continue;
}
/* Ustar path is copied into fname if no L header (ie: q is NULL) */
if (!q) {
m = sizeof h->prefix, n = sizeof h->name;
p = "/" + !h->prefix[0];
snprintf(fname, l, "%.*s%s%.*s", m, h->prefix, p, n, h->name);
}
q = NULL;
/* If argc > 0 then only extract the given files/dirs */
if (argc) {
for (i = 0; i < argc; i++) {
if (strncmp(argv[i], fname, n = strlen(argv[i])) == 0)
if (strchr("/", fname[n]) || argv[i][n-1] == '/')
break;
}
if (i == argc) {
skipblk(size);
continue;
}
}
/* ignore global pax header craziness */
if (h->type == 'g' || h->type == 'x') {
skipblk(size);
continue;
}
fn(fname, size, b);
if (vflag && mode != 't')
puts(fname);
@ -530,12 +549,15 @@ xt(int argc, char *argv[], int mode)
}
}
char **args;
int argn;
static void
usage(void)
{
eprintf("usage: %s [-C dir] [-J | -Z | -a | -j | -z] -x [-m | -t] "
eprintf("usage: %s [x | t | -x | -t] [-C dir] [-J | -Z | -a | -j | -z] [-m] [-p] "
"[-f file] [file ...]\n"
" %s [-C dir] [-J | -Z | -a | -j | -z] [-h] -c path ... "
" %s [c | -c] [-C dir] [-J | -Z | -a | -j | -z] [-h] path ... "
"[-f file]\n", argv0, argv0);
}
@ -547,6 +569,10 @@ main(int argc, char *argv[])
char *file = NULL, *dir = ".", mode = '\0';
int fd;
argv0 = argv[0];
if (argc > 1 && strchr("cxt", mode = *argv[1]))
*(argv[1]+1) ? *argv[1] = '-' : (*++argv = argv0, --argc);
ARGBEGIN {
case 'x':
case 'c':
@ -576,18 +602,16 @@ main(int argc, char *argv[])
case 'v':
vflag = 1;
break;
case 'p':
break; /* Do nothing as already default behaviour */
default:
usage();
} ARGEND
if (!mode)
usage();
if (mode == 'c')
if (!argc)
usage();
switch (mode) {
case 'c':
if (!argc)
usage();
tarfd = 1;
if (file && *file != '-') {
tarfd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0644);
@ -626,6 +650,8 @@ main(int argc, char *argv[])
eprintf("chdir %s:", dir);
xt(argc, argv, mode);
break;
default:
usage();
}
return recurse_status;