Fix O(n^2) time bug in --delay-directory-restore

delayed_set_stat avoids inserting duplicate entries into
delayed_set_stat_head. It was doing this by scanning the entire
list.

Normally this list is small, but if --delay-directory-restore is
used (including automatically for incremental archives), this list
grows with the total number of directories in the archive.

The entire scan takes O(n) time. Extracting an archive with n
directories could therefore take O(n^2) time.

The included test uses AT_SKIP_LARGE_FILES, allowing it to optionally be
skipped. It may execute slowly on certain filesystems or disks, as it
creates thousands of directories.

There are still potentially problematic O(n) scans in
find_direct_ancestor and remove_delayed_set_stat, which this patch does
not attempt to fix.

* NEWS: Update.
* src/extract.c (delayed_set_stat_table): Create a table for O(1)
lookups of entries in the delayed_set_stat_head list. The list
remains, as tracking insertion order is important.
(dl_hash, dl_compare): New hash table helper functions.
(delay_set_stat): Create the hash table, replace the O(n) list scan
with a hash_lookup, insert new entries into the hash table.
(remove_delayed_set_stat): Also remove entry from hash table.
(apply_nonancestor_delayed_set_stat): Also remove entry from hash
table.
(extract_finish): Free the (empty) hash table.
* tests/extrac26.at: New file.
* tests/Makefile.am (TESTSUITE_AT): Include extrac26.at.
* tests/testsuite.at: Include extrac26.at.
This commit is contained in:
Benjamin Woodruff 2023-08-21 13:06:45 -07:00 committed by Paul Eggert
parent bfee1d44a3
commit a5afb36765
5 changed files with 84 additions and 5 deletions

5
NEWS
View File

@ -1,4 +1,4 @@
GNU tar NEWS - User visible changes. 2023-08-02
GNU tar NEWS - User visible changes. 2023-08-21
Please send GNU tar bug reports to <bug-tar@gnu.org>
version TBD
@ -26,6 +26,9 @@ used, command output will be parsed using strptime(3).
* Bug fixes
** Fixed O(n^2) time complexity bug for large numbers of directories when
extracting with --delay-directory-restore or reading incremental archives.
** tar no longer uses alloca, fixing an unlikely stack overflow.

View File

@ -130,6 +130,9 @@ struct delayed_set_stat
static struct delayed_set_stat *delayed_set_stat_head;
/* Table of delayed stat updates hashed by path; null if none. */
static Hash_table *delayed_set_stat_table;
/* A link whose creation we have delayed. */
struct delayed_link
{
@ -214,6 +217,20 @@ dl_compare (void const *a, void const *b)
return (da->dev == db->dev) & (da->ino == db->ino);
}
static size_t
ds_hash (void const *entry, size_t table_size)
{
struct delayed_set_stat const *ds = entry;
return hash_string (ds->file_name, table_size);
}
static bool
ds_compare (void const *a, void const *b)
{
struct delayed_set_stat const *dsa = a, *dsb = b;
return strcmp (dsa->file_name, dsb->file_name) == 0;
}
/* Set up to extract files. */
void
extr_init (void)
@ -513,11 +530,14 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st,
size_t file_name_len = strlen (file_name);
struct delayed_set_stat *data;
for (data = delayed_set_stat_head; data; data = data->next)
if (strcmp (data->file_name, file_name) == 0)
break;
if (! (delayed_set_stat_table
|| (delayed_set_stat_table = hash_initialize (0, 0, ds_hash,
ds_compare, NULL))))
xalloc_die ();
if (data)
const struct delayed_set_stat key = { .file_name = (char*) file_name };
if ((data = hash_lookup (delayed_set_stat_table, &key)) != NULL)
{
if (data->interdir)
{
@ -541,6 +561,8 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st,
delayed_set_stat_head = data;
data->file_name_len = file_name_len;
data->file_name = xstrdup (file_name);
if (! hash_insert (delayed_set_stat_table, data))
xalloc_die ();
data->after_links = false;
if (st)
{
@ -652,6 +674,7 @@ remove_delayed_set_stat (const char *fname)
if (chdir_current == data->change_dir
&& strcmp (data->file_name, fname) == 0)
{
hash_remove (delayed_set_stat_table, data);
free_delayed_set_stat (data);
if (prev)
prev->next = next;
@ -1000,6 +1023,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links)
}
delayed_set_stat_head = data->next;
hash_remove (delayed_set_stat_table, data);
free_delayed_set_stat (data);
}
}
@ -1962,6 +1986,13 @@ extract_finish (void)
/* Finally, fix the status of directories that are ancestors
of delayed links. */
apply_nonancestor_delayed_set_stat ("", 1);
/* This table should be empty after apply_nonancestor_delayed_set_stat */
if (delayed_set_stat_table != NULL)
{
hash_free (delayed_set_stat_table);
delayed_set_stat_table = NULL;
}
}
bool

View File

@ -133,6 +133,7 @@ TESTSUITE_AT = \
extrac23.at\
extrac24.at\
extrac25.at\
extrac26.at\
filerem01.at\
filerem02.at\
grow.at\

43
tests/extrac26.at Normal file
View File

@ -0,0 +1,43 @@
# Test suite for GNU tar. -*- autotest -*-
# Copyright 2022-2023 Free Software Foundation, Inc.
#
# This file is part of GNU tar.
#
# GNU tar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GNU tar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
AT_SETUP([extract a large directory tree with --delay-directory-restore])
AT_KEYWORDS([extract extrac26])
AT_TAR_CHECK([
AT_SKIP_LARGE_FILES
AT_TIMEOUT_PREREQ
echo Creating dirtree
awk 'BEGIN { for (j = 0; j < 300; j++) for (k = 0; k < 300; k++) print "dirtree/" j "/" k }' | \
xargs mkdir -p
echo Creating archive
tar -cf archive.tar dirtree
echo Extracting archive
mkdir output
timeout 15 tar -xf archive.tar --delay-directory-restore -C output
],
[0],
[Creating dirtree
Creating archive
Extracting archive
],
[],[],[],[gnu])
AT_CLEANUP

View File

@ -349,6 +349,7 @@ m4_include([extrac22.at])
m4_include([extrac23.at])
m4_include([extrac24.at])
m4_include([extrac25.at])
m4_include([extrac26.at])
m4_include([backup01.at])