mirror of
https://https.git.savannah.gnu.org/git/tar.git
synced 2026-01-26 08:07:53 +00:00
Fix O(n^2) time bug in --delay-directory-restore
delay_set_stat avoids inserting duplicate entries into delayed_set_stat_head. It was doing this by scanning the entire list. Normally this list is small, but if --delay-directory-restore is used (including automatically for incremental archives), this list grows with the total number of directories in the archive. The entire scan takes O(n) time. Extracting an archive with n directories could therefore take O(n^2) time. The included test uses AT_SKIP_LARGE_FILES, allowing it to optionally be skipped. It may execute slowly on certain filesystems or disks, as it creates thousands of directories. There are still potentially problematic O(n) scans in find_direct_ancestor and remove_delayed_set_stat, which this patch does not attempt to fix. * NEWS: Update. * src/extract.c (delayed_set_stat_table): Create a table for O(1) lookups of entries in the delayed_set_stat_head list. The list remains, as tracking insertion order is important. (ds_hash, ds_compare): New hash table helper functions. (delay_set_stat): Create the hash table, replace the O(n) list scan with a hash_lookup, insert new entries into the hash table. (remove_delayed_set_stat): Also remove entry from hash table. (apply_nonancestor_delayed_set_stat): Also remove entry from hash table. (extract_finish): Free the (empty) hash table. * tests/extrac26.at: New file. * tests/Makefile.am (TESTSUITE_AT): Include extrac26.at. * tests/testsuite.at: Include extrac26.at.
This commit is contained in:
parent
bfee1d44a3
commit
a5afb36765
5
NEWS
5
NEWS
@ -1,4 +1,4 @@
|
||||
GNU tar NEWS - User visible changes. 2023-08-02
|
||||
GNU tar NEWS - User visible changes. 2023-08-21
|
||||
Please send GNU tar bug reports to <bug-tar@gnu.org>
|
||||
|
||||
version TBD
|
||||
@ -26,6 +26,9 @@ used, command output will be parsed using strptime(3).
|
||||
|
||||
* Bug fixes
|
||||
|
||||
** Fixed O(n^2) time complexity bug for large numbers of directories when
|
||||
extracting with --delay-directory-restore or reading incremental archives.
|
||||
|
||||
** tar no longer uses alloca, fixing an unlikely stack overflow.
|
||||
|
||||
|
||||
|
||||
@ -130,6 +130,9 @@ struct delayed_set_stat
|
||||
|
||||
static struct delayed_set_stat *delayed_set_stat_head;
|
||||
|
||||
/* Table of delayed stat updates hashed by path; null if none. */
|
||||
static Hash_table *delayed_set_stat_table;
|
||||
|
||||
/* A link whose creation we have delayed. */
|
||||
struct delayed_link
|
||||
{
|
||||
@ -214,6 +217,20 @@ dl_compare (void const *a, void const *b)
|
||||
return (da->dev == db->dev) & (da->ino == db->ino);
|
||||
}
|
||||
|
||||
static size_t
|
||||
ds_hash (void const *entry, size_t table_size)
|
||||
{
|
||||
struct delayed_set_stat const *ds = entry;
|
||||
return hash_string (ds->file_name, table_size);
|
||||
}
|
||||
|
||||
static bool
|
||||
ds_compare (void const *a, void const *b)
|
||||
{
|
||||
struct delayed_set_stat const *dsa = a, *dsb = b;
|
||||
return strcmp (dsa->file_name, dsb->file_name) == 0;
|
||||
}
|
||||
|
||||
/* Set up to extract files. */
|
||||
void
|
||||
extr_init (void)
|
||||
@ -513,11 +530,14 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st,
|
||||
size_t file_name_len = strlen (file_name);
|
||||
struct delayed_set_stat *data;
|
||||
|
||||
for (data = delayed_set_stat_head; data; data = data->next)
|
||||
if (strcmp (data->file_name, file_name) == 0)
|
||||
break;
|
||||
if (! (delayed_set_stat_table
|
||||
|| (delayed_set_stat_table = hash_initialize (0, 0, ds_hash,
|
||||
ds_compare, NULL))))
|
||||
xalloc_die ();
|
||||
|
||||
if (data)
|
||||
const struct delayed_set_stat key = { .file_name = (char*) file_name };
|
||||
|
||||
if ((data = hash_lookup (delayed_set_stat_table, &key)) != NULL)
|
||||
{
|
||||
if (data->interdir)
|
||||
{
|
||||
@ -541,6 +561,8 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st,
|
||||
delayed_set_stat_head = data;
|
||||
data->file_name_len = file_name_len;
|
||||
data->file_name = xstrdup (file_name);
|
||||
if (! hash_insert (delayed_set_stat_table, data))
|
||||
xalloc_die ();
|
||||
data->after_links = false;
|
||||
if (st)
|
||||
{
|
||||
@ -652,6 +674,7 @@ remove_delayed_set_stat (const char *fname)
|
||||
if (chdir_current == data->change_dir
|
||||
&& strcmp (data->file_name, fname) == 0)
|
||||
{
|
||||
hash_remove (delayed_set_stat_table, data);
|
||||
free_delayed_set_stat (data);
|
||||
if (prev)
|
||||
prev->next = next;
|
||||
@ -1000,6 +1023,7 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links)
|
||||
}
|
||||
|
||||
delayed_set_stat_head = data->next;
|
||||
hash_remove (delayed_set_stat_table, data);
|
||||
free_delayed_set_stat (data);
|
||||
}
|
||||
}
|
||||
@ -1962,6 +1986,13 @@ extract_finish (void)
|
||||
/* Finally, fix the status of directories that are ancestors
|
||||
of delayed links. */
|
||||
apply_nonancestor_delayed_set_stat ("", 1);
|
||||
|
||||
/* This table should be empty after apply_nonancestor_delayed_set_stat */
|
||||
if (delayed_set_stat_table != NULL)
|
||||
{
|
||||
hash_free (delayed_set_stat_table);
|
||||
delayed_set_stat_table = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@ -133,6 +133,7 @@ TESTSUITE_AT = \
|
||||
extrac23.at\
|
||||
extrac24.at\
|
||||
extrac25.at\
|
||||
extrac26.at\
|
||||
filerem01.at\
|
||||
filerem02.at\
|
||||
grow.at\
|
||||
|
||||
43
tests/extrac26.at
Normal file
43
tests/extrac26.at
Normal file
@ -0,0 +1,43 @@
|
||||
# Test suite for GNU tar. -*- autotest -*-
|
||||
# Copyright 2022-2023 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU tar.
|
||||
#
|
||||
# GNU tar is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# GNU tar is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
AT_SETUP([extract a large directory tree with --delay-directory-restore])
|
||||
AT_KEYWORDS([extract extrac26])
|
||||
|
||||
AT_TAR_CHECK([
|
||||
AT_SKIP_LARGE_FILES
|
||||
AT_TIMEOUT_PREREQ
|
||||
|
||||
echo Creating dirtree
|
||||
awk 'BEGIN { for (j = 0; j < 300; j++) for (k = 0; k < 300; k++) print "dirtree/" j "/" k }' | \
|
||||
xargs mkdir -p
|
||||
|
||||
echo Creating archive
|
||||
tar -cf archive.tar dirtree
|
||||
|
||||
echo Extracting archive
|
||||
mkdir output
|
||||
timeout 15 tar -xf archive.tar --delay-directory-restore -C output
|
||||
],
|
||||
[0],
|
||||
[Creating dirtree
|
||||
Creating archive
|
||||
Extracting archive
|
||||
],
|
||||
[],[],[],[gnu])
|
||||
|
||||
AT_CLEANUP
|
||||
@ -349,6 +349,7 @@ m4_include([extrac22.at])
|
||||
m4_include([extrac23.at])
|
||||
m4_include([extrac24.at])
|
||||
m4_include([extrac25.at])
|
||||
m4_include([extrac26.at])
|
||||
|
||||
m4_include([backup01.at])
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user