From c5ae2254182b803618a4b01c12fa88c42642e806 Mon Sep 17 00:00:00 2001
From: Brendan Hoar
Date: Fri, 26 Apr 2024 07:55:39 -0400
Subject: [PATCH] Better handling of embeddings with two rare, but not unusual, files in them

I have encountered pickled embeddings with a short byteorder file at the
top-level, as well as a .data/serialization_id file.

Both load fine after allowing these files in the dataset.

I do not think it is likely adding them to the safe unpickle regular
expression would be a security risk, but that's for the maintainers to
decide.
---
 modules/safe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/safe.py b/modules/safe.py
index b1d08a792..ee8789268 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -65,7 +65,7 @@ class RestrictedUnpickler(pickle.Unpickler):
 
 
 # Regular expression that accepts 'dirname/version', 'dirname/data.pkl', and 'dirname/data/<number>'
-allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|(data\.pkl))$")
+allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|byteorder|(\.data\/serialization_id)|version|(data\.pkl))$")
 data_pkl_re = re.compile(r"^([^/]+)/data\.pkl$")
 
 def check_zip_filenames(filename, names):