diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2021-09-27 10:00:32 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-27 10:00:32 -0600 |
commit | ae7839bbe817329dd015f9195da308a0f3fbd3e2 (patch) | |
tree | ba710c468adef4718e3d1ed9747d2acdc830216b | |
parent | bpo-43914: Correctly highlight SyntaxError exceptions for invalid generator e... (diff) | |
download | cpython-ae7839bbe817329dd015f9195da308a0f3fbd3e2.tar.gz cpython-ae7839bbe817329dd015f9195da308a0f3fbd3e2.tar.bz2 cpython-ae7839bbe817329dd015f9195da308a0f3fbd3e2.zip |
bpo-45211: Move helpers from getpath.c to internal API. (gh-28550)
This accomplishes 2 things:
* consolidates some common code between getpath.c and getpathp.c
* makes the helpers available to code in other files
Note: the signature of the static join_relfile() helper (in Python/fileutils.c) intentionally mirrors that of Windows' PathCchCombineEx().
Note that this change is mostly moving code around. No behavior is meant to change.
https://bugs.python.org/issue45211
-rw-r--r-- | Include/internal/pycore_fileutils.h | 15 | ||||
-rw-r--r-- | Include/internal/pycore_pystate.h | 11 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 2 | ||||
-rw-r--r-- | Modules/getpath.c | 64 | ||||
-rw-r--r-- | PC/getpathp.c | 55 | ||||
-rw-r--r-- | Python/fileutils.c | 102 | ||||
-rw-r--r-- | Python/initconfig.c | 5 | ||||
-rw-r--r-- | Python/preconfig.c | 10 |
8 files changed, 164 insertions, 100 deletions
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index c1c9244a1bc..8491ed9b5ff 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -10,6 +10,12 @@ extern "C" { #include <locale.h> /* struct lconv */ +// This is used after getting NULL back from Py_DecodeLocale(). +#define DECODE_LOCALE_ERR(NAME, LEN) \ + ((LEN) == (size_t)-2) \ + ? _PyStatus_ERR("cannot decode " NAME) \ + : _PyStatus_NO_MEMORY() + PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors; PyAPI_FUNC(int) _Py_DecodeUTF8Ex( @@ -33,6 +39,9 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape( Py_ssize_t arglen, size_t *wlen); +extern int +_Py_wstat(const wchar_t *, struct stat *); + PyAPI_FUNC(int) _Py_GetForceASCII(void); /* Reset "force ASCII" mode (if it was initialized). @@ -65,6 +74,12 @@ extern int _Py_EncodeNonUnicodeWchar_InPlace( Py_ssize_t size); #endif +extern wchar_t * _Py_join_relfile(const wchar_t *dirname, + const wchar_t *relfile); +extern int _Py_add_relfile(wchar_t *dirname, + const wchar_t *relfile, + size_t bufsize); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 4b894f3eff4..aef318989aa 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -30,6 +30,17 @@ _Py_IsMainInterpreter(PyInterpreterState *interp) } +static inline const PyConfig * +_Py_GetMainConfig(void) +{ + PyInterpreterState *interp = _PyRuntime.interpreters.main; + if (interp == NULL) { + return NULL; + } + return _PyInterpreterState_GetConfig(interp); +} + + /* Only handle signals on the main thread of the main interpreter. 
*/ static inline int _Py_ThreadCanHandleSignals(PyInterpreterState *interp) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index e1b466a7b56..cda814c3ed3 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -434,7 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'pathconfig_warnings': 1, '_init_main': 1, '_isolated_interpreter': 0, - 'use_frozen_modules': False, + 'use_frozen_modules': 0, } if MS_WINDOWS: CONFIG_COMPAT.update({ diff --git a/Modules/getpath.c b/Modules/getpath.c index 363d62a0657..de1c6e3fbb6 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -115,11 +115,6 @@ extern "C" { #define BUILD_LANDMARK L"Modules/Setup.local" -#define DECODE_LOCALE_ERR(NAME, LEN) \ - ((LEN) == (size_t)-2) \ - ? _PyStatus_ERR("cannot decode " NAME) \ - : _PyStatus_NO_MEMORY() - #define PATHLEN_ERR() _PyStatus_ERR("path configuration: path too long") typedef struct { @@ -149,23 +144,6 @@ static const wchar_t delimiter[2] = {DELIM, '\0'}; static const wchar_t separator[2] = {SEP, '\0'}; -/* Get file status. Encode the path to the locale encoding. 
*/ -static int -_Py_wstat(const wchar_t* path, struct stat *buf) -{ - int err; - char *fname; - fname = _Py_EncodeLocaleRaw(path, NULL); - if (fname == NULL) { - errno = EINVAL; - return -1; - } - err = stat(fname, buf); - PyMem_RawFree(fname); - return err; -} - - static void reduce(wchar_t *dir) { @@ -235,28 +213,18 @@ isdir(const wchar_t *filename) static PyStatus joinpath(wchar_t *path, const wchar_t *path2, size_t path_len) { - size_t n; - if (!_Py_isabs(path2)) { - n = wcslen(path); - if (n >= path_len) { + if (_Py_isabs(path2)) { + if (wcslen(path2) >= path_len) { return PATHLEN_ERR(); } - - if (n > 0 && path[n-1] != SEP) { - path[n++] = SEP; - } + wcscpy(path, path2); } else { - n = 0; - } - - size_t k = wcslen(path2); - if (n + k >= path_len) { - return PATHLEN_ERR(); + if (_Py_add_relfile(path, path2, path_len) < 0) { + return PATHLEN_ERR(); + } + return _PyStatus_OK(); } - wcsncpy(path + n, path2, k); - path[n + k] = '\0'; - return _PyStatus_OK(); } @@ -283,23 +251,7 @@ joinpath2(const wchar_t *path, const wchar_t *path2) if (_Py_isabs(path2)) { return _PyMem_RawWcsdup(path2); } - - size_t len = wcslen(path); - int add_sep = (len > 0 && path[len - 1] != SEP); - len += add_sep; - len += wcslen(path2); - - wchar_t *new_path = PyMem_RawMalloc((len + 1) * sizeof(wchar_t)); - if (new_path == NULL) { - return NULL; - } - - wcscpy(new_path, path); - if (add_sep) { - wcscat(new_path, separator); - } - wcscat(new_path, path2); - return new_path; + return _Py_join_relfile(path, path2); } diff --git a/PC/getpathp.c b/PC/getpathp.c index 603a1eb13c4..38009465ae6 100644 --- a/PC/getpathp.c +++ b/PC/getpathp.c @@ -82,6 +82,7 @@ #include "Python.h" #include "pycore_initconfig.h" // PyStatus #include "pycore_pathconfig.h" // _PyPathConfig +#include "pycore_fileutils.h" // _Py_add_relfile() #include "osdefs.h" // SEP, ALTSEP #include <wchar.h> @@ -115,10 +116,6 @@ * with a semicolon separated path prior to calling Py_Initialize. 
*/ -#ifndef LANDMARK -# define LANDMARK L"lib\\os.py" -#endif - #define INIT_ERR_BUFFER_OVERFLOW() _PyStatus_ERR("buffer overflow") @@ -216,7 +213,7 @@ exists(const wchar_t *filename) Assumes 'filename' MAXPATHLEN+1 bytes long - may extend 'filename' by one character. */ static int -ismodule(wchar_t *filename, int update_filename) +ismodule(wchar_t *filename) { size_t n; @@ -231,9 +228,8 @@ ismodule(wchar_t *filename, int update_filename) filename[n] = L'c'; filename[n + 1] = L'\0'; exist = exists(filename); - if (!update_filename) { - filename[n] = L'\0'; - } + // Drop the 'c' we just added. + filename[n] = L'\0'; return exist; } return 0; @@ -253,7 +249,7 @@ ismodule(wchar_t *filename, int update_filename) static void join(wchar_t *buffer, const wchar_t *stuff) { - if (FAILED(PathCchCombineEx(buffer, MAXPATHLEN+1, buffer, stuff, 0))) { + if (_Py_add_relfile(buffer, stuff, MAXPATHLEN+1) < 0) { Py_FatalError("buffer overflow in getpathp.c's join()"); } } @@ -273,30 +269,37 @@ canonicalize(wchar_t *buffer, const wchar_t *path) return _PyStatus_OK(); } - -/* gotlandmark only called by search_for_prefix, which ensures - 'prefix' is null terminated in bounds. join() ensures - 'landmark' can not overflow prefix if too long. */ static int -gotlandmark(const wchar_t *prefix, const wchar_t *landmark) +is_stdlibdir(wchar_t *stdlibdir) { - wchar_t filename[MAXPATHLEN+1]; - memset(filename, 0, sizeof(filename)); - wcscpy_s(filename, Py_ARRAY_LENGTH(filename), prefix); - join(filename, landmark); - return ismodule(filename, FALSE); + wchar_t *filename = stdlibdir; +#ifndef LANDMARK +# define LANDMARK L"os.py" +#endif + /* join() ensures 'landmark' can not overflow prefix if too long. */ + join(filename, LANDMARK); + return ismodule(filename); } - /* assumes argv0_path is MAXPATHLEN+1 bytes long, already \0 term'd. 
assumption provided by only caller, calculate_path() */ static int -search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path, const wchar_t *landmark) +search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path) { - /* Search from argv0_path, until landmark is found */ - wcscpy_s(prefix, MAXPATHLEN + 1, argv0_path); + /* Search from argv0_path, until LANDMARK is found. + We guarantee 'prefix' is null terminated in bounds. */ + wcscpy_s(prefix, MAXPATHLEN+1, argv0_path); + wchar_t stdlibdir[MAXPATHLEN+1]; + wcscpy_s(stdlibdir, Py_ARRAY_LENGTH(stdlibdir), prefix); + /* We initialize with the longest possible path, in case it doesn't fit. + This also gives us an initial SEP at stdlibdir[wcslen(prefix)]. */ + join(stdlibdir, L"lib"); do { - if (gotlandmark(prefix, landmark)) { + assert(stdlibdir[wcslen(prefix)] == SEP); + /* Due to reduce() and our initial value, this result + is guaranteed to fit. */ + wcscpy(&stdlibdir[wcslen(prefix) + 1], L"lib"); + if (is_stdlibdir(stdlibdir)) { return 1; } reduce(prefix); @@ -758,7 +761,7 @@ calculate_home_prefix(PyCalculatePath *calculate, reduce(prefix); calculate->home = prefix; } - else if (search_for_prefix(prefix, argv0_path, LANDMARK)) { + else if (search_for_prefix(prefix, argv0_path)) { calculate->home = prefix; } else { @@ -936,7 +939,7 @@ calculate_module_search_path(PyCalculatePath *calculate, lookBuf[nchars] = L'\0'; /* Up one level to the parent */ reduce(lookBuf); - if (search_for_prefix(prefix, lookBuf, LANDMARK)) { + if (search_for_prefix(prefix, lookBuf)) { break; } /* If we are out of paths to search - give up */ diff --git a/Python/fileutils.c b/Python/fileutils.c index 9e732ddca55..2492d0567d8 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -7,6 +7,7 @@ #ifdef MS_WINDOWS # include <malloc.h> # include <windows.h> +# include <pathcch.h> // PathCchCombineEx extern int winerror_to_errno(int); #endif @@ -1205,6 +1206,31 @@ _Py_fstat(int fd, struct _Py_stat_struct *status) return 0; } +/* Like 
_Py_stat() but with a raw filename. */ +int +_Py_wstat(const wchar_t* path, struct stat *buf) +{ + int err; +#ifdef MS_WINDOWS + struct _stat wstatbuf; + err = _wstat(path, &wstatbuf); + if (!err) { + buf->st_mode = wstatbuf.st_mode; + } +#else + char *fname; + fname = _Py_EncodeLocaleRaw(path, NULL); + if (fname == NULL) { + errno = EINVAL; + return -1; + } + err = stat(fname, buf); + PyMem_RawFree(fname); +#endif + return err; +} + + /* Call _wstat() on Windows, or encode the path to the filesystem encoding and call stat() otherwise. Only fill st_mode attribute on Windows. @@ -1216,7 +1242,6 @@ _Py_stat(PyObject *path, struct stat *statbuf) { #ifdef MS_WINDOWS int err; - struct _stat wstatbuf; #if USE_UNICODE_WCHAR_CACHE const wchar_t *wpath = _PyUnicode_AsUnicode(path); @@ -1226,9 +1251,7 @@ _Py_stat(PyObject *path, struct stat *statbuf) if (wpath == NULL) return -2; - err = _wstat(wpath, &wstatbuf); - if (!err) - statbuf->st_mode = wstatbuf.st_mode; + err = _Py_wstat(wpath, statbuf); #if !USE_UNICODE_WCHAR_CACHE PyMem_Free(wpath); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -2072,6 +2095,77 @@ _Py_abspath(const wchar_t *path, wchar_t **abspath_p) } +// The caller must ensure "buffer" is big enough. +static int +join_relfile(wchar_t *buffer, size_t bufsize, + const wchar_t *dirname, const wchar_t *relfile) +{ +#ifdef MS_WINDOWS + if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile, 0))) { + return -1; + } +#else + assert(!_Py_isabs(relfile)); + size_t dirlen = wcslen(dirname); + size_t rellen = wcslen(relfile); + size_t maxlen = bufsize - 1; + if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) { + return -1; + } + if (dirlen == 0) { + // We do not add a leading separator. 
+ wcscpy(buffer, relfile); + } + else { + if (dirname != buffer) { + wcscpy(buffer, dirname); + } + size_t relstart = dirlen; + if (dirlen > 1 && dirname[dirlen - 1] != SEP) { + buffer[dirlen] = SEP; + relstart += 1; + } + wcscpy(&buffer[relstart], relfile); + } +#endif + return 0; +} + +/* Join the two paths together, like os.path.join(). Return NULL + if memory could not be allocated. The caller is responsible + for calling PyMem_RawFree() on the result. */ +wchar_t * +_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile) +{ + assert(dirname != NULL && relfile != NULL); + assert(!_Py_isabs(relfile)); + size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile); + size_t bufsize = maxlen + 1; + wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t)); + if (filename == NULL) { + return NULL; + } + assert(wcslen(dirname) < MAXPATHLEN); + assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname)); + join_relfile(filename, bufsize, dirname, relfile); + return filename; +} + +/* Join the two paths together, like os.path.join(). + dirname: the target buffer with the dirname already in place, + including trailing NUL + relfile: this must be a relative path + bufsize: total allocated size of the buffer + Return -1 if anything is wrong with the path lengths. */ +int +_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize) +{ + assert(dirname != NULL && relfile != NULL); + assert(bufsize > 0); + return join_relfile(dirname, bufsize, dirname, relfile); +} + + /* Get the current directory. buflen is the buffer size in wide characters including the null character. Decode the path from the locale encoding. diff --git a/Python/initconfig.c b/Python/initconfig.c index 8740cc1cf7a..40a5846f43b 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -587,11 +587,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv) /* --- PyConfig ---------------------------------------------- */ -#define DECODE_LOCALE_ERR(NAME, LEN) \ - (((LEN) == -2) \ - ? 
_PyStatus_ERR("cannot decode " NAME) \ - : _PyStatus_NO_MEMORY()) - #define MAX_HASH_SEED 4294967295UL diff --git a/Python/preconfig.c b/Python/preconfig.c index ae1cc3f90fc..d59273159a6 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -1,4 +1,5 @@ #include "Python.h" +#include "pycore_fileutils.h" // DECODE_LOCALE_ERR #include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_initconfig.h" // _PyArgv #include "pycore_pymem.h" // _PyMem_GetAllocatorName() @@ -6,12 +7,6 @@ #include <locale.h> // setlocale() -#define DECODE_LOCALE_ERR(NAME, LEN) \ - (((LEN) == -2) \ - ? _PyStatus_ERR("cannot decode " NAME) \ - : _PyStatus_NO_MEMORY()) - - /* Forward declarations */ static void preconfig_copy(PyPreConfig *config, const PyPreConfig *config2); @@ -87,8 +82,7 @@ _PyArgv_AsWstrList(const _PyArgv *args, PyWideStringList *list) wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len); if (arg == NULL) { _PyWideStringList_Clear(&wargv); - return DECODE_LOCALE_ERR("command line arguments", - (Py_ssize_t)len); + return DECODE_LOCALE_ERR("command line arguments", len); } wargv.items[i] = arg; wargv.length++; |