summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKerin Millar <kfm@plushkava.net>2024-08-10 09:34:25 +0100
committerSam James <sam@gentoo.org>2024-08-11 11:11:05 +0100
commit866af9c1fe47529bdefa02cd0e3eccfea2bebd2b (patch)
tree3e835e6eb8306eb2fd2805a3fff7671fa073b594
parentRemedy false positives in categories SC2034 and SC2154 (diff)
downloadgentoo-functions-866af9c1fe47529bdefa02cd0e3eccfea2bebd2b.tar.gz
gentoo-functions-866af9c1fe47529bdefa02cd0e3eccfea2bebd2b.tar.bz2
gentoo-functions-866af9c1fe47529bdefa02cd0e3eccfea2bebd2b.zip
Render the non-bash srandom() implementation faster
Presently, there are three implementations of srandom(), one of which is the preferred implementation for shells other than bash. It is a little on the slow side as it has to fork and execute both od(1) and tr(1) every time, just to read 4 bytes. Accelerate it by having the shell maintain its own entropy pool of up to 512 hex digits in size. Consider the following benchmark. i=0; while [ $((i += 1)) -le 30000 ]; do srandom; done >/dev/null As conducted with dash on a system with a 2nd generation Intel Xeon, I obtained the following figures. BEFORE real 0m49.878s use 1m1.985s sys 0m17.035s AFTER real 0m12.866s user 0m12.559s sys 0m0.962s It should be noted that the optimised routine will only be utilised in cases where the kernel is Linux and the shell has not forked itself. $ uname Linux $ srandom # uses the fast path $ number=$(srandom) # subshell; probably uses the slow path $ srandom | { read -r number; } # ditto Still, there are conceivable use cases for which this optimisation may prove useful. Below is an example in which it is known in advance that up to 100 random numbers are required, and where writing them to temporary storage is not considered to be a risk. i=0 tmpfile=${TMPDIR:-/tmp}/random-numbers.$$.$(srandom) while [ $((i += 1)) -le 100 ]; do srandom done > "$tmpfile" while read -r number; do do_something_with "$number" done < "$tmpfile" Signed-off-by: Kerin Millar <kfm@plushkava.net> Signed-off-by: Sam James <sam@gentoo.org>
-rw-r--r--functions.sh69
-rwxr-xr-xtest-functions44
2 files changed, 109 insertions, 4 deletions
diff --git a/functions.sh b/functions.sh
index 38eef62..733e4e9 100644
--- a/functions.sh
+++ b/functions.sh
@@ -18,7 +18,7 @@
# BASH : whether bash-specific features may be employed
# BASH_VERSINFO : whether bash-specific features may be employed
-# BASHPID : may be used by _update_columns() to detect subshells
+# BASHPID : may be used by _update_columns() and _update_pid()
# COLUMNS : may be used by _update_columns() to get the column count
# EPOCHREALTIME : potentially used by _update_time() to get the time
# GENFUN_MODULES : which of the optional function collections must be sourced
@@ -587,12 +587,35 @@ srandom()
printf '%d\n' "0x${hex}"
}
elif [ -c /dev/urandom ]; then
+ unset -v genfun_entropy
+
srandom()
{
local hex
- hex=$(LC_ALL=C od -vAn -N4 -tx1 /dev/urandom | tr -d '[:space:]')
- [ "${hex}" ] && printf '%d\n' "$(( 0x${hex} >> 1 ))"
+ # If the shell has forked itself, collect 4 bytes worth
+ # of entropy.
+ if ! _update_pid || [ "$$" != "${genfun_pid}" ]; then
+ hex=$(LC_ALL=C od -vAn -N4 -tx1 /dev/urandom | tr -d '[:space:]')
+ test "${#hex}" -eq 8 && printf '%d\n' "$(( 0x${hex} >> 1 ))"
+ return
+ fi
+
+ # Otherwise, employ a faster method whereby the shell
+ # maintains an entropy pool of up to 512 hex digits in
+ # size.
+ if [ "${#genfun_entropy}" -lt 8 ]; then
+ genfun_entropy=$(
+ LC_ALL=C od -vAn -N256 -tx1 /dev/urandom | tr -d '[:space:]'
+ )
+ fi
+ if [ "${#genfun_entropy}" -lt 8 ]; then
+ false
+ else
+ hex=${genfun_entropy}
+ genfun_entropy=${genfun_entropy%????????}
+ printf '%d\n' "$(( 0x${hex#"$genfun_entropy"} >> 1 ))"
+ fi
}
else
warn "srandom: /dev/urandom doesn't exist as a character device"
@@ -849,6 +872,46 @@ _update_columns()
}
#
+# Determines the PID of the current shell process. Upon success, the PID shall
+# be assigned to genfun_pid. Otherwise, the return value shall be greater than
+# 0. The obtained PID value will differ from the value of $$ under certain
+# circumstances, such as where a shell forks itself to create a subshell.
+#
+_update_pid()
+{
+ if [ "${BASH}" ]; then
+ _update_pid()
+ {
+ # shellcheck disable=3028
+ genfun_pid=${BASHPID}
+ }
+ elif [ -d /proc/self/task ]; then
+ # This method relies on the proc_pid_task(5) interface of Linux.
+ _update_pid()
+ {
+ local dir tid
+
+ for dir in /proc/self/task/*/; do
+ if [ "${tid}" ] || [ ! -e "${dir}" ]; then
+ return 1
+ else
+ dir=${dir%/}
+ tid=${dir##*/}
+ fi
+ done
+ genfun_pid=${tid}
+ }
+ else
+ _update_pid()
+ {
+ false
+ }
+ fi
+
+ _update_pid
+}
+
+#
# Determines either the number of centiseconds elapsed since the unix epoch or
# the number of centiseconds that the operating system has been online,
# depending on the capabilities of the shell and/or platform. Upon success, the
diff --git a/test-functions b/test-functions
index fe26905..51f3adf 100755
--- a/test-functions
+++ b/test-functions
@@ -454,6 +454,44 @@ test_srandom() {
iterate_tests 2 "$@"
}
+test_srandom_forked()
+{
+ set -- \
+ eq 0 unforked \
+ eq 0 forking
+
+ callback() {
+ local mode number
+
+ shift
+ mode=$1
+ set --
+ test_description="srandom equality where $mode"
+ if [ "${mode}" = "forking" ]; then
+ srandom
+ ( srandom )
+ srandom
+ ( srandom )
+ else
+ srandom
+ srandom
+ srandom
+ srandom
+ fi > random_numbers || return
+ while read -r number; do
+ set -- "$@" "${number}"
+ done < random_numbers
+ test_description="srandom equality where $mode ($*)"
+ if [ "${mode}" = "forking" ]; then
+ test "$#" -eq 4 && test "$1" -ne "$2" && test "$3" -ne "$4"
+ else
+ test "$#" -eq 4 && ! trueof_all test "$1" -eq -- "$@"
+ fi
+ }
+
+ iterate_tests 3 "$@"
+}
+
test_newest() {
set -- \
ge 1 non-existent non-existent \
@@ -1212,7 +1250,11 @@ else
test_yesno || rc=1
test_die || rc=1
test_edo || rc=1
- test_srandom || rc=1
+ if ! test_srandom; then
+ rc=1
+ else
+ test_srandom_forked || rc=1
+ fi
test_newest || rc=1
test_oldest || rc=1
test_trim || rc=1