The below is a hack, but... well, that was already known. :)
Let's start with setting up a test dataset:
# Populate the working directory with the sample dataset: every file
# contains a single line holding its own name.
for f in \
  1_dog_yorkshire.sh \
  "1_dog_golden retriever.sh" \
  1_cat_persian.sh \
  1_cat_siamese.sh \
  2_spider_tarantula.sh
do
  printf '%s\n' "$f" >"$f"
done
We can then establish a shell variable per file, holding an array of that file's contents:
# Encode a name so that it can be embedded in a shell variable name.
#
# The name is split on "_"; each component is base64-encoded, its "="
# padding is stripped and recorded as a "_<count>" suffix, and the encoded
# components are joined with "__". get_name performs the inverse.
#
# Arguments: $1 - name to encode, e.g. "1_cat"
# Outputs:   the encoded name on stdout, e.g. "MQ_2__Y2F0_0"
#
# NOTE: the base64 alphabet also contains "+" and "/", which are not legal
# in variable names; a component whose encoding contains either of them
# produces a result that cannot be used as a variable name.
translate_name() {
  local -a components
  local component val val_eqs val_eqs_count val_no_eqs retval=
  IFS=_ read -r -a components <<<"$1"
  for component in "${components[@]}"; do
    # Encode from stdin with no file operands: that form is accepted by
    # both the GNU and the BSD/macOS base64. GNU wraps its output at 76
    # columns, so strip newlines to keep the encoding on a single line.
    val=$(printf '%s' "$component" | base64 | tr -d '\n')
    val_eqs=${val//[!=]/}     # only the "=" padding characters
    val_eqs_count=${#val_eqs}
    val_no_eqs=${val//=/}     # the encoding with the padding removed
    printf -v retval '%s%s_%s__' "$retval" "$val_no_eqs" "$val_eqs_count"
  done
  # Drop the separator appended after the last component.
  printf '%s\n' "${retval%__}"
}
# Read every *.sh file in the current directory into an array variable
# named CONTENT_<encoded name>, one array element per line of the file.
for f in *.sh; do
  # Without nullglob an unmatched pattern is passed through literally;
  # skip it (and anything else that is not a regular file).
  [[ -f $f ]] || continue
  varname=$(translate_name "${f%.sh}")
  # mapfile requires a valid identifier; base64 output may contain "+" or
  # "/", so refuse such names instead of letting mapfile fail.
  if [[ ! $varname =~ ^[A-Za-z0-9_]+$ ]]; then
    printf 'skipping %s: encoded name is not a valid variable name\n' "$f" >&2
    continue
  fi
  mapfile -t "CONTENT_$varname" <"$f"
done
So, then -- let's say you want to walk a subtree.
You can list the array variables associated with that subtree:
# List the names of all CONTENT_* array variables that belong to a subtree.
#
# Arguments: the key components of the subtree (e.g. 1 cat); they are
#            joined with "_" and encoded with translate_name.
# Outputs:   one matching variable name per line; nothing at all when no
#            variable matches.
# Returns:   1 if the encoded prefix is not a valid identifier, else 0.
get_subtree_vars() {
  local prefix cmd
  local -a matches=()
  prefix=CONTENT_$(IFS=_; translate_name "$*")
  # The prefix is spliced into code handed to eval, so insist that it
  # consists of identifier characters only.
  if [[ ! $prefix =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    printf 'get_subtree_vars: invalid variable prefix: %s\n' "$prefix" >&2
    return 1
  fi
  # ${!prefix@} only accepts a literal prefix, hence the eval.
  printf -v cmd 'matches=( "${!%s@}" )' "$prefix"
  eval "$cmd"
  # printf with no arguments would still emit one empty line, which a
  # reader would take for an (empty) variable name.
  (( ${#matches[@]} )) || return 0
  printf '%s\n' "${matches[@]}"
}
...and convert them back to keys:
# Given encoded variable names, print the original name of each, one per
# line. Inverse of translate_name.
#
# Arguments: one or more encoded names, with or without the CONTENT_ prefix
# Outputs:   the decoded names on stdout
# Returns:   1 if a section is malformed or cannot be decoded
get_name() {
  local varname section flag decoded decoded_ok
  local val val_no_eqs val_eqs_count retval i
  local -a sections
  for varname; do
    retval=
    varname=${varname#CONTENT_}
    # Sections are joined with "__"; turn that into a space so that read
    # can split them.
    varname=${varname//__/ }
    IFS=' ' read -r -a sections <<<"$varname"
    for section in "${sections[@]}"; do
      val_eqs_count=${section##*_}
      val_no_eqs=${section%_*}
      # base64 padding is 0-2 characters; reject anything else rather than
      # feeding an arbitrary string to arithmetic evaluation.
      if [[ ! $val_eqs_count =~ ^[0-2]$ ]]; then
        printf 'get_name: malformed section: %s\n' "$section" >&2
        return 1
      fi
      # Restore the "=" padding that translate_name stripped.
      val=$val_no_eqs
      for (( i = 0; i < val_eqs_count; i++ )); do
        val+="="
      done
      # The decode flag differs between implementations (GNU: -d/--decode,
      # older macOS: -D/--decode), so try each spelling in turn.
      decoded_ok=0
      for flag in --decode -d -D; do
        if decoded=$(base64 "$flag" <<<"$val" 2>/dev/null); then
          decoded_ok=1
          break
        fi
      done
      if (( ! decoded_ok )); then
        printf 'get_name: cannot decode section: %s\n' "$section" >&2
        return 1
      fi
      retval+=${decoded}_
    done
    # Drop the separator appended after the last component.
    printf '%s\n' "${retval%_}"
  done
}
...and retrieve their values:
# Given encoded names, emit a NUL-delimited list of the values stored in
# the corresponding CONTENT_* arrays.
#
# Arguments: one or more encoded names, with or without the CONTENT_ prefix
# Outputs:   every array element followed by a NUL byte; nothing for an
#            empty or unset array
# Returns:   1 if a name is not a valid identifier
get_values() {
  local name ref
  local -a values
  for name; do
    [[ $name = CONTENT_* ]] || name=CONTENT_$name
    # The name ends up inside an indirect expansion, where a subscript
    # would be evaluated; accept plain identifiers only.
    if [[ ! $name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      printf 'get_values: invalid variable name: %s\n' "$name" >&2
      return 1
    fi
    # Indirect expansion of "name[@]" yields every element of the array
    # without resorting to eval.
    ref="${name}[@]"
    values=( "${!ref}" )
    # printf with no arguments would still print a lone NUL, i.e. a
    # phantom empty value.
    (( ${#values[@]} )) || continue
    printf '%s\0' "${values[@]}"
  done
}
# Given a function name and the key components of a subtree, call the
# function once for each leaf value stored under that subtree.
#
# Arguments: $1  - name of the function (or command) to invoke
#            $2… - key components, passed on to get_subtree_vars
call_for_each() {
  local funcname=$1; shift
  local subtree_var value
  # File descriptors 3 and 4 carry the two lists, so stdin stays untouched
  # for the callback.
  while IFS= read -u 3 -r subtree_var; do
    # A blank line is not a variable name; skip it rather than looking up
    # an empty key.
    [[ -n $subtree_var ]] || continue
    while IFS= read -u 4 -r -d '' value; do
      "$funcname" "$value"
    done 4< <(get_values "$subtree_var")
  done 3< <(get_subtree_vars "$@")
}
Thus:
# Callback for the demonstration: print each argument in shell-quoted
# form, one per line.
printfunc() {
  printf '%q\n' "$@"
}

# Walk the subtree keyed by "1" / "cat" and print every stored value.
call_for_each printfunc 1 cat
...will emit:
1_cat_siamese.sh
1_cat_persian.sh
Notably, these are the data, not the metadata -- note the .sh extensions, which we stripped from the variable names on creation!
As another note: the use of `eval` in the code above should be safe from escape attempts (and thus from shell injection attacks via malicious filenames), because base64-encoding sanitizes any shell escapes that might be present in filenames; the use of `printf %q` provides an additional layer. Be careful deploying the methods above in any scenario where these guarantees aren't present.
All that said -- by reading content into memory, the above is making things really unnecessarily complex. Consider as an alternative to the above example the following self-contained code:
# Emit, NUL-delimited, the name of every file in the current directory
# whose name starts with the given key path.
#
# Arguments: key components; they are joined with "_" to form the prefix
# Returns:   non-zero, printing nothing, when no file matches
get_subtree_files() {
  local stem
  local -a matches
  stem=$(IFS=_; printf '%s\n' "$*")
  matches=( "$stem"* )
  # An unmatched pattern is left behind as a literal string, so checking
  # that the first element exists is enough to detect "no matches".
  if [[ -e ${matches[0]} ]]; then
    printf '%s\0' "${matches[@]}"
  else
    return 1
  fi
}
# Concatenate the contents of every file under the "1_cat" subtree.
xargs -0 cat < <(get_subtree_files 1 cat)