|
|
sfeed_update: use xargs -P -0 - sfeed - RSS and Atom parser |
|
|
 |
git clone git://git.codemadness.org/sfeed (git://git.codemadness.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
 |
commit cdb8f7feb135adf6f18e389b4bbf47886089474a |
|
|
 |
parent 62bfed65ca91c34ea24b81b191c23d4542a7075b |
|
|
 |
Author: Hiltjo Posthuma <hiltjo@codemadness.org> (mailto:hiltjo@codemadness.org) |
|
|
|
Date: Tue, 26 Dec 2023 15:59:39 +0100 |
|
|
|
|
|
|
|
sfeed_update: use xargs -P -0 |
|
|
|
|
|
|
|
Some of the options, like -P, are as of writing (2023) non-POSIX: |
|
|
|
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/xargs.html. However |
|
|
|
many systems have supported this useful extension for many years now. |
|
|
|
|
|
|
|
Some historic context: |
|
|
|
|
|
|
|
The xargs -0 option was added on 1996-06-11, about a year after the NetBSD |
|
|
|
import (over 27 years ago at the time of writing): |
|
|
|
|
|
|
|
http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.2&content-type=text/x-cvsweb-markup |
|
|
|
|
|
|
|
On OpenBSD the xargs -P option was added on 2003-12-06 by syncing the FreeBSD |
|
|
|
code: |
|
|
|
|
|
|
|
http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/xargs/xargs.c?rev=1.14&content-type=text/x-cvsweb-markup |
|
|
|
|
|
|
|
Looking at the imported git history log of GNU findutils (which has xargs), the |
|
|
|
very first commit already had the -0 and -P options on Sun Feb 4 20:35:16 1996 |
|
|
|
+0000. |
|
|
|
|
|
|
|
Tested on many systems, old and new, some notable: |
|
|
|
|
|
|
|
- OpenBSD 7.4 |
|
|
|
- Void Linux |
|
|
|
- FreeBSD 12 |
|
|
|
- NetBSD 9.3 |
|
|
|
- HaikuOS (uses GNU tools). |
|
|
|
- Slackware 11 |
|
|
|
- OpenBSD 3.8 |
|
|
|
- NetBSD 5.1 |
|
|
|
|
|
|
|
Some shells: |
|
|
|
- oksh |
|
|
|
- bash |
|
|
|
- dash |
|
|
|
- zsh |
|
|
|
|
|
|
|
During testing some incompatibilities were found in parsing the fields, so |
|
|
|
the arguments are passed as one argument which is split later on by the child |
|
|
|
program. |
|
|
|
|
|
|
|
Diffstat: |
|
|
|
M sfeed_update | 48 +++++++++++++++++++++----------- |
|
|
|
|
|
|
|
1 file changed, 32 insertions(+), 16 deletions(-) |
|
|
|
--- |
|
|
 |
diff --git a/sfeed_update b/sfeed_update |
|
|
|
@@ -163,14 +163,12 @@ _feed() { |
|
|
|
# fetch and process a feed in parallel. |
|
|
|
# feed(name, feedurl, [basesiteurl], [encoding]) |
|
|
|
feed() { |
|
|
|
- # wait until ${maxjobs} are finished: will stall the queue if an item |
|
|
|
- # is slow, but it is portable. |
|
|
|
- [ ${signo} -ne 0 ] && return |
|
|
|
- [ $((curjobs % maxjobs)) -eq 0 ] && wait |
|
|
|
- [ ${signo} -ne 0 ] && return |
|
|
|
- curjobs=$((curjobs + 1)) |
|
|
|
- |
|
|
|
- _feed "$@" & |
|
|
|
+ # Job parameters for xargs. |
|
|
|
+ # Specify fields as a single parameter separated by the NUL separator. |
|
|
|
+ # These fields are split later by the child process, this allows xargs |
|
|
|
+ # with empty fields across many implementations. |
|
|
|
+ printf '%s\037%s\037%s\037%s\037%s\037%s\0' \ |
|
|
|
+ "${config}" "${sfeedtmpdir}" "$1" "$2" "$3" "$4" |
|
|
|
} |
|
|
|
|
|
|
|
cleanup() { |
|
|
|
@@ -201,8 +199,6 @@ feeds() { |
|
|
|
} |
|
|
|
|
|
|
|
main() { |
|
|
|
- # job counter. |
|
|
|
- curjobs=0 |
|
|
|
# signal number received for parent. |
|
|
|
signo=0 |
|
|
|
# SIGINT: signal to interrupt parent. |
|
|
|
@@ -217,16 +213,36 @@ main() { |
|
|
|
touch "${sfeedtmpdir}/ok" || die |
|
|
|
# make sure path exists. |
|
|
|
mkdir -p "${sfeedpath}" |
|
|
|
- # fetch feeds specified in config file. |
|
|
|
- feeds |
|
|
|
- # wait till all feeds are fetched (concurrently). |
|
|
|
- [ ${signo} -eq 0 ] && wait |
|
|
|
- # check error exit status indicator for parallel jobs. |
|
|
|
- [ -f "${sfeedtmpdir}/ok" ] |
|
|
|
+ |
|
|
|
+ # print feeds for parallel processing with xargs. |
|
|
|
+ feeds > "${sfeedtmpdir}/jobs" || die |
|
|
|
+ SFEED_UPDATE_CHILD="1" xargs -s 65535 -x -0 -P "${maxjobs}" -n 1 \ |
|
|
|
+ "$(readlink -f "${argv0}")" < "${sfeedtmpdir}/jobs" |
|
|
|
statuscode=$? |
|
|
|
+ |
|
|
|
+ # check error exit status indicator for parallel jobs. |
|
|
|
+ [ -f "${sfeedtmpdir}/ok" ] || statuscode=1 |
|
|
|
# on signal SIGINT and SIGTERM exit with signal number + 128. |
|
|
|
[ ${signo} -ne 0 ] && die $((signo+128)) |
|
|
|
die ${statuscode} |
|
|
|
} |
|
|
|
|
|
|
|
+# process a single feed. |
|
|
|
+# parameters are: config, tmpdir, name, feedurl, basesiteurl, encoding |
|
|
|
+if [ "${SFEED_UPDATE_CHILD}" = "1" ]; then |
|
|
|
+ IFS="" # "\037" |
|
|
|
+ [ "$1" = "" ] && exit 0 # must have an argument set |
|
|
|
+ printf '%s\n' "$1" | \ |
|
|
|
+ while read -r config tmpdir name feedurl basesiteurl encoding; do |
|
|
|
+ # load config file, sets $config. |
|
|
|
+ loadconfig "${config}" |
|
|
|
+ sfeedtmpdir="${tmpdir}" |
|
|
|
+ _feed "${name}" "${feedurl}" "${basesiteurl}" "${encoding}" |
|
|
|
+ exit "$?" |
|
|
|
+ done |
|
|
|
+ exit 0 |
|
|
|
+fi |
|
|
|
+ |
|
|
|
+# ...else parent mode: |
|
|
|
+argv0="$0" # remember $0, in shells like zsh $0 is the function name. |
|
|
|
[ "${SFEED_UPDATE_INCLUDE}" = "1" ] || main "$@" |
|