From e0033236020a587f430e9595be867ce1e9fea799 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Tue, 21 Oct 2025 14:29:27 +0200 Subject: [PATCH] New program 'po-fetch'. * gettext-tools/misc/po-fetch.in: New file. * gettext-tools/misc/Makefile.am: New file. * gettext-tools/configure.ac: Add AC_CONFIG_FILES invocations for the misc/ directory. * gettext-tools/Makefile.am (SUBDIRS): Add misc. (EXTRA_DIST): Remove misc/disclaim-translations.txt. (pkgdata_DATA): Remove variable. * gettext-tools/doc/po-fetch.texi: New file. * gettext-tools/doc/Makefile.am (gettext_TEXINFOS): Add it. * gettext-tools/doc/gettext.texi (po-fetch): Add global menu entry. (po-fetch Invocation): New section. * PACKAGING: Mention the installed 'po-fetch' program. * NEWS: Mention 'po-fetch'. --- .gitignore | 3 + NEWS | 4 + PACKAGING | 1 + gettext-tools/Makefile.am | 8 +- gettext-tools/configure.ac | 3 + gettext-tools/doc/Makefile.am | 1 + gettext-tools/doc/gettext.texi | 8 + gettext-tools/doc/po-fetch.texi | 51 +++ gettext-tools/misc/Makefile.am | 36 ++ gettext-tools/misc/po-fetch.in | 690 ++++++++++++++++++++++++++++++++ 10 files changed, 799 insertions(+), 6 deletions(-) create mode 100644 gettext-tools/doc/po-fetch.texi create mode 100644 gettext-tools/misc/Makefile.am create mode 100644 gettext-tools/misc/po-fetch.in diff --git a/.gitignore b/.gitignore index 93842d4de..48954eae7 100644 --- a/.gitignore +++ b/.gitignore @@ -454,6 +454,7 @@ /gettext-tools/libgrep/Makefile.in /gettext-tools/m4/Makefile.in /gettext-tools/man/Makefile.in +/gettext-tools/misc/Makefile.in /gettext-tools/projects/Makefile.in /gettext-tools/src/Makefile.in /gettext-tools/styles/Makefile.in @@ -673,6 +674,8 @@ autom4te.cache/ /gettext-tools/m4/Makefile /gettext-tools/man/Makefile /gettext-tools/man/x-to-1 +/gettext-tools/misc/Makefile +/gettext-tools/misc/po-fetch /gettext-tools/po/Makefile.in /gettext-tools/po/Makefile /gettext-tools/projects/Makefile diff --git a/NEWS b/NEWS index 3b192432f..2691f41ee 100644 --- 
a/NEWS +++ b/NEWS @@ -22,6 +22,10 @@ Version 1.0 - October 2025 going through a translation project) now need to run "msginit" before starting work on a PO file. + * A new program 'po-fetch' is provided, that fetches the translated + PO files from a translation project's site on the internet, and + updates the LINGUAS file accordingly. + # Improvements for translators: * msginit: - When the PO file already exists, 'msginit' now updates it w.r.t. the diff --git a/PACKAGING b/PACKAGING index a520407e6..beac11ddd 100644 --- a/PACKAGING +++ b/PACKAGING @@ -137,6 +137,7 @@ the following file list. $prefix/bin/xgettext $prefix/bin/gettextize $prefix/bin/autopoint + $prefix/bin/po-fetch $prefix/bin/recode* $prefix/share/man/man1/msg*.1 $prefix/share/man/man1/xgettext.1 diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am index 3a78b68f8..11214dcfc 100644 --- a/gettext-tools/Makefile.am +++ b/gettext-tools/Makefile.am @@ -19,9 +19,9 @@ AUTOMAKE_OPTIONS = 1.5 gnu ACLOCAL_AMFLAGS = -I m4 -I ../gettext-runtime/m4 -I ../m4 -I gnulib-m4 -I libgrep/gnulib-m4 -I libgettextpo/gnulib-m4 -I tests/gnulib-m4 -SUBDIRS = gnulib-lib libgrep src libgettextpo po its projects styles wizard autotools emacs man m4 tests system-tests gnulib-tests install-tests examples doc +SUBDIRS = gnulib-lib libgrep src libgettextpo po its projects styles wizard autotools misc emacs man m4 tests system-tests gnulib-tests install-tests examples doc -EXTRA_DIST = misc/disclaim-translations.txt +EXTRA_DIST = MOSTLYCLEANFILES = core *.stackdump DISTCLEANFILES = build-aux/xml-validate-10 build-aux/xml-validate-11 @@ -117,10 +117,6 @@ EXTRA_DIST += \ tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/tree_sitter/array.h \ tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/tree_sitter/parser.h -# Files installed for the user. - -pkgdata_DATA = misc/disclaim-translations.txt - # Files installed for use by gettextize. 
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index 67ce19fb8..8c3617923 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -754,6 +754,9 @@ AC_CONFIG_FILES([autotools/Makefile]) AC_CONFIG_FILES([autotools/autopoint], [chmod a+x autotools/autopoint]) AC_CONFIG_FILES([autotools/convert-archive], [chmod a+x autotools/convert-archive]) +AC_CONFIG_FILES([misc/Makefile]) +AC_CONFIG_FILES([misc/po-fetch], [chmod a+x misc/po-fetch]) + AC_CONFIG_FILES([emacs/Makefile]) AC_CONFIG_FILES([man/Makefile]) diff --git a/gettext-tools/doc/Makefile.am b/gettext-tools/doc/Makefile.am index 9162a8f12..9a4cef942 100644 --- a/gettext-tools/doc/Makefile.am +++ b/gettext-tools/doc/Makefile.am @@ -59,6 +59,7 @@ gettext_TEXINFOS = \ msgunfmt.texi \ gettextize.texi \ autopoint.texi \ + po-fetch.texi \ lang-c.texi \ lang-python.texi \ lang-java.texi \ diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index e0fa330ff..d86a24e7a 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -80,6 +80,7 @@ * msgunfmt: (gettext)msgunfmt Invocation. Uncompile MO file into PO file. * msguniq: (gettext)msguniq Invocation. Unify duplicates for PO file. * ngettext: (gettext)ngettext Invocation. Translate a message with plural. +* po-fetch: (gettext)po-fetch Invocation. Fetches a set of PO files. * printf_gettext: (gettext)printf_gettext Invocation. Translate a format string. * printf_ngettext: (gettext)printf_ngettext Invocation. Translate a format string with plural. * xgettext: (gettext)xgettext Invocation. Extract strings into a PO file. 
@@ -395,6 +396,7 @@ Integrating with Version Control Systems * Files under Version Control:: Files to put under version control * Translations under Version Control:: Put PO Files under Version Control * autopoint Invocation:: Invoking the @code{autopoint} Program +* po-fetch Invocation:: Invoking the @code{po-fetch} Program Other Programming Languages @@ -9834,6 +9836,7 @@ version controlled files. * Files under Version Control:: Files to put under version control * Translations under Version Control:: Put PO Files under Version Control * autopoint Invocation:: Invoking the @code{autopoint} Program +* po-fetch Invocation:: Invoking the @code{po-fetch} Program @end menu @node Distributed Development @@ -10002,6 +10005,11 @@ recover the location comments by running @code{msgmerge} again. @include autopoint.texi +@node po-fetch Invocation +@subsection Invoking the @code{po-fetch} Program + +@include po-fetch.texi + @node Release Management @section Creating a Distribution Tarball diff --git a/gettext-tools/doc/po-fetch.texi b/gettext-tools/doc/po-fetch.texi new file mode 100644 index 000000000..e5b204dd3 --- /dev/null +++ b/gettext-tools/doc/po-fetch.texi @@ -0,0 +1,51 @@ +@c This file is part of the GNU gettext manual. +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c See the file gettext.texi for copying conditions. + +@pindex po-fetch +@cindex @code{po-fetch} program, usage + +@example +po-fetch [@var{option}...] TP @var{domain} +po-fetch [@var{option}...] Weblate @var{base-url} @var{project} @var{component} +po-fetch [@var{option}...] git @var{base-url} @var{branch} @var{subdir} +@end example + +The @code{po-fetch} program fetches a set of PO files, produced by translators, +from a translation project's site on the internet, +and updates the @code{LINGUAS} file accordingly. + +The first argument indicates the kind of translation project: + +@table @samp +@item TP +denotes the Translation Project (@code{translationproject.org}). 
+@item Weblate +denotes a Weblate instance. +@item git +denotes a web front-end to a @code{git} repository +(such as a @code{gitweb} instance, a @code{cgit} instance, +or a GitLab or GitHub or Forgejo instance). +@end table + +@subsubsection Options + +@table @samp +@item --git +@opindex --git@r{, @code{po-fetch} option} +Make changes in the @code{git} index, to prepare for a @code{git} commit. + +@end table + +@subsubsection Informative output + +@table @samp +@item --help +@opindex --help@r{, @code{po-fetch} option} +Display this help and exit. + +@item --version +@opindex --version@r{, @code{po-fetch} option} +Output version information and exit. + +@end table diff --git a/gettext-tools/misc/Makefile.am b/gettext-tools/misc/Makefile.am new file mode 100644 index 000000000..881cf130f --- /dev/null +++ b/gettext-tools/misc/Makefile.am @@ -0,0 +1,36 @@ +## Makefile for the gettext-tools/misc subdirectory of GNU gettext +## Copyright (C) 2025 Free Software Foundation, Inc. +## +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <https://www.gnu.org/licenses/>. + +## Process this file with automake to produce Makefile.in. + +AUTOMAKE_OPTIONS = 1.2 gnits +EXTRA_DIST = +MOSTLYCLEANFILES = +CLEANFILES = +DISTCLEANFILES = + + +# Scripts for maintainers. + +bin_SCRIPTS = po-fetch + +DISTCLEANFILES += po-fetch + + +# Files installed for the user. 
+ +pkgdata_DATA = disclaim-translations.txt +EXTRA_DIST += disclaim-translations.txt diff --git a/gettext-tools/misc/po-fetch.in b/gettext-tools/misc/po-fetch.in new file mode 100644 index 000000000..4ba16f3b3 --- /dev/null +++ b/gettext-tools/misc/po-fetch.in @@ -0,0 +1,690 @@ +#! /bin/sh +# +# Copyright (C) 2002-2025 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# + +# This program fetches a set of PO files, produced by translators, from +# a translation project's site on the internet, and updates the LINGUAS +# file accordingly. +# +# It uses GNU wget. Alternatives would be: +# - GNU wget2 +# - curl +# - wcurl +# but each alternative would require separate testing. + +progname=$0 +package=@PACKAGE@ +version=@VERSION@ + +# func_exit STATUS +# exits with a given status. +# This function needs to be used, rather than 'exit', when a 'trap' handler is +# in effect that refers to $?. +func_exit () +{ + (exit $1); exit $1 +} + +# func_tmpdir +# creates a temporary directory. +# Input: +# - progname name of this program +# Sets variable +# - tmp pathname of freshly created temporary directory +func_tmpdir () +{ + # Use the environment variable TMPDIR, falling back to /tmp. This allows + # users to specify a different temporary directory, for example, if their + # /tmp is filled up or too small. + : "${TMPDIR=/tmp}" + { + # Use the mktemp program if available. 
If not available, hide the error + # message. + tmp=`(umask 077 && mktemp -d "$TMPDIR/gtXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" + } || + { + # Use a simple mkdir command. It is guaranteed to fail if the directory + # already exists. $RANDOM is bash specific and expands to empty in shells + # other than bash, ksh and zsh. Its use does not increase security; + # rather, it minimizes the probability of failure in a very cluttered /tmp + # directory. + tmp=$TMPDIR/gt$$-$RANDOM + (umask 077 && mkdir "$tmp") + } || + { + echo "$progname: cannot create a temporary directory in $TMPDIR" >&2 + func_exit 1 + } +} + +# func_usage +# outputs to stdout the --help usage message. +func_usage () +{ + echo "\ +Usage: po-fetch [OPTION...] TP DOMAIN + po-fetch [OPTION...] Weblate BASE-URL PROJECT COMPONENT + po-fetch [OPTION...] git BASE-URL BRANCH SUBDIR + +Fetches a set of PO files, produced by translators, from a translation +project's site on the internet, and updates the LINGUAS file accordingly. + +The first argument indicates the kind of translation project: + + TP denotes the Translation Project (translationproject.org). + Weblate denotes a Weblate instance. + git denotes a web front-end to a git repository + (such as a gitweb instance, a cgit instance, or a GitLab + or GitHub or Forgejo instance). + +Options: + --git Make changes in the git index, to prepare for a git commit. + +Informative output: + + --help Show this help text. + --version Show version and authorship information. + +Report bugs in the bug tracker at +or by email to ." +} + +# func_version +# outputs to stdout the --version message. +func_version () +{ + echo "$progname (GNU $package) $version" + echo "Copyright (C) 2002-2025 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law." 
+ echo "Written by" "Bruno Haible" +} + +# func_fetch_TP DOMAIN +# fetches a set of PO files from the Translation Project. +# Input: +# - tmp an empty temporary directory +func_fetch_TP () +{ + domain="$1" + main_url="https://translationproject.org/latest/${domain}/" + # The contents of this URL is an HTML page with hyperlinks to the PO files. + # We use 'wget' as an HTML parsing engine. + # Options '--recursive --level=1' tell wget to download the HTML page and the + # files referenced by the hyperlinks. + # Options '--no-host-directories --cut-dirs=2' tell wget to not create deeply + # nested subdirectories. The option '--no-directories' does not work as well, + # because it causes modified PO files to be downloaded as *.po.1, *.po.2, etc. + # which is not what we want. + # Option '--accept=po' tells wget to create only files named *.po and throw + # away all other downloaded data. + # Option '--directory-prefix=...' tells wget to store the resulting files in + # the specified subdirectory. We use a temporary directory, so that when a + # translation disappears (for example, when a translation team was renamed), + # we will actually remove that PO file. + # Option '--no-verbose' avoids too much chatter. + # Option '--https-only' is a safety measure, to prevent downloading PO files + # from insecure 'http' URLs. + wget --recursive --level=1 \ + --no-host-directories --cut-dirs=2 \ + --accept=po \ + --directory-prefix="$tmp" \ + --no-verbose \ + --https-only \ + "${main_url}" \ + || func_exit 1 +} + +# func_fetch_weblate BASE-URL PROJECT COMPONENT +# fetches a set of PO files from a Weblate instance. +# Input: +# - tmp an empty temporary directory +func_fetch_weblate () +{ + base_url="$1" + project="$2" + component="$3" + # Ensure $base_url ends in a slash. 
+ case "$base_url" in + */) ;; + *) base_url="${base_url}/" ;; + esac + webui_url="${base_url}projects/${project}/${component}/" + # The contents of this URL is an HTML page with a menu entry + # "Files > Download translation files as ZIP file". + zip_url="${base_url}download/${project}/${component}/?format=zip" + # Download this file. + # (Alternatively, we could use the Weblate REST API + # , + # to download the PO files one by one.) + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_all_.zip --no-verbose "$zip_url" \ + || func_exit 1 + # Unpack it. It contains a subdirectory with a .pot file and some .po files. + (cd "$tmp" && unzip -x -n -j _all_.zip) \ + || func_exit 1 + rm -f "$tmp"/_all_.zip + # Remove all non-PO files. + (cd "$tmp" \ + && for file in *; do \ + case "$file" in \ + *.po) ;; \ + *) rm -f "$file" ;; \ + esac; \ + done \ + ) + # Some projects use a "File mask" of "SOME_PREFIX.*.po" rather than "*.po". + # Rename the PO files so that they fit the usual naming scheme. + # Also, make all of the PO files world-readable. + (cd "$tmp" \ + && sed_trim='s/^.*\.\(.*\.po\)/\1/' \ + && for file in *.po; do \ + chmod a+r "$file"; \ + case "$file" in \ + *.*.po) g=`echo "$file" | sed -e "$sed_trim"`; mv "$file" "$g";; \ + esac; \ + done \ + ) +} + +# func_fetch_gitweb BASE-URL BRANCH SUBDIR +# fetches a set of PO files from a git repository via the gitweb protocol. +# Input: +# - tmp an empty temporary directory +func_fetch_gitweb () +{ + base_url="$1" + branch="$2" + subdir="$3" + # Validate the base_url. + case "$base_url" in + *"?p="*) ;; + *) + echo "gitweb URL is not valid (missing 'p' parameter)" 1>&2 + func_exit 1 + ;; + esac + webui_url="${base_url};a=tree;hb=refs/heads/${branch}" + if test -n "$subdir"; then + webui_url="${webui_url};f=${subdir}" + fi + # The contents of this URL is an HTML page with a list of files. + # Download it. + # Option '--no-verbose' avoids too much chatter. 
+ wget -O "$tmp"/_all_.html --no-verbose "$webui_url" \ + || func_exit 1 + # For each file, there is an HTML element + # FILENAME + sed -n -e 's/^.*\([^<>]*\)<[/]a>.*$/\1/p' < "$tmp"/_all_.html \ + | while read filename; do + case "$filename" in + *.po) + if test -n "$subdir"; then + subdir_filename="$subdir/$filename" + else + subdir_filename="$filename" + fi + file_url="${base_url};a=blob_plain;hb=refs/heads/${branch};f=${subdir_filename}" + wget -O "$tmp/$filename" --no-verbose "$file_url" \ + || func_exit 1 + ;; + esac + done + rm -f "$tmp"/_all_.html +} + +# func_fetch_cgit BASE-URL BRANCH SUBDIR +# fetches a set of PO files from a git repository via the cgit protocol. +# Input: +# - tmp an empty temporary directory +func_fetch_cgit () +{ + base_url="$1" + branch="$2" + subdir="$3" + # Validate the base_url. + case "$base_url" in + *.git) ;; + *) + echo "cgit URL is not valid (missing '.git' suffix)" 1>&2 + func_exit 1 + ;; + esac + webui_url="${base_url}/tree" + if test -n "$subdir"; then + webui_url="${webui_url}/${subdir}" + fi + webui_url="${webui_url}?h=${branch}" + # The contents of this URL is an HTML page with a list of files. + # Download it. + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_all_.html --no-verbose "$webui_url" \ + || func_exit 1 + # For each file, there is an HTML element + # ]*>\([^<>]*\)<[/]a>.*$/\1/p' < "$tmp"/_all_.html \ + | while read filename; do + case "$filename" in + *.po) + if test -n "$subdir"; then + subdir_filename="$subdir/$filename" + else + subdir_filename="$filename" + fi + file_url="${base_url}/plain/${subdir_filename}?h=${branch}" + wget -O "$tmp/$filename" --no-verbose "$file_url" \ + || func_exit 1 + ;; + esac + done + rm -f "$tmp"/_all_.html +} + +# func_fetch_gitlab BASE-URL BRANCH SUBDIR +# fetches a set of PO files from a git repository in a GitLab instance +# browsable through some web UI. 
+# Input: +# - tmp an empty temporary directory +func_fetch_gitlab () +{ + base_url="$1" + branch="$2" + subdir="$3" + # Ensure $base_url ends in a slash. + case "$base_url" in + */) ;; + *) base_url="${base_url}/" ;; + esac + webui_url="${base_url}-/tree/${branch}" + if test -n "$subdir"; then + webui_url="${webui_url}/${subdir}" + fi + # The contents of this URL is an HTML page. But it does not contain the list + # of files. Instead, the web request that returns the list of files is + filelist_url="${base_url}-/refs/${branch}/logs_tree" + if test -n "$subdir"; then + filelist_url="${filelist_url}/${subdir}" + fi + filelist_url="${filelist_url}?format=json&offset=0" + # Download it. + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_all_.json --no-verbose "$filelist_url" \ + || func_exit 1 + # For each file, there is a JSON array element {"file_name":"FILENAME",...}. + sed_add_newlines='s/},{"file_name":/},\ +{"file_name":/g' + sed_extract_filename='s/^.*{"file_name":"\([^"]*\)",.*$/\1/p' + { sed -e "$sed_add_newlines" < "$tmp"/_all_.json; echo; } \ + | sed -n -e "$sed_extract_filename" \ + | while read filename; do + case "$filename" in + *.po) + if test -n "$subdir"; then + subdir_filename="$subdir/$filename" + else + subdir_filename="$filename" + fi + file_url="${base_url}-/raw/${branch}/${subdir_filename}" + wget -O "$tmp/$filename" --no-verbose "$file_url" \ + || func_exit 1 + ;; + esac + done + rm -f "$tmp"/_all_.json +} + +# func_fetch_github BASE-URL BRANCH SUBDIR +# fetches a set of PO files from a git repository in a GitHub instance +# browsable through some web UI. +# Input: +# - tmp an empty temporary directory +func_fetch_github () +{ + base_url="$1" + branch="$2" + subdir="$3" + # Ensure $base_url ends in a slash. 
+ case "$base_url" in + */) ;; + *) base_url="${base_url}/" ;; + esac + webui_url="${base_url}tree/${branch}" + if test -n "$subdir"; then + webui_url="${webui_url}/${subdir}" + fi + # The contents of this URL is an HTML page with a list of files. + # But a web request with a much smaller response, that also contains the list + # of files, is: + filelist_url="${base_url}tree-commit-info/${branch}" + if test -n "$subdir"; then + filelist_url="${filelist_url}/${subdir}" + fi + # Download it. + # The 'Accept' header is necessary to avoid HTTP status 400 (Bad Request). + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_all_.json --header="Accept: application/json" --no-verbose "$filelist_url" \ + || func_exit 1 + # For each file, there is a JSON object element "FILENAME":{"oid":...}. + sed_add_newlines='s/},"\([^"]*\)":{"oid":/},\ +"\1":{"oid":/g' + sed_extract_filename='s/^[^"]*"\([^"]*\)":.*/\1/p' + { sed -e "$sed_add_newlines" < "$tmp"/_all_.json; echo; } \ + | sed -n -e "$sed_extract_filename" \ + | while read filename; do + case "$filename" in + *.po) + if test -n "$subdir"; then + subdir_filename="$subdir/$filename" + else + subdir_filename="$filename" + fi + file_url="${base_url}raw/refs/heads/${branch}/${subdir_filename}" + wget -O "$tmp/$filename" --no-verbose "$file_url" \ + || func_exit 1 + ;; + esac + done + rm -f "$tmp"/_all_.json +} + +# func_fetch_forgejo BASE-URL BRANCH SUBDIR +# fetches a set of PO files from a git repository in a Forgejo instance +# browsable through some web UI. +# Input: +# - tmp an empty temporary directory +func_fetch_forgejo () +{ + base_url="$1" + branch="$2" + subdir="$3" + # Ensure $base_url ends in a slash. 
+ case "$base_url" in + */) ;; + *) base_url="${base_url}/" ;; + esac + base_url_path=`echo "$base_url" | sed -e 's|^[^/]*//[^/]*||'` + webui_url="${base_url}src/branch/${branch}" + if test -n "$subdir"; then + webui_url="${webui_url}/${subdir}" + fi + # The contents of this URL is an HTML page with a list of files. + # Download it. + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_all_.html --no-verbose "$webui_url" \ + || func_exit 1 + # For each file, there is an HTML element + # + expected_href_prefix="${base_url_path}src/branch/${branch}" + if test -n "$subdir"; then + expected_href_prefix="${expected_href_prefix}/${subdir}" + fi + expected_href_prefix="${expected_href_prefix}/" + sed -n -e 's/^.*]* href="\([^"]*\).*$/\1/p' < "$tmp"/_all_.html \ + | while read anchor; do + : echo "anchor=$anchor" + case "$anchor" in + "$expected_href_prefix"*) + filename=`echo "$anchor" | sed -e "s|^${expected_href_prefix}||"` + : echo "filename=$filename" + case "$filename" in + *.po) + if test -n "$subdir"; then + subdir_filename="$subdir/$filename" + else + subdir_filename="$filename" + fi + file_url="${base_url}raw/branch/${branch}/${subdir_filename}" + wget -O "$tmp/$filename" --no-verbose "$file_url" \ + || func_exit 1 + ;; + esac + ;; + esac + done + rm -f "$tmp"/_all_.html +} + +# Unset CDPATH. Otherwise, output from 'cd dir' can surprise callers. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Command-line option processing. +# Removes the OPTIONS from the arguments. Sets the variables: +# - kind denotes the kind of translation project +# - do_git true if changes should be staged in git, false otherwise +{ + kind= + do_git=false + + while test $# -gt 0; do + case "$1" in + --git | --gi | --g ) + shift + do_git=true + ;; + --help | --hel | --he | --h ) + func_usage + func_exit $? ;; + --version | --versio | --versi | --vers | --ver | --ve | --v ) + func_version + func_exit $? 
;; + -- ) + # Stop option processing + shift + break ;; + -* ) + echo "po-fetch: unknown option $1" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 ;; + * ) + break ;; + esac + done +} + +if test $# = 0; then + echo "po-fetch: missing argument" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 +fi +kind="$1" +shift + +# Create a temporary directory, and prepare for cleaning it up. +func_tmpdir +trap 'exit_status=$? + if test "$signal" != EXIT; then + echo "caught signal SIG$signal" >&2 + fi + rm -rf "$tmp" + exit $exit_status' EXIT +for signal in HUP INT QUIT PIPE TERM; do + trap '{ signal='$signal'; func_exit 1; }' $signal +done +signal=EXIT + +# Fetch the PO files into the temporary directory. +case "$kind" in + TP) + if test $# -lt 1; then + echo "po-fetch: missing argument" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 + fi + func_fetch_TP "$@" + ;; + Weblate) + if test $# -lt 3; then + echo "po-fetch: missing argument" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 + fi + func_fetch_weblate "$@" + ;; + git) + if test $# -lt 3; then + echo "po-fetch: missing argument" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 + fi + # Download the contents of the BASE-URL. + # Option '--no-verbose' avoids too much chatter. + wget -O "$tmp"/_meta_.html --no-verbose "$1" \ + || func_exit 1 + # Analyze it, to find the supported protocol. + protocol= + sed_extract_generator_1='s/^.*&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 + ;; + esac + ;; + *) + echo "po-fetch: invalid first argument '$kind'" 1>&2 + echo "Try 'po-fetch --help' for more information." 1>&2 + func_exit 1 + ;; +esac + +# Eliminate broken PO files. +(cd "$tmp" + for file in *.po; do + if test -f "$file"; then + # Check against encoding error. 
+ if msgcat -t UTF-8 "$file" 2>&1 >/dev/null | grep .; then + echo "po-fetch: warning: $file has an encoding error; skipping it" 1>&2 + rm -f "$file" + else + # Check against other errors. + if msgfmt -c -o /dev/null "$file"; then + # Check whether it contains some translations at all. + if msgfmt --statistics -o /dev/null "$file" 2>&1 | grep '^0 ' >/dev/null; then + echo "po-fetch: note: $file has no translations; skipping it" 1>&2 + rm -f "$file" + fi + else + echo "po-fetch: warning: $file has errors; skipping it" 1>&2 + rm -f "$file" + fi + fi + fi + done +) + +# Backup the LINGUAS file. +if test -f LINGUAS; then + mv LINGUAS LINGUAS~ +fi + +# Create the new LINGUAS file. +{ echo '# Set of available languages.' + echo '# This file is automatically generated from the list of PO files.' + (cd "$tmp" + for file in *.po; do + if test -f "$file"; then + echo "$file" + fi + done + ) | sed -e 's/\.po$//' | LC_ALL=C sort +} > LINGUAS \ + || func_exit 1 +if $do_git; then + git add LINGUAS +fi + +# Update existing PO files. Remove outdated PO files. +for file in *.po; do + if test -f "$file"; then + if test -f "$tmp/$file"; then + mv "$tmp/$file" "$file" || func_exit 1 + if $do_git; then + git add "$file" + fi + else + rm -f "$file" + if $do_git; then + git rm "$file" + fi + fi + fi +done + +# Add new PO files. 
+for tmpf in "$tmp"/*.po; do + if test -f "$tmpf"; then + file=`echo "$tmpf" | sed -e 's|^.*/||'` + mv "$tmpf" "$file" || func_exit 1 + if $do_git; then + git add "$file" + fi + fi +done + +# Examples for testing: +# $ po-fetch TP hello +# $ po-fetch Weblate https://translate.codeberg.org/ gnuhealth health_dentistry +# $ po-fetch Weblate https://translate.codeberg.org/ lazarr lzr_gui +# $ po-fetch git https://gitweb.git.savannah.gnu.org/gitweb/?p=gettext.git master gettext-runtime/po +# $ po-fetch git https://cgit.git.savannah.gnu.org/cgit/gettext.git master gettext-runtime/po +# $ po-fetch git https://gitlab.com/qemu-project/qemu master po +# $ po-fetch git https://github.com/autotools-mirror/gettext/ master gettext-runtime/po +# $ po-fetch git https://codeberg.org/guix/translations/ master po/guix +