From f12d6634be8c083b870121f59b368e7a38bc7398 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 27 Dec 2025 23:53:53 +0100 Subject: [PATCH] New program 'spit'. * gettext-tools/machine-translation/README: New file. * gettext-tools/machine-translation/prototype/README: New file. * gettext-tools/machine-translation/prototype/OllamaSpit.java: New file. * gettext-tools/machine-translation/prototype/ollama-spit.c: New file. * gettext-tools/machine-translation/prototype/ollama-spit.go: New file. * gettext-tools/machine-translation/prototype/ollama-spit.py: New file. * gettext-tools/machine-translation/prototype/ollama-spit.sh: New file. * gettext-tools/configure.ac (INCJSON_C, LIBJSON_C, INCCURL, LIBCURL): New variables. (BUILD_SPIT_IN_C): New conditional. (AC_CONFIG_FILES): Add src/spit.py. * gettext-tools/src/country-table.h: New file. * gettext-tools/src/country-table.c: New file. * gettext-tools/src/spit.c: New file. * gettext-tools/src/spit.py.in: New file. * gettext-tools/src/FILES: Mention the new files. * gettext-tools/src/Makefile.am (bin_PROGRAMS): Conditionally add 'spit'. (noinst_SCRIPTS): New variable. (noinst_HEADERS): Add country-table.h. (spit_SOURCES, spit_CFLAGS, spit_LDADD, spit_DEPENDENCIES, spit_CPPFLAGS, spit_LDFLAGS): New variables. (install-exec-local): Conditionally install spit.py. (installdirs-local, uninstall-local): Update accordingly. (DISTCLEANFILES): Add spit.py. * gettext-tools/po/POTFILES.in: Add spit.c. * gettext-tools/man/spit.x: New file. * gettext-tools/man/Makefile.am (man_aux): Add spit.x. (man_MAN1SRC): Add spit.1. (man_HTML): Add spit.1.html. (spit.1, spit.1.html): Add dependencies. * gettext-tools/doc/gettext.texi (Pretranslating): New chapter. * gettext-tools/doc/spit.texi: New file. * gettext-tools/doc/Makefile.am (gettext_TEXINFOS): Add it. * DEPENDENCIES: Update URLs for libxml2. Add libjson-c, libcurl, Python, the Python module 'requests'. * PACKAGING: Mention the 'spit' program and its manual page. 
* NEWS: Mention the change. --- .gitignore | 6 + Admin/release-steps | 2 + DEPENDENCIES | 82 +- NEWS | 7 +- PACKAGING | 3 + gettext-tools/configure.ac | 85 ++ gettext-tools/doc/Makefile.am | 1 + gettext-tools/doc/gettext.texi | 83 ++ gettext-tools/doc/spit.texi | 108 +++ gettext-tools/machine-translation/README | 62 ++ .../prototype/OllamaSpit.java | 228 +++++ .../machine-translation/prototype/README | 89 ++ .../prototype/ollama-spit.c | 440 ++++++++++ .../prototype/ollama-spit.go | 164 ++++ .../prototype/ollama-spit.py | 130 +++ .../prototype/ollama-spit.sh | 130 +++ gettext-tools/man/Makefile.am | 7 +- gettext-tools/man/spit.x | 4 + gettext-tools/po/POTFILES.in | 1 + gettext-tools/src/FILES | 10 + gettext-tools/src/Makefile.am | 37 +- gettext-tools/src/country-table.c | 277 ++++++ gettext-tools/src/country-table.h | 45 + gettext-tools/src/spit.c | 666 +++++++++++++++ gettext-tools/src/spit.py.in | 800 ++++++++++++++++++ 25 files changed, 3445 insertions(+), 22 deletions(-) create mode 100644 gettext-tools/doc/spit.texi create mode 100644 gettext-tools/machine-translation/README create mode 100644 gettext-tools/machine-translation/prototype/OllamaSpit.java create mode 100644 gettext-tools/machine-translation/prototype/README create mode 100644 gettext-tools/machine-translation/prototype/ollama-spit.c create mode 100644 gettext-tools/machine-translation/prototype/ollama-spit.go create mode 100755 gettext-tools/machine-translation/prototype/ollama-spit.py create mode 100755 gettext-tools/machine-translation/prototype/ollama-spit.sh create mode 100644 gettext-tools/man/spit.x create mode 100644 gettext-tools/src/country-table.c create mode 100644 gettext-tools/src/country-table.h create mode 100644 gettext-tools/src/spit.c create mode 100644 gettext-tools/src/spit.py.in diff --git a/.gitignore b/.gitignore index 48954eae7..4f68013ae 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ /gettext-runtime/doc/Admin/jdom-1.0.jar 
/gettext-runtime/doc/Admin/Matrix*.class /gettext-runtime/doc/Admin/matrix.xml +/gettext-tools/machine-translation/prototype/OllamaSpit.class # Files brought in by autopull.sh: /gettext-tools/tree-sitter-* @@ -550,6 +551,8 @@ /gettext-tools/man/msguniq.1.html /gettext-tools/man/recode-sr-latin.1 /gettext-tools/man/recode-sr-latin.1.html +/gettext-tools/man/spit.1 +/gettext-tools/man/spit.1.html /gettext-tools/man/xgettext.1 /gettext-tools/man/xgettext.1.html /gettext-tools/po/gettext-tools.pot @@ -680,6 +683,7 @@ autom4te.cache/ /gettext-tools/po/Makefile /gettext-tools/projects/Makefile /gettext-tools/src/Makefile +/gettext-tools/src/spit.py /gettext-tools/src/user-email /gettext-tools/styles/Makefile /gettext-tools/system-tests/Makefile @@ -792,6 +796,8 @@ autom4te.cache/ /gettext-tools/src/msguniq.exe /gettext-tools/src/recode-sr-latin /gettext-tools/src/recode-sr-latin.exe +/gettext-tools/src/spit +/gettext-tools/src/spit.exe /gettext-tools/src/urlget /gettext-tools/src/urlget.exe /gettext-tools/src/xgettext diff --git a/Admin/release-steps b/Admin/release-steps index 1d82e1c5c..387396e84 100644 --- a/Admin/release-steps +++ b/Admin/release-steps @@ -143,6 +143,8 @@ We assume that the following environment variables are set: gettext-tools/src/msgunfmt.c gettext-tools/src/msguniq.c gettext-tools/src/recode-sr-latin.c + gettext-tools/src/spit.c + gettext-tools/src/spit.py.in gettext-tools/src/urlget.c gettext-tools/src/xgettext.c diff --git a/DEPENDENCIES b/DEPENDENCIES index 92eca9909..9cdce74b6 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -37,15 +37,14 @@ The following packages should be installed before GNU gettext is installed * libxml2 + Recommended. - Needed for 'xgettext' and 'msgfmt', so that it can parse XML - files. Also needed for the --color option of the various - programs. + Needed for 'xgettext' and 'msgfmt', so that it can parse XML files. + Also needed for the --color option of the various programs. 
If not present, a subset of libxml2 (included in this package) will be compiled into libgettextlib. + Homepage: - http://xmlsoft.org/ + https://gitlab.gnome.org/GNOME/libxml2/-/wikis/home + Download: - ftp://xmlsoft.org/libxml2/ + https://download.gnome.org/sources/libxml2/ + Pre-built package name: - On Debian and Debian-based systems: libxml2-dev, - On Red Hat distributions: libxml2-devel. @@ -53,6 +52,32 @@ The following packages should be installed before GNU gettext is installed + If it is installed in a nonstandard directory, pass the option --with-libxml2-prefix=DIR to 'configure'. +* libjson-c + + Recommended. + Needed for machine translation. + If not present, 'spit' will be a Python script instead of an executable. + + Homepage: + https://github.com/json-c/json-c/wiki + + Download: + https://s3.amazonaws.com/json-c_releases/releases/index.html + + Pre-built package name: + - On Debian and Debian-based systems: libjson-c-dev, + - On Red Hat distributions: json-c-devel. + - Other: https://repology.org/project/json-c/versions + +* libcurl + + Recommended. + Needed for machine translation. + If not present, 'spit' will be a Python script instead of an executable. + + Homepage: + https://curl.se/libcurl/ + + Download: + https://curl.se/download.html + + Pre-built package name: + - On Debian and Debian-based systems: libcurl4-gnutls-dev or libcurl4-openssl-dev, + - On Red Hat distributions: libcurl-devel. + - Other: https://repology.org/project/curl/versions + * libacl + Recommended on Linux systems. Needed so that the creation of backup files respects the access control @@ -66,18 +91,18 @@ The following packages should be installed before GNU gettext is installed - On Red Hat distributions: acl, libacl-devel. - Other: https://repology.org/project/acl/versions - * libattr - + Recommended on Linux systems. - Needed so that the creation of backup files respects the access control - lists (ACLs) set on the original files, with fewer system calls. 
- + Homepage: - https://savannah.nongnu.org/projects/attr/ - + Download: - https://download.savannah.nongnu.org/releases/attr/ - + Pre-built package name: - - On Debian and Debian-based systems: libattr1-dev, - - On Red Hat distributions: libattr-devel. - - Other: https://repology.org/project/attr/versions +* libattr + + Recommended on Linux systems. + Needed so that the creation of backup files respects the access control + lists (ACLs) set on the original files, with fewer system calls. + + Homepage: + https://savannah.nongnu.org/projects/attr/ + + Download: + https://download.savannah.nongnu.org/releases/attr/ + + Pre-built package name: + - On Debian and Debian-based systems: libattr1-dev, + - On Red Hat distributions: libattr-devel. + - Other: https://repology.org/project/attr/versions * A Java runtime and compiler (e.g. OpenJDK, AdoptOpenJDK, or kaffe). + Recommended. @@ -266,6 +291,29 @@ The following packages should be installed when GNU gettext is installed + Download: https://ftp.gnu.org/gnu/gnulib/gnulib-l10n-* +* Python 3.7 or newer. + + Recommended if GNU Gettext was built without libjson-c or without libcurl. + Needed for machine translation. + + Homepage: + https://www.python.org/ + + Download: + https://www.python.org/downloads/ + + Pre-built package name: + - On Debian and Debian-based systems: python3, + - On Red Hat distributions: python3. + - Other: https://repology.org/project/python/versions +* The Python module 'requests'. + + Recommended if GNU Gettext was built without libjson-c or without libcurl. + Needed for machine translation. + + Homepage: + https://pypi.org/project/requests/ + + Download: + https://pypi.org/project/requests/#files + + Pre-built package name: + - On Debian and Debian-based systems: python3-requests, + - On Red Hat distributions: python-requests. 
+ - Other: https://repology.org/project/python%3Arequests/versions + The following should be installed when GNU gettext is built, but are not needed later, once it is installed (build dependencies, but not runtime diff --git a/NEWS b/NEWS index 587c3a129..2ba5da5dd 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Version 1.0 - October 2025 +Version 1.0 - December 2025 # Improvements for maintainers and distributors: * In a po/ directory, the PO files are now exactly those that the @@ -36,6 +36,11 @@ Version 1.0 - October 2025 POT file, like 'msgmerge' would do. Previously, 'msginit' failed with an error message in this situation. + * Pretranslation: + - A new program 'spit' is provided, that implements machine translation + through a locally installed Large Language Model (LLM). + - The documentation has a new chapter "Pretranslation". + # Programming languages support: * OCaml: - xgettext now supports OCaml. diff --git a/PACKAGING b/PACKAGING index beac11ddd..508d62335 100644 --- a/PACKAGING +++ b/PACKAGING @@ -139,16 +139,19 @@ the following file list. 
$prefix/bin/autopoint $prefix/bin/po-fetch $prefix/bin/recode* + $prefix/bin/spit $prefix/share/man/man1/msg*.1 $prefix/share/man/man1/xgettext.1 $prefix/share/man/man1/gettextize.1 $prefix/share/man/man1/autopoint.1 $prefix/share/man/man1/recode*.1 + $prefix/share/man/man1/spit.1 $prefix/share/doc/gettext/msg*.1.html $prefix/share/doc/gettext/xgettext.1.html $prefix/share/doc/gettext/gettextize.1.html $prefix/share/doc/gettext/autopoint.1.html $prefix/share/doc/gettext/recode*.1.html + $prefix/share/doc/gettext/spit.1.html $prefix/share/doc/gettext/gettext_*.html $prefix/share/doc/gettext/FAQ.html $prefix/share/doc/gettext/tutorial.html diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index 8c3617923..58e59a912 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -477,6 +477,90 @@ AS_IF([test "$MODULA2_CHOICE" != no], ]) AC_SUBST([BUILDMODULA2]) +dnl Check for the libjson-c library. +dnl Set INCJSON_C to the -I option for the include files. +dnl Set LIBJSON_C to the -L and -l options for the library. +AC_MSG_CHECKING([for the libjson-c library]) +LIBJSON_C=? +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], [[]])], + [], + [dnl The include files are not present. + LIBJSON_C= + ]) +if test "$LIBJSON_C" = "?"; then + save_LIBS="$LIBS" + LIBS="$LIBS -ljson-c" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[return json_c_version () [0] != '0';]]) + ], + [LIBJSON_C="-ljson-c"], + [dnl The library is not present. + LIBJSON_C= + ]) + LIBS="$save_LIBS" +fi +dnl Find the location of the include files subdirectory. +INCJSON_C= +if test -n "$LIBJSON_C"; then + gl_ABSOLUTE_HEADER_ONE([json-c/json_c_version.h]) + if test -n "$gl_cv_absolute_json_c_json_c_version_h"; then + INCJSON_C="-I"`echo "$gl_cv_absolute_json_c_json_c_version_h" | sed -e 's|.json_c_version[.]h$||'` + else + dnl Could not find the include files subdirectory. 
+ LIBJSON_C= + fi +fi +AC_SUBST([INCJSON_C]) +AC_SUBST([LIBJSON_C]) +if test -n "$LIBJSON_C"; then + gt_libjson_c_found=yes +else + gt_libjson_c_found=no +fi +AC_MSG_RESULT([$gt_libjson_c_found]) + +dnl Check for the libcurl library. +dnl Set INCCURL to the -I option for the include files. +dnl Set LIBCURL to the -L and -l options for the library. +dnl TODO: Support linking with libcurl.a and its many dependencies. +AC_MSG_CHECKING([for the libcurl library]) +INCCURL= +LIBCURL=? +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], [[]])], + [], + [dnl The include files are not present. + LIBCURL= + ]) +if test "$LIBCURL" = "?"; then + save_LIBS="$LIBS" + LIBS="$LIBS -lcurl" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[curl_global_init (CURL_GLOBAL_DEFAULT);]]) + ], + [LIBCURL="-lcurl"], + [dnl The library is not present. + LIBCURL= + ]) + LIBS="$save_LIBS" +fi +AC_SUBST([INCCURL]) +AC_SUBST([LIBCURL]) +if test -n "$LIBCURL"; then + gt_libcurl_found=yes +else + gt_libcurl_found=no +fi +AC_MSG_RESULT([$gt_libcurl_found]) + +dnl Check in which form to install the 'spit' program. +AM_CONDITIONAL([BUILD_SPIT_IN_C], [test -n "$LIBJSON_C" && test -n "$LIBCURL"]) + dnl Check for Emacs and where to install .elc files. dnl Sometimes Emacs is badly installed. Allow the user to work around it. 
AC_ARG_WITH([emacs], @@ -737,6 +821,7 @@ AC_CONFIG_FILES([libgrep/gnulib-lib/Makefile]) AC_CONFIG_FILES([src/Makefile]) AC_CONFIG_FILES([src/user-email:src/user-email.sh.in]) +AC_CONFIG_FILES([src/spit.py], [chmod a+x src/spit.py]) AC_CONFIG_FILES([libgettextpo/Makefile]) AC_CONFIG_FILES([libgettextpo/exported.sh]) diff --git a/gettext-tools/doc/Makefile.am b/gettext-tools/doc/Makefile.am index 9a4cef942..fefe79226 100644 --- a/gettext-tools/doc/Makefile.am +++ b/gettext-tools/doc/Makefile.am @@ -45,6 +45,7 @@ gettext_TEXINFOS = \ xgettext.texi \ msginit.texi \ msgmerge.texi \ + spit.texi \ msgcat.texi \ msgconv.texi \ msggrep.texi \ diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 9f683e6b3..fb32044c2 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -169,6 +169,7 @@ version @value{VERSION}. * Template:: Making the PO Template File * Creating:: Creating a New PO File * Updating:: Updating Existing PO Files +* Pretranslating:: Pretranslating PO Files * Editing:: Editing PO Files * Manipulating:: Manipulating PO Files * Binaries:: Producing Binary MO Files @@ -254,6 +255,11 @@ Updating Existing PO Files * msgmerge Invocation:: Invoking the @code{msgmerge} Program +Pretranslating PO Files + +* Installing an LLM:: Installing a Large Language Model +* spit Invocation:: Invoking the @code{spit} Program + Editing PO Files * Web based localization:: Web-based PO editing @@ -4035,6 +4041,83 @@ format of the plural forms field is described in @ref{Plural forms} and @include msgmerge.texi +@node Pretranslating +@chapter Pretranslating PO Files +@cindex Pretranslating PO Files +@cindex Machine translation + +As a translator, +you may save yourself some work +by starting from a reasonably good translation produced by a machine, +and modify that translation, to make it perfect. + +Thus, before you start working on a translation, +you might have the PO file @emph{pretranslated}. 
+ +This process, also called @emph{machine translation}, +is nowadays best performed through a Large Language Model (LLM). +(See @url{https://en.wikipedia.org/wiki/Machine_translation#Neural_MT}, +@url{https://en.wikipedia.org/wiki/Neural_machine_translation#Generative_LLMs}.) + +@node Installing an LLM +@section Installing a Large Language Model + +We don't recommend using machine translation +through a web service in the cloud, controlled by someone other than yourself. +Such a machine translation service would have major drawbacks +(it could go away any time, +it could be used to spy on you or manipulate you, +or the costs could go up beyond your control); +see @url{https://www.gnu.org/philosophy/who-does-that-server-really-serve.en.html}. +Additionally, such a service typically has some cost +(between $10 and $25 per megabyte, as of 2025). + +Instead, we recommend a Large Language Model execution engine +that runs on hardware under your control. +This can be a desktop computer, +or for instance a single-board computer in your local network. + +@cindex ollama +At this point (in 2025), +a Large Language Model execution engine that is Free Software is +@samp{ollama}, that can be downloaded from @url{https://ollama.com/}. + +Together with an LLM of reasonable quality, +such as the model @code{ministral-3:14b}, +the system requirements are as follows: +@itemize @bullet +@item +RAM: 16 GB. +@item +Disk space: 10 GB (1 GB for @code{ollama}, 9 GB for the model). +@item +GPU or TPU: A GPU (Graphics Processing Unit) or TPU (Tensor Processing Unit) +is not needed. +As of 2025, a high-end GPU from certain vendors +can be used by @code{ollama} to provide an optional speedup. +@end itemize + +Additional configuration: +@itemize @bullet +@item +If you are running @code{ollama} on your computer directly, +no further configuration is needed. 
+@item +If you are running @code{ollama} on a separate machine, +and want to make it accessible from all machines in the LAN: +Edit the file @file{/etc/systemd/system/ollama.service}, +adding a line: @code{Environment="OLLAMA_HOST=0.0.0.0"}. +See @url{https://github.com/ollama/ollama/issues/703}. +@item +If you are running @code{ollama} in a virtual machine, +make the port 11434 accessible through port forwarding. +@end itemize + +@node spit Invocation +@section Invoking the @code{spit} Program + +@include spit.texi + @node Editing @chapter Editing PO Files @cindex Editing PO Files diff --git a/gettext-tools/doc/spit.texi b/gettext-tools/doc/spit.texi new file mode 100644 index 000000000..ab8a47745 --- /dev/null +++ b/gettext-tools/doc/spit.texi @@ -0,0 +1,108 @@ +@c This file is part of the GNU gettext manual. +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c See the file gettext.texi for copying conditions. + +@pindex spit +@cindex @code{spit} program, usage +@example +spit [@var{option}...] +@end example + +@cindex query a Large Language Model +@cindex translate through a Large Language Model +The @code{spit} program +passes its input to a Large Language Model (LLM) instance +and prints the response. +With the @code{--to} option, +it translates its input to the specified language +through a Large Language Model (LLM) and prints the translation. + +@strong{Warning:} The output might not be what you expect. +It might be of the wrong form, be of poor quality, or reflect some biases. + +@subsection Large Language Model (LLM) options + +@table @samp +@item --species=@var{type} +@opindex --species@r{, @code{spit} option} +Specifies the type of Large Language Model execution engine. +The default and only valid value is @code{ollama}. + +@item --url=@var{url} +@opindex --url@r{, @code{spit} option} +Specifies the URL of the server that runs the Large Language Model execution engine. +For @code{ollama}, the default is @code{http://localhost:11434}. 
+ +@item -m @var{model} +@itemx --model=@var{model} +@opindex -m@r{, @code{spit} option} +@opindex --model@r{, @code{spit} option} +Specifies the model to use. +This option is mandatory; no default exists. +The specified model must +already be installed in the Large Language Model execution engine. + +@item --to=@var{language} +@opindex --to@r{, @code{spit} option} +Specifies the target language. +@var{language} may be specified +as an ISO 639 language code (such as @code{fr} for French), +as a combination of an ISO 639 language code and an ISO 3166 country code +(such as @code{fr_CA} for French in Canada, +or @code{zh_TW} for traditional Chinese), +or as the English name of a language (such as @code{French}). + +The effect of this option is to +add a prompt similar to "Translate to @var{language}:". + +@item --prompt=@var{text} +@opindex --prompt@r{, @code{spit} option} +Specifies the prompt to use before the input that comes from standard input. +It allows you to specify extra instructions for the LLM. + +This option overrides the @code{--to} option. + +@item --postprocess=@var{command} +@opindex --postprocess@r{, @code{spit} option} +Specifies a command to post-process the output. +This should be a Bourne shell command +that reads from standard input and writes to standard output. + +For instance, the @code{ministral-3:14b} model +often emphasizes part of the output with @samp{**} characters. +To eliminate these markers, +you could use the command @samp{sed -e 's/[*][*]//g'}. + +@end table + +@subsection Informative output + +@table @samp +@item -h +@itemx --help +@opindex -h@r{, @code{spit} option} +@opindex --help@r{, @code{spit} option} +Display this help and exit. + +@item -V +@itemx --version +@opindex -V@r{, @code{spit} option} +@opindex --version@r{, @code{spit} option} +Output version information and exit. 
+ +@end table + +@subsection Examples + +Machine translation of a single sentence: + +@smallexample +$ echo 'Translate into German: "Welcome to the GNU project!"' \ + | spit --model=ministral-3:14b \ + --postprocess="sed -e 's/[*][*]//g'" +"Willkommen zum GNU-Projekt!" +@end smallexample + +@noindent +The perfect translation would be @code{"Willkommen beim GNU-Projekt!"}. +You can see: some manual adjustment after the machine translation is needed. diff --git a/gettext-tools/machine-translation/README b/gettext-tools/machine-translation/README new file mode 100644 index 000000000..81865174a --- /dev/null +++ b/gettext-tools/machine-translation/README @@ -0,0 +1,62 @@ +This directory contains programs for interfacing to machine translation tools. + +Which types of machine translation tools to support? + + * We don't support machine translation through web services in the cloud, + because + - They are not under the control of the user (the famous SaaS problem: + <https://www.gnu.org/philosophy/who-does-that-server-really-serve.en.html>). + - They typically have some cost (between $10 and $25 per megabyte, + as of 2025). + * We therefore only support locally running machine translation engines. + +Which kinds of machine translation tools to support? + + * Large Language Models (LLMs) produce good quality translations nowadays + (December 2025), at least regarding French and German. + With 'ollama' there exists an execution engine (so-called "inference engine") + that does not require a graphics card; it can run directly on a CPU. + 'llama.cpp' and 'KoboldCpp' seem to be roughly equivalent to 'ollama', + therefore here we focus on 'ollama'. 
+ * The Argos Translate tools, which are based on neural networks but not LLMs, + https://github.com/argosopentech/argos-translate + https://www.argosopentech.com/ + work: + $ ./argos-translate --from-lang en --to-lang de "You are lucky" + Du hast Glück + and would be theoretically acceptable, but they have two problems: + - Unclear copyright of some of the language models + , + - Translation quality not as good as the one from LLMs. + +Prerequisite: + + * ollama + Homepage: https://ollama.com/ + Source code: https://github.com/ollama/ollama + Binaries exist for various platforms: + - for GNU/Linux, macOS, Windows: https://ollama.com/download + - for various distributions: https://repology.org/project/ollama/versions + - for GNU Guix: https://codeberg.org/tusharhero/ollama-guix + - for Ubuntu: https://snapcraft.io/ollama + System requirements (for ollama with model 'ministral-3:14b'): + - 16 GB RAM, + - 10 GB disk space (1 GB for ollama, 9 GB for the model). + Configuration: + - If you have it on a separate machine, and want to make it accessible + from all machines in the LAN: + Edit /etc/systemd/system/ollama.service : + Add a line: Environment="OLLAMA_HOST=0.0.0.0" + Cf. <https://github.com/ollama/ollama/issues/703>. + - If you have it running in a virtual machine, make the port 11434 + accessible through port forwarding. + +Programs: + + * spit + is an extension of 'ollama-spit', with an option --to=LANGUAGE for + machine translation of a single input. + + * msgpre + applies machine translation to the untranslated (and optionally, fuzzy) + messages of a PO file. diff --git a/gettext-tools/machine-translation/prototype/OllamaSpit.java b/gettext-tools/machine-translation/prototype/OllamaSpit.java new file mode 100644 index 000000000..28fcf5da7 --- /dev/null +++ b/gettext-tools/machine-translation/prototype/OllamaSpit.java @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2025 Free Software Foundation, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Written by Bruno Haible , 2025. + */ + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.net.URI; + +// Documentation: +// https://docs.oracle.com/en/java/javase/11/docs/api/java.net.http/java/net/http/package-summary.html +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; + +// Documentation: +// https://google.github.io/gson/UserGuide.html +// https://www.javadoc.io/doc/com.google.code.gson/gson/2.8.0/com/google/gson/Gson.html +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; + +/* + * This program passes an input to an ollama instance and prints the response. 
+ */ +public class OllamaSpit { + + private static void usage () { + System.out.println("Usage: spit [OPTION...]"); + System.out.println(); + System.out.println("Passes standard input to an ollama instance and prints the response."); + System.out.println(); + System.out.println("Options:"); + System.out.println(" --url Specifies the ollama server's URL."); + System.out.println(" --model Specifies the model to use."); + System.out.println(); + System.out.println("Informative output:"); + System.out.println(); + System.out.println(" --help Show this help text."); + } + + public static void main (String[] args) throws IOException, InterruptedException { + // Command-line option processing. + String url = "http://localhost:11434"; + String model = null; + { + boolean do_help = false; + int i; + for (i = 0; i < args.length; i++) { + String arg = args[i]; + if (arg.equals("--url") || arg.equals("--ur") || arg.equals("--u")) { + i++; + if (i == args.length) { + System.err.println("Spit: missing argument for --url"); + System.exit(1); + } + url = args[i]; + } else if (arg.startsWith("--url=")) { + url = arg.substring(6); + } else if (arg.equals("--model") || arg.equals("--mode") || arg.equals("--mod") || arg.equals("--mo") || arg.equals("--m")) { + i++; + if (i == args.length) { + System.err.println("Spit: missing argument for --model"); + System.exit(1); + } + model = args[i]; + } else if (arg.startsWith("--model=")) { + model = arg.substring(8); + } else if (arg.equals("--help")) { + do_help = true; + } else if (arg.equals("--")) { + // Stop option processing + i++; + break; + } else if (arg.startsWith("-")) { + System.err.println("Spit: unknown option " + arg); + System.err.println("Try 'Spit --help' for more information."); + System.exit(1); + } else + break; + } + if (do_help) { + usage(); + System.exit(0); + } + if (i < args.length) { + System.err.println("Spit: too many arguments"); + System.err.println("Try 'Spit --help' for more information."); + System.exit(1); + 
} + } + if (model == null) { + System.err.println("Spit: missing --model option"); + System.exit(1); + } + // Sanitize URL. + if (!url.endsWith("/")) + url = url + "/"; + + // Read the contents of standard input. + String input = new String(System.in.readAllBytes(), StandardCharsets.UTF_8); + + // Documentation of the ollama API: + // + + // Compose the payload. + JsonObject payload = new JsonObject(); + payload.addProperty("model", model); + payload.addProperty("prompt", input); + String payloadAsString = payload.toString(); + + // Make the request to the ollama server. + HttpClient client = HttpClient.newHttpClient(); + HttpRequest request = + HttpRequest.newBuilder(URI.create(url+"api/generate")) + .POST(HttpRequest.BodyPublishers.ofString(payloadAsString)) + .build(); + HttpResponse response = + client.send(request, HttpResponse.BodyHandlers.ofInputStream()); + InputStream responseStream = response.body(); + if (response.statusCode() != 200) { + System.err.println("Status: "+response.statusCode()); + } + if (response.statusCode() >= 400) { + System.err.print("Body: "); + responseStream.transferTo(System.err); + System.err.println(); + System.exit(1); + } + int bufSize = 4096; + byte[] buffer = new byte[bufSize]; + int bufStart = 0; + int bufEnd = 0; + for (;;) { + // Read a line when available. + // But don't force reading more than one line. + String line; + { + // Boy, this is complicated code. There should really be a helper + // method on InputStream for this purpose. + int i; + for (i = bufStart; i < bufEnd; i++) { + if (buffer[i] == (byte)'\n') + break; + } + for (;;) { + if (i < bufEnd) { + // An entire line in buffer. + i++; + line = new String(buffer, bufStart, i-bufStart, StandardCharsets.UTF_8); + bufStart = i; + break; + } + // We need to read something from the stream. + int avail = responseStream.available(); + if (avail == 0) + // If nothing is available, read 1 byte, even if that needs to block. 
+ avail = 1; + if ((bufEnd-bufStart) + avail > bufSize) { + // Grow the buffer. + int newBufSize = (bufEnd-bufStart) + avail; + if (newBufSize < 2*bufSize) + newBufSize = 2*bufSize; + byte[] newBuffer = new byte[newBufSize]; + System.arraycopy(buffer, bufStart, newBuffer, 0, bufEnd-bufStart); + bufSize = newBufSize; + buffer = newBuffer; + bufEnd = bufEnd-bufStart; + bufStart = 0; + } else { + // We can keep the buffer, but may need to move the contents to the + // front. + if (bufEnd + avail > bufSize) { + System.arraycopy(buffer, bufStart, buffer, 0, bufEnd-bufStart); + bufEnd = bufEnd-bufStart; + bufStart = 0; + } + } + // Now bufEnd + avail <= bufSize. + int nBytesRead = responseStream.readNBytes(buffer, bufEnd, avail); + if (nBytesRead == 0) { + // We're at EOF. + if (bufEnd == bufStart) + return; + // Convert the last line (without terminating newline). + line = new String(buffer, bufStart, bufEnd-bufStart, StandardCharsets.UTF_8); + break; + } + // We have read at least one byte. Look if we have a complete line now. + int oldBufEnd = bufEnd; + bufEnd += nBytesRead; + for (i = oldBufEnd; i < bufEnd; i++) { + if (buffer[i] == (byte)'\n') + break; + } + } + } + // We have a line now. It should contain a single JSON object. + JsonElement part = JsonParser.parseString(line); + if (part.isJsonObject()) { + System.out.print(((JsonObject)part).get("response").getAsString()); + System.out.flush(); + } + } + } +} + +/* + * Local Variables: + * compile-command: "javac -d . 
-cp /usr/share/java/gson.jar OllamaSpit.java" + * run-command: "echo 'Translate into German: "Welcome to the GNU project!"' | java -cp .:/usr/share/java/gson.jar OllamaSpit --model=ministral-3:14b" + * End: + */ diff --git a/gettext-tools/machine-translation/prototype/README b/gettext-tools/machine-translation/prototype/README new file mode 100644 index 000000000..d7e24f25e --- /dev/null +++ b/gettext-tools/machine-translation/prototype/README @@ -0,0 +1,89 @@ +This directory contains a prototype program 'ollama-spit' +that passes an input to an LLM and prints the output. +Since it implements an HTTP client with some JSON processing, it +needs the following dependencies, depending on the programming language: + + Language HTTP client JSON library + + C libcurl libjson-c + + C++ httplib.h json.hpp + + Python requests json (built-in) + + Java java.net.http.HttpClient Gson + (built-in) + + Go net/http encoding/json + (built-in) (built-in) + + Shell curl jq + +Example: + $ echo 'Translate into German: "Welcome to the GNU project!"' | ollama-spit --model=ministral-3:14b + "Willkommen zum GNU-Projekt!" + +More elaborate code of the same kind, in various programming languages, +can be found at +. + +The programs behave identically, except for the Shell implementation that +has bad error handling. + +Discussion of the other implementations: + + * Deployment considerations: + + The C program depends on libcurl and libjson-c. libcurl is big (> 700 KB + binary code size, with lots of further dependencies shown by 'ldd'). + But libcurl and libjson-c are the de-facto standards in the C ecosystem: + in Debian, more than 250 packages depend on libcurl4t64, + and more than 100 packages depend on libjson-c5. + (libjansson, which is sometimes considered as an alternative to libjson-c, + is too buggy for serious use: + , + .) 
+ + The C++ program depends on + https://github.com/yhirose/cpp-httplib + https://github.com/nlohmann/json + which are header-only C++ libraries, not widely used in the C++ ecosystem. + The compiled executable has > 500 KB binary code size, but no dependency + on shared libraries other than libstdc++ and libc. + + The Python program depends on the 'requests' module, which is widely used + in the Python ecosystem. The user may need to install it via + $ pip install requests + + The Java program depends on the Gson library. It is widely used in the Java + ecosystem. The user may need to fetch it from + https://mvnrepository.com/artifact/com.google.code.gson/gson + + The Go program would need to depend on the 'pflag' module, which is widely + used in the Go ecosystem. + In Debian, it would also + - either depend on libgo, the Go runtime library, which is 60 MB in size, + - or be statically linked, and be more than 25 MB in size. + + * Development considerations: + + The C program is a bit clumsy, because libcurl does not provide an easy + way to have separate code paths for the success case and for the error case. + + The C++ program takes 13 seconds to compile with g++. + + The Python program has no issues. + + The Java program has no issues. + + The Go program is a pain to develop and maintain, because the Go authors put + language minimality above developer needs + . + +As a consequence, we will use + - primarily a C implementation and + - as a fallback for users who don't like to install many packages before + building GNU gettext: a Python implementation. + (Why Python, not Java? + 1. Because it does not need a Java compiler during "make && make install", + 2. Because Python is more popular and thus more likely to be maintained.) 
diff --git a/gettext-tools/machine-translation/prototype/ollama-spit.c b/gettext-tools/machine-translation/prototype/ollama-spit.c new file mode 100644 index 000000000..8cc9ee147 --- /dev/null +++ b/gettext-tools/machine-translation/prototype/ollama-spit.c @@ -0,0 +1,440 @@ +/* + * Copyright (C) 2025 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Written by Bruno Haible , 2025. + */ + +/* + * This program passes an input to an ollama instance and prints the response. + */ + +#include +#include +#include +#include +#include + +/* We use JSON-C. + It is multithread-safe, as long as we don't use any of the json_global_* + API functions, nor the json_util_get_last_err function. + Documentation: + */ +#include + +/* We use libcurl. + Documentation: + */ +#include + + +static void +xalloc_die () +{ + fprintf (stderr, "spit: out of memory\n"); + exit (EXIT_FAILURE); +} + +static void +curl_die () +{ + fprintf (stderr, "spit: curl error\n"); + exit (EXIT_FAILURE); +} + +static void +process_response_line (const char *line) +{ + /* Note: As of json-c version 0.15, the jerrno value is unreliable. + See + and . + json-c version 0.17 introduces json_tokener_error_memory, but its value + changes in version 0.18. 
 */ + struct json_object *j; +#if JSON_C_MAJOR_VERSION > 0 || (JSON_C_MAJOR_VERSION == 0 && JSON_C_MINOR_VERSION >= 18) + enum json_tokener_error jerrno = json_tokener_error_memory; + j = json_tokener_parse_verbose (line, &jerrno); + if (j == NULL && jerrno == json_tokener_error_memory) + xalloc_die (); +#else + j = json_tokener_parse (line); +#endif + /* Ignore an empty line. */ + if (j != NULL) + { + /* We expect a JSON object. */ + if (json_object_is_type (j, json_type_object)) + { + /* Output its "response" property. */ + const char *prop = + json_object_get_string(json_object_object_get (j, "response")); + if (prop != NULL) + { + fputs (prop, stdout); + fflush (stdout); + } + } + } +} + +/* A libcurl header callback that determines, during curl_easy_perform, + whether the HTTP request returned an error. */ +static size_t +my_header_callback (char *buffer, size_t one, size_t n, void *userdata) +{ + bool *is_error_p = (bool *) userdata; +#if DEBUG + fprintf (stderr, "in my_header_callback: buffer = %.*s\n", (int) n, buffer); +#endif + if (n >= 5 && memcmp (buffer, "HTTP/", 5) == 0) + { + /* buffer contains a line of the form "HTTP/1.1 code description". + Extract the code. */ + char old = buffer[n - 1]; + buffer[n - 1] = '\0'; + int code; + if (sscanf (buffer, "%*s %d\n", &code) == 1) + { + if (code >= 400) + *is_error_p = true; + } + buffer[n - 1] = old; + } + return n; +} + +struct my_write_locals +{ + bool is_error; + char *body; + size_t body_allocated; + size_t body_start; + size_t body_end; +}; + +/* Makes room for n more bytes in l->body. */ +static void +my_write_grow (struct my_write_locals *l, size_t n) +{ + if ((l->body_end - l->body_start) + n > l->body_allocated) + { + /* Grow the buffer. 
 */ + size_t new_allocated = (l->body_end - l->body_start) + n; + if (new_allocated < 2 * l->body_allocated) + new_allocated = 2 * l->body_allocated; + if (new_allocated < 1024) + new_allocated = 1024; + char *new_body; + if (l->body_start == 0 && l->body_end > 0) + { + new_body = (char *) realloc (l->body, new_allocated); + if (new_body == NULL) + xalloc_die (); + } + else + { + new_body = (char *) malloc (new_allocated); + if (new_body == NULL) + xalloc_die (); + memcpy (new_body, l->body + l->body_start, l->body_end - l->body_start); + free (l->body); + l->body_end = l->body_end - l->body_start; + l->body_start = 0; + } + l->body = new_body; + l->body_allocated = new_allocated; + } + else + { + /* We can keep the buffer, but may need to move the contents to the + front. */ + if (l->body_end + n > l->body_allocated) + { + memmove (l->body, l->body + l->body_start, l->body_end - l->body_start); + l->body_end = l->body_end - l->body_start; + l->body_start = 0; + } + } + /* Here l->body_end + n <= l->body_allocated. */ +} + +/* A libcurl write callback that processes a piece of response body, + depending on whether the HTTP request returned an error. */ +static size_t +my_write_callback (char *buffer, size_t one, size_t n, void *userdata) +{ + struct my_write_locals *l = (struct my_write_locals *) userdata; +#if DEBUG + fprintf (stderr, "in my_write_callback: buffer = %.*s\n", (int) n, buffer); +#endif + + /* Append the buffer's contents to the body. */ + my_write_grow (l, n); + memcpy (l->body + l->body_end, buffer, n); + l->body_end += n; + + if (!l->is_error) + { + /* Process entire lines that are in the buffer. */ + char *newline = (char *) memchr (l->body + l->body_end - n, '\n', n); + while (newline != NULL) + { + /* We have an entire line. 
 */ + *newline = '\0'; + process_response_line (l->body + l->body_start); + l->body_start = (newline + 1) - l->body; + + newline = (char *) memchr (l->body + l->body_start, '\n', + l->body_end - l->body_start); + } + } + + return n; +} + +static void +usage () +{ + printf ("%s", "Usage: spit [OPTION...]\n\ +\n\ +Passes standard input to an ollama instance and prints the response.\n\ +\n\ +Options:\n\ + --url Specifies the ollama server's URL.\n\ + --model Specifies the model to use.\n\ +\n\ +Informative output:\n\ +\n\ + --help Show this help text.\n"); +} + +int +main (int argc, char *argv[]) +{ + /* Command-line option processing. */ + const char *url = "http://localhost:11434"; + const char *model = NULL; + bool do_help = false; + static const struct option long_options[] = + { + { "url", required_argument, NULL, CHAR_MAX + 2 }, + { "model", required_argument, NULL, CHAR_MAX + 3 }, + { "help", no_argument, NULL, CHAR_MAX + 1 }, + { NULL, 0, NULL, 0 } + }; + { + int optc; + while ((optc = getopt_long (argc, argv, "", long_options, NULL)) != EOF) + switch (optc) + { + case '\0': /* Long option. */ + break; + case CHAR_MAX + 1: /* --help */ + do_help = true; + break; + case CHAR_MAX + 2: /* --url */ + url = optarg; + break; + case CHAR_MAX + 3: /* --model */ + model = optarg; + break; + default: + fprintf (stderr, "Try 'spit --help' for more information.\n"); + exit (EXIT_FAILURE); + } + } + if (do_help) + { + usage (); + exit (EXIT_SUCCESS); + } + if (argc > optind) + { + fprintf (stderr, "spit: too many arguments\n"); + fprintf (stderr, "Try 'spit --help' for more information.\n"); + exit (EXIT_FAILURE); + } + if (model == NULL) + { + fprintf (stderr, "spit: missing --model option\n"); + exit (EXIT_FAILURE); + } + + /* Sanitize URL. 
 */ + if (!(strlen (url) > 0 && url[strlen (url) - 1] == '/')) + { + char *new_url = malloc (strlen (url) + 1 + 1); + if (new_url == NULL) + xalloc_die (); + sprintf (new_url, "%s/", url); + url = new_url; + } + + /* Read the contents of standard input. */ + char *input = NULL; + size_t input_allocated = 0; + size_t input_length = 0; + for (;;) + { + int c = fgetc (stdin); + if (c == EOF) + break; + + if (input_length >= input_allocated) + { + size_t new_allocated = 2 * input_allocated + 1; + char *new_input = (char *) realloc (input, new_allocated); + if (new_input == NULL) + xalloc_die (); + input = new_input; + input_allocated = new_allocated; + } + input[input_length++] = c; + } + + /* Documentation of the ollama API: + */ + + /* Compose the payload. */ + struct json_object *payload = json_object_new_object (); + if (payload == NULL) + xalloc_die (); + { + struct json_object *value = json_object_new_string (model); + if (value == NULL) + xalloc_die (); + if (json_object_object_add (payload, "model", value)) + xalloc_die (); + } + { + struct json_object *value = json_object_new_string_len (input != NULL ? input : "", (int) input_length); /* input is not NUL-terminated, and is NULL if stdin was empty */ + if (value == NULL) + xalloc_die (); + if (json_object_object_add (payload, "prompt", value)) + xalloc_die (); + } + const char *payload_as_string = + json_object_to_json_string_ext (payload, JSON_C_TO_STRING_PLAIN + | JSON_C_TO_STRING_NOSLASHESCAPE); + if (payload_as_string == NULL) + xalloc_die (); + + /* Make the request to the ollama server. */ + if (curl_global_init (CURL_GLOBAL_DEFAULT)) + curl_die (); + CURL *curl = curl_easy_init (); + if (!curl) + curl_die (); + { + char *target_url = malloc (strlen (url) + 12 + 1); + if (target_url == NULL) + xalloc_die (); + sprintf (target_url, "%sapi/generate", url); + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_URL, target_url); + } + + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_POST, 1L); + + { + struct curl_slist *headers = NULL; + /* Override the Content-Type header set by CURLOPT_POST. 
 */ + headers = curl_slist_append(headers, "Content-Type: " "application/json"); + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_HTTPHEADER, headers); + } + + /* Set the payload. + Documentation: */ + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload_as_string); + + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 1L); + +#if DEBUG > 1 + /* For debugging: */ + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_VERBOSE, 1L); +#endif + +#if 0 + /* Not reliable, see . */ + curl_easy_setopt (curl, CURLOPT_FAILONERROR, 1L); +#endif + + struct my_write_locals locals; + locals.is_error = false; + locals.body = NULL; + locals.body_allocated = 0; + locals.body_start = 0; + locals.body_end = 0; + + /* Documentation: + + */ + curl_easy_setopt (curl, CURLOPT_HEADERFUNCTION, my_header_callback); + curl_easy_setopt (curl, CURLOPT_HEADERDATA, &locals.is_error); + + /* Documentation: + + */ + curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, my_write_callback); + curl_easy_setopt (curl, CURLOPT_WRITEDATA, &locals); + + /* Documentation: */ + CURLcode ret = curl_easy_perform (curl); + if (ret != CURLE_OK) + { + fprintf (stderr, "spit: curl error %u: %s\n", ret, + /* Documentation: */ + curl_easy_strerror (ret)); + exit (EXIT_FAILURE); + } + /* Documentation: */ + long status_code; + curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &status_code); + if (status_code != 200) + fprintf (stderr, "Status: %ld\n", status_code); + if (locals.is_error != (status_code >= 400)) + /* The my_header_callback did not work right. */ + abort (); + if (status_code >= 400) + { + fprintf (stderr, "Body: "); + fwrite (locals.body + locals.body_start, + 1, locals.body_end - locals.body_start, + stderr); + fprintf (stderr, "\n"); + exit (1); + } + /* Most lines have already been processed through my_write_callback. + Now process the last line (without terminating newline). 
 */ + if (locals.body_end > locals.body_start) + { + my_write_grow (&locals, 1); + *(locals.body + locals.body_end) = '\0'; + process_response_line (locals.body + locals.body_start); + } + + return 0; +} + +/* + * Local Variables: + * compile-command: "gcc -Wall -I/usr/include/json-c -O2 -o ollama-spit ollama-spit.c -lcurl -ljson-c" + * run-command: "echo 'Translate into German: "Welcome to the GNU project!"' | ./ollama-spit --model=ministral-3:14b" + * End: + */ diff --git a/gettext-tools/machine-translation/prototype/ollama-spit.go b/gettext-tools/machine-translation/prototype/ollama-spit.go new file mode 100644 index 000000000..284c995ac --- /dev/null +++ b/gettext-tools/machine-translation/prototype/ollama-spit.go @@ -0,0 +1,164 @@ +// +// Copyright (C) 2025 Free Software Foundation, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// Written by Bruno Haible , 2025. + +// This program passes an input to an ollama instance and prints the response. + +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "os" + "strings" +) + +func main() { + // Command-line option processing. + // Note: This is incompatible with GNU conventions. + // says + // "Flag parsing stops just before the first non-flag argument + // ("-" is a non-flag argument) or after the terminator "--"." 
 + // which means that options after non-options are not reordered + // like they are by GNU getopt. + // The common workaround is to use https://github.com/spf13/pflag + // instead. + + url_option := flag.String("url", "http://localhost:11434", "the ollama server's URL") + + // Clumsy code is needed when we want an option of type string + // that has no default. The 'flag' package's String and StringVar + // functions reject a default value of nil, pushing developers + // into confusing an option with an empty string value with an + // omitted option. + var model_option *string = nil + flag.Func( "model", "the model to use", + func (s string) error { + model_option = &s + return nil + }) + + do_help_option := flag.Bool ("help", false, "this help text") + + flag.Parse() + + if *do_help_option { + fmt.Println("Usage: spit [OPTION...]") + fmt.Println() + fmt.Println("Passes standard input to an ollama instance and prints the response.") + fmt.Println() + fmt.Println("Options:") + fmt.Println(" --url Specifies the ollama server's URL.") + fmt.Println(" --model Specifies the model to use.") + fmt.Println() + fmt.Println("Informative output:") + fmt.Println() + fmt.Println(" --help Show this help text.") + os.Exit(0) + } + + if model_option == nil { + fmt.Fprintln(os.Stderr, "spit: missing --model option") + os.Exit(1) + } + + if len(flag.Args()) > 0 { + fmt.Fprintln(os.Stderr, "spit: too many arguments") + fmt.Fprintln(os.Stderr, "Try 'spit --help' for more information.") + os.Exit(1) + } + + // Sanitize URL. + url := *url_option + if !strings.HasSuffix(url, "/") { + url = url + "/" + } + + model := *model_option + + // Read the contents of standard input. + allBytes, err := io.ReadAll(os.Stdin) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + input := string(allBytes) + + // Documentation of the ollama API: + // + + // JSON in Go is a pain: + // 1) There is no way to just create a JSON object and add properties + // to it. 
 We are forced to either use a map[string]any (and lose + // the advantages of type checking) or create a struct that reflects + // the desired shape of the JSON object. + // 2) In this struct, fields whose name starts with a lowercase letter + // are ignored by json.Marshal! Here's the workaround syntax: + type GeneratePayload struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + } + payload := GeneratePayload { + Model: model, + Prompt: input, + } + payloadAsBytes, err := json.Marshal(payload) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + response, err := http.Post(url + "api/generate", + "application/json", bytes.NewReader(payloadAsBytes)) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if response.StatusCode != 200 { + fmt.Fprintln(os.Stderr, "Status:", response.StatusCode) + } + if response.StatusCode >= 400 { + responseBodyBytes, _ := io.ReadAll(response.Body) + fmt.Fprintln(os.Stderr, "Body:", string(responseBodyBytes)) + os.Exit(1) + } + + body := response.Body + reader := bufio.NewReader(body) + for { + line, err := reader.ReadBytes('\n') + if len(line) == 0 && err != nil { + break + } + var part map[string]any + if json.Unmarshal(line, &part) == nil { + fmt.Print(part["response"]) + } + } +} + +/* + * Local Variables: + * compile-command: "gccgo -Wall -O2 -o ollama-spit ollama-spit.go" + * run-command: "echo 'Translate into German: "Welcome to the GNU project!"' | ./ollama-spit --model=ministral-3:14b" + * End: + */ diff --git a/gettext-tools/machine-translation/prototype/ollama-spit.py b/gettext-tools/machine-translation/prototype/ollama-spit.py new file mode 100755 index 000000000..fc74f35ff --- /dev/null +++ b/gettext-tools/machine-translation/prototype/ollama-spit.py @@ -0,0 +1,130 @@ +#! /usr/bin/env python3 +# +# Copyright (C) 2025 Free Software Foundation, Inc. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Written by Bruno Haible , 2025. + +# This program passes an input to an ollama instance and prints the response. +# +# Dependencies: requests. + +import sys + +# Documentation: https://docs.python.org/3/library/argparse.html +import argparse + +# Documentation: https://requests.readthedocs.io/en/latest/ +import requests + +# Documentation: https://docs.python.org/3/library/json.html +import json + +def main(): + parser = argparse.ArgumentParser( + prog='spit', + usage='spit --help', + add_help=False) + + parser.add_argument('--url', + dest='url', + default='http://localhost:11434') + parser.add_argument('--model', + dest='model', + default=None, + nargs=1) + parser.add_argument('--help', '--hel', '--he', '--h', + dest='help', + default=None, + action='store_true') + # All other arguments are collected. + parser.add_argument('non_option_arguments', + nargs='*') + + # Parse the given arguments. Don't signal an error if non-option arguments + # occur between or after options. + cmdargs, unhandled = parser.parse_known_args() + + # Handle --help, ignoring all other options. + if cmdargs.help is not None: + print(''' +Usage: spit [OPTION...] + +Passes standard input to an ollama instance and prints the response. + +Options: + --url Specifies the ollama server's URL. + --model Specifies the model to use. + +Informative output: + + --help Show this help text. 
+''') + sys.exit(0) + + # Report unhandled arguments. + for arg in unhandled: + if arg.startswith('-'): + message = '%s: Unrecognized option \'%s\'.\n' % ('spit', arg) + message += 'Try \'spit --help\' for more information.\n' + sys.stderr.write(message) + sys.exit(1) + # By now, all unhandled arguments were non-options. + cmdargs.non_option_arguments += unhandled + + if cmdargs.model is None: + sys.stderr.write('%s: missing --model option\n' % 'spit') + sys.exit(1) + + if len(cmdargs.non_option_arguments) > 0: + message = '%s: too many arguments\n' % 'spit' + message += 'Try \'spit --help\' for more information.\n' + sys.stderr.write(message) + sys.exit(1) + + # Sanitize URL. + url = cmdargs.url + if not url.endswith('/'): + url += '/' + + model = cmdargs.model[0] + + # Read the contents of standard input. + prompt = sys.stdin.read() + + # Documentation of the ollama API: + # + + payload = { 'model': model, 'prompt': prompt } + # We need the payload in JSON syntax (with double-quotes around the strings), + # not in Python syntax (with single-quotes around the strings): + payload = json.dumps(payload) + + response = requests.post(url + 'api/generate', data=payload, stream=True) + if response.status_code != 200: + print('Status:', response.status_code, file=sys.stderr) + if response.status_code >= 400: + print('Body:', response.text, file=sys.stderr) + sys.exit(1) + # iter_lines() may yield empty keep-alive lines, on which json.loads + # would raise; filter them out. + + for line in filter(None, response.iter_lines()): + part = json.loads(line.decode('utf-8')) + print(part.get('response', ''), end='', flush=True) + + +if __name__ == '__main__': + main() diff --git a/gettext-tools/machine-translation/prototype/ollama-spit.sh b/gettext-tools/machine-translation/prototype/ollama-spit.sh new file mode 100755 index 000000000..751f9db86 --- /dev/null +++ b/gettext-tools/machine-translation/prototype/ollama-spit.sh @@ -0,0 +1,130 @@ +#! /bin/sh +# +# Copyright (C) 2025 Free Software Foundation, Inc. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Written by Bruno Haible , 2025. + +# This program passes an input to an ollama instance and prints the response. +# +# Dependencies: curl, jq. + +progname=$0 + +# func_exit STATUS +# exits with a given status. +func_exit () +{ + exit $1 +} + +# func_usage +# outputs to stdout the --help usage message. +func_usage () +{ + echo "\ +Usage: spit [OPTION...] + +Passes standard input to an ollama instance and prints the response. + +Options: + --url Specifies the ollama server's URL. + --model Specifies the model to use. + +Informative output: + + --help Show this help text." +} + +# Command-line option processing. +# Removes the OPTIONS from the arguments. Sets the variables: +# - url +# - model +{ + url='http://localhost:11434' + model= + + while test $# -gt 0; do + case "$1" in + --url | --ur | --u ) + shift + if test $# = 0; then + echo "$progname: missing argument for --url" 1>&2 + func_exit 1 + fi + url="$1" + shift + ;; + --url=* ) + url=`echo "X$1" | sed -e 's/^X--url=//'` + shift + ;; + --model | --mode | --mod | --mo | --m ) + shift + if test $# = 0; then + echo "$progname: missing argument for --model" 1>&2 + func_exit 1 + fi + model="$1" + shift + ;; + --model=* ) + model=`echo "X$1" | sed -e 's/^X--model=//'` + shift + ;; + --help | --hel | --he | --h ) + func_usage + func_exit $? 
 ;; + -- ) + # Stop option processing + shift + break ;; + -* ) + echo "$progname: unknown option $1" 1>&2 + echo "Try '$progname --help' for more information." 1>&2 + func_exit 1 ;; + * ) + break ;; + esac + done + + if test -z "$model"; then + echo "$progname: missing --model option" 1>&2 + func_exit 1 + fi + + # Sanitize URL. + case "$url" in + */) ;; + *) url="$url/" ;; + esac + + # Read the contents of standard input, and let jq encode the payload as JSON. + input=`cat` + payload=`jq -n --arg model "$model" --arg prompt "$input" '{ model: $model, prompt: $prompt }'` + + # Documentation of the ollama API: + # + + curl --no-progress-meter --no-buffer \ + -X POST "$url"api/generate \ + -d "$payload" \ + | jq --join-output --unbuffered '.response' + + # Note: Error handling is not good here. For example, when passing a + # nonexistent model name, the server answers with HTTP status 404 + # and a response body {"error":""}, that gets output + # to stdout (not stderr!) and then swallowed by 'jq'. +} diff --git a/gettext-tools/man/Makefile.am b/gettext-tools/man/Makefile.am index 6442a4c6c..9dcdd3587 100644 --- a/gettext-tools/man/Makefile.am +++ b/gettext-tools/man/Makefile.am @@ -29,6 +29,7 @@ msgcmp.x msgfmt.x msgmerge.x msgunfmt.x xgettext.x \ msgattrib.x msgcat.x msgcomm.x msgconv.x msgen.x msgexec.x msgfilter.x \ msggrep.x msginit.x msguniq.x \ recode-sr-latin.x \ +spit.x \ gettextize.x autopoint.x # Likewise. 
@@ -37,7 +38,8 @@ man_MAN1SRC = \ msgcmp.1 msgfmt.1 msgmerge.1 msgunfmt.1 xgettext.1 \ msgattrib.1 msgcat.1 msgcomm.1 msgconv.1 msgen.1 msgexec.1 msgfilter.1 \ msggrep.1 msginit.1 msguniq.1 \ -recode-sr-latin.1 +recode-sr-latin.1 \ +spit.1 man_MAN1WIZARD = \ gettextize.1 man_MAN1AUTOTOOLS = \ @@ -50,6 +52,7 @@ msgcmp.1.html msgfmt.1.html msgmerge.1.html msgunfmt.1.html xgettext.1.html \ msgattrib.1.html msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html \ msgexec.1.html msgfilter.1.html msggrep.1.html msginit.1.html msguniq.1.html \ recode-sr-latin.1.html \ +spit.1.html \ gettextize.1.html autopoint.1.html EXTRA_DIST += help2man $(man_aux) $(man_MANS) $(man_HTML) @@ -119,6 +122,7 @@ msggrep.1: msggrep.x ../src/msggrep.c msginit.1: msginit.x ../src/msginit.c msguniq.1: msguniq.x ../src/msguniq.c recode-sr-latin.1: recode-sr-latin.x ../src/recode-sr-latin.c +spit.1: spit.x ../src/spit.c $(man_MAN1WIZARD): help2man $(top_srcdir)/../.version progname=`echo $@ | sed -e 's/\.in$$//' -e 's/\.1$$//'`; \ @@ -162,6 +166,7 @@ msggrep.1.html: msggrep.1 msginit.1.html: msginit.1 msguniq.1.html: msguniq.1 recode-sr-latin.1.html: recode-sr-latin.1 +spit.1.html: spit.1 gettextize.1.html: gettextize.1 autopoint.1.html: autopoint.1 diff --git a/gettext-tools/man/spit.x b/gettext-tools/man/spit.x new file mode 100644 index 000000000..57d795b4e --- /dev/null +++ b/gettext-tools/man/spit.x @@ -0,0 +1,4 @@ +[NAME] +spit \- translate some text through a Large Language Model +[DESCRIPTION] +.\" Add any additional description here diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in index 42d80f67d..7623c6491 100644 --- a/gettext-tools/po/POTFILES.in +++ b/gettext-tools/po/POTFILES.in @@ -83,6 +83,7 @@ src/read-resources.c src/read-stringtable.c src/read-tcl.c src/recode-sr-latin.c +src/spit.c src/urlget.c src/write-catalog.c src/write-csharp.c diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index 38c70969d..dd424b007 100644 --- 
a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -189,6 +189,16 @@ msggrep.c Main source for the 'msggrep' program. | +-------------- The 'msgen' program ++-------------- The 'spit' program +| country-table.h +| country-table.c +| Territory names according to ISO 3166. +| spit.c +| Main source for the 'spit' program. +| spit.py.in +| The same program, as a Python script. ++-------------- The 'spit' program + po-time.h po-time.c Create time stamps for use in PO/POT files. diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 380b52f3c..49a60f478 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -30,9 +30,14 @@ bin_PROGRAMS = \ msgcmp msgfmt msgmerge msgunfmt xgettext \ msgattrib msgcat msgcomm msgconv msgen msgexec msgfilter msggrep msginit msguniq \ recode-sr-latin +if BUILD_SPIT_IN_C +bin_PROGRAMS += spit +endif noinst_PROGRAMS = hostname urlget cldr-plurals +noinst_SCRIPTS = spit.py + if INSTALL_PRIVATE_LIBRARIES # Specify that libgettextsrc should be installed in $(libdir). 
lib_LTLIBRARIES = libgettextsrc.la @@ -65,7 +70,7 @@ noinst_HEADERS = \ write-qt.h \ read-desktop.h write-desktop.h \ write-xml.h \ - po-time.h plural-table.h lang-table.h format.h filters.h \ + po-time.h plural-table.h lang-table.h country-table.h format.h filters.h \ xgettext.h \ if-error.h \ rc-str-list.h xg-pos.h xg-encoding.h xg-mixed-string.h xg-formatstring.h \ @@ -409,6 +414,10 @@ else msguniq_SOURCES = ../woe32dll/c++msguniq.cc endif recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c +if BUILD_SPIT_IN_C +spit_SOURCES = spit.c country-table.c +spit_CFLAGS = $(AM_CFLAGS) $(INCJSON_C) $(INCCURL) +endif hostname_SOURCES = hostname.c urlget_SOURCES = urlget.c cldr_plurals_SOURCES = cldr-plural.y cldr-plural-exp.c cldr-plurals.c @@ -527,6 +536,9 @@ msgfilter_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD) msggrep_LDADD = $(LIBGREP) libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD) msginit_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD) msguniq_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD) +if BUILD_SPIT_IN_C +spit_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(LIBJSON_C) $(LIBCURL) $(WOE32_LDADD) +endif hostname_LDADD = $(LDADD) $(GETADDRINFO_LIB) # Specify when to relink the programs. 
@@ -546,6 +558,9 @@ msggrep_DEPENDENCIES = $(LIBGREP) libgettextsrc.la ../gnulib-lib/libgettextlib.l msginit_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD) msguniq_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD) recode_sr_latin_DEPENDENCIES = $(OTHERPROGDEPENDENCIES) +if BUILD_SPIT_IN_C +spit_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD) +endif hostname_DEPENDENCIES = $(OTHERPROGDEPENDENCIES) urlget_DEPENDENCIES = $(OTHERPROGDEPENDENCIES) @@ -566,6 +581,9 @@ msggrep_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(bindir_c_make) msginit_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(bindir_c_make) msguniq_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(bindir_c_make) recode_sr_latin_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(bindir_c_make) +if BUILD_SPIT_IN_C +spit_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(bindir_c_make) +endif hostname_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(pkglibexecdir_c_make) urlget_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(pkglibexecdir_c_make) cldr_plurals_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=$(pkglibexecdir_c_make) @@ -586,6 +604,9 @@ msggrep_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)` msginit_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)` msguniq_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)` recode_sr_latin_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)` +if BUILD_SPIT_IN_C +spit_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)` +endif hostname_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(pkglibexecdir)` urlget_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(pkglibexecdir)` cldr_plurals_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(pkglibexecdir)` @@ -771,9 +792,13 @@ EXTRA_DIST += cldr-plural.c cldr-plural.h built-sources: $(BUILT_SOURCES) -# Special rules for installation of auxiliary programs. +# Special rules for installation of 'spit' and auxiliary programs. 
install-exec-local: +if !BUILD_SPIT_IN_C + $(MKDIR_P) $(DESTDIR)$(bindir) + $(INSTALL_SCRIPT) spit.py $(DESTDIR)$(bindir)/spit +endif $(MKDIR_P) $(DESTDIR)$(pkglibexecdir) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) hostname$(EXEEXT) $(DESTDIR)$(pkglibexecdir)/hostname$(EXEEXT) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) urlget$(EXEEXT) $(DESTDIR)$(pkglibexecdir)/urlget$(EXEEXT) @@ -782,16 +807,22 @@ install-exec-local: $(INSTALL_SCRIPT) $(srcdir)/project-id $(DESTDIR)$(pkglibexecdir)/project-id installdirs-local: +if !BUILD_SPIT_IN_C + $(MKDIR_P) $(DESTDIR)$(bindir) +endif $(MKDIR_P) $(DESTDIR)$(pkglibexecdir) uninstall-local: +if !BUILD_SPIT_IN_C + $(RM) $(DESTDIR)$(bindir)/spit +endif $(RM) $(DESTDIR)$(pkglibexecdir)/hostname$(EXEEXT) $(RM) $(DESTDIR)$(pkglibexecdir)/urlget$(EXEEXT) $(RM) $(DESTDIR)$(pkglibexecdir)/cldr-plurals$(EXEEXT) $(RM) $(DESTDIR)$(pkglibexecdir)/user-email $(RM) $(DESTDIR)$(pkglibexecdir)/project-id -DISTCLEANFILES += user-email +DISTCLEANFILES += spit.py user-email # Special rules for Java compilation. diff --git a/gettext-tools/src/country-table.c b/gettext-tools/src/country-table.c new file mode 100644 index 000000000..efeed2d2e --- /dev/null +++ b/gettext-tools/src/country-table.c @@ -0,0 +1,277 @@ +/* Table of territories. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* Written by Bruno Haible , 2025. */ + +#include + +/* Specification. */ +#include "country-table.h" + +/* Derived from ISO 3166. */ +struct country_table_entry country_table[] = + { + { "AD", "Andorra" }, + { "AE", "United Arab Emirates" }, + { "AF", "Afghanistan" }, + { "AG", "Antigua and Barbuda" }, + { "AI", "Anguilla" }, + { "AL", "Albania" }, + { "AM", "Armenia" }, + { "AO", "Angola" }, + { "AQ", "Antarctica" }, + { "AR", "Argentina" }, + { "AS", "American Samoa" }, + { "AT", "Austria" }, + { "AU", "Australia" }, + { "AW", "Aruba" }, + { "AX", "Åland Islands" }, + { "AZ", "Azerbaijan" }, + { "BA", "Bosnia and Herzegovina" }, + { "BB", "Barbados" }, + { "BD", "Bangladesh" }, + { "BE", "Belgium" }, + { "BF", "Burkina Faso" }, + { "BG", "Bulgaria" }, + { "BH", "Bahrain" }, + { "BI", "Burundi" }, + { "BJ", "Benin" }, + { "BL", "Saint Barthélemy" }, + { "BM", "Bermuda" }, + { "BN", "Brunei Darussalam" }, + { "BO", "Bolivia, Plurinational State of" }, + { "BQ", "Bonaire, Sint Eustatius and Saba" }, + { "BR", "Brazil" }, + { "BS", "Bahamas" }, + { "BT", "Bhutan" }, + { "BV", "Bouvet Island" }, + { "BW", "Botswana" }, + { "BY", "Belarus" }, + { "BZ", "Belize" }, + { "CA", "Canada" }, + { "CC", "Cocos (Keeling) Islands" }, + { "CD", "Congo, Democratic Republic of the" }, + { "CF", "Central African Republic" }, + { "CG", "Congo" }, + { "CH", "Switzerland" }, + { "CI", "Côte d'Ivoire" }, + { "CK", "Cook Islands" }, + { "CL", "Chile" }, + { "CM", "Cameroon" }, + { "CN", "China" }, + { "CO", "Colombia" }, + { "CR", "Costa Rica" }, + { "CU", "Cuba" }, + { "CV", "Cabo Verde" }, + { "CW", "Curaçao" }, + { "CX", "Christmas Island" }, + { "CY", "Cyprus" }, + { "CZ", "Czechia" }, + { "DE", "Germany" }, + { "DJ", "Djibouti" }, + { "DK", "Denmark" }, + { "DM", "Dominica" }, + { "DO", "Dominican Republic" }, + { "DZ", "Algeria" }, + { "EC", "Ecuador" }, + { "EE", "Estonia" }, + { "EG", "Egypt" }, + { "EH", "Western Sahara" }, + { "ER", "Eritrea" }, + { "ES", "Spain" }, + { 
"ET", "Ethiopia" }, + { "FI", "Finland" }, + { "FJ", "Fiji" }, + { "FK", "Falkland Islands (Malvinas)" }, + { "FM", "Micronesia, Federated States of" }, + { "FO", "Faroe Islands" }, + { "FR", "France" }, + { "GA", "Gabon" }, + { "GB", "United Kingdom of Great Britain and Northern Ireland" }, + { "GD", "Grenada" }, + { "GE", "Georgia" }, + { "GF", "French Guiana" }, + { "GG", "Guernsey" }, + { "GH", "Ghana" }, + { "GI", "Gibraltar" }, + { "GL", "Greenland" }, + { "GM", "Gambia" }, + { "GN", "Guinea" }, + { "GP", "Guadeloupe" }, + { "GQ", "Equatorial Guinea" }, + { "GR", "Greece" }, + { "GS", "South Georgia and the South Sandwich Islands" }, + { "GT", "Guatemala" }, + { "GU", "Guam" }, + { "GW", "Guinea-Bissau" }, + { "GY", "Guyana" }, + { "HK", "Hong Kong" }, + { "HM", "Heard Island and McDonald Islands" }, + { "HN", "Honduras" }, + { "HR", "Croatia" }, + { "HT", "Haiti" }, + { "HU", "Hungary" }, + { "ID", "Indonesia" }, + { "IE", "Ireland" }, + { "IL", "Israel" }, + { "IM", "Isle of Man" }, + { "IN", "India" }, + { "IO", "British Indian Ocean Territory" }, + { "IQ", "Iraq" }, + { "IR", "Iran, Islamic Republic of" }, + { "IS", "Iceland" }, + { "IT", "Italy" }, + { "JE", "Jersey" }, + { "JM", "Jamaica" }, + { "JO", "Jordan" }, + { "JP", "Japan" }, + { "KE", "Kenya" }, + { "KG", "Kyrgyzstan" }, + { "KH", "Cambodia" }, + { "KI", "Kiribati" }, + { "KM", "Comoros" }, + { "KN", "Saint Kitts and Nevis" }, + { "KP", "Korea, Democratic People's Republic of" }, + { "KR", "Korea, Republic of" }, + { "KW", "Kuwait" }, + { "KY", "Cayman Islands" }, + { "KZ", "Kazakhstan" }, + { "LA", "Lao People's Democratic Republic" }, + { "LB", "Lebanon" }, + { "LC", "Saint Lucia" }, + { "LI", "Liechtenstein" }, + { "LK", "Sri Lanka" }, + { "LR", "Liberia" }, + { "LS", "Lesotho" }, + { "LT", "Lithuania" }, + { "LU", "Luxembourg" }, + { "LV", "Latvia" }, + { "LY", "Libya" }, + { "MA", "Morocco" }, + { "MC", "Monaco" }, + { "MD", "Moldova, Republic of" }, + { "ME", "Montenegro" }, + { "MF", 
"Saint Martin (French part)" }, + { "MG", "Madagascar" }, + { "MH", "Marshall Islands" }, + { "MK", "North Macedonia" }, + { "ML", "Mali" }, + { "MM", "Myanmar" }, + { "MN", "Mongolia" }, + { "MO", "Macao" }, + { "MP", "Northern Mariana Islands" }, + { "MQ", "Martinique" }, + { "MR", "Mauritania" }, + { "MS", "Montserrat" }, + { "MT", "Malta" }, + { "MU", "Mauritius" }, + { "MV", "Maldives" }, + { "MW", "Malawi" }, + { "MX", "Mexico" }, + { "MY", "Malaysia" }, + { "MZ", "Mozambique" }, + { "NA", "Namibia" }, + { "NC", "New Caledonia" }, + { "NE", "Niger" }, + { "NF", "Norfolk Island" }, + { "NG", "Nigeria" }, + { "NI", "Nicaragua" }, + { "NL", "Netherlands, Kingdom of the" }, + { "NO", "Norway" }, + { "NP", "Nepal" }, + { "NR", "Nauru" }, + { "NU", "Niue" }, + { "NZ", "New Zealand" }, + { "OM", "Oman" }, + { "PA", "Panama" }, + { "PE", "Peru" }, + { "PF", "French Polynesia" }, + { "PG", "Papua New Guinea" }, + { "PH", "Philippines" }, + { "PK", "Pakistan" }, + { "PL", "Poland" }, + { "PM", "Saint Pierre and Miquelon" }, + { "PN", "Pitcairn" }, + { "PR", "Puerto Rico" }, + { "PS", "Palestine, State of" }, + { "PT", "Portugal" }, + { "PW", "Palau" }, + { "PY", "Paraguay" }, + { "QA", "Qatar" }, + { "RE", "Réunion" }, + { "RO", "Romania" }, + { "RS", "Serbia" }, + { "RU", "Russian Federation" }, + { "RW", "Rwanda" }, + { "SA", "Saudi Arabia" }, + { "SB", "Solomon Islands" }, + { "SC", "Seychelles" }, + { "SD", "Sudan" }, + { "SE", "Sweden" }, + { "SG", "Singapore" }, + { "SH", "Saint Helena, Ascension and Tristan da Cunha" }, + { "SI", "Slovenia" }, + { "SJ", "Svalbard and Jan Mayen" }, + { "SK", "Slovakia" }, + { "SL", "Sierra Leone" }, + { "SM", "San Marino" }, + { "SN", "Senegal" }, + { "SO", "Somalia" }, + { "SR", "Suriname" }, + { "SS", "South Sudan" }, + { "ST", "Sao Tome and Principe" }, + { "SV", "El Salvador" }, + { "SX", "Sint Maarten (Dutch part)" }, + { "SY", "Syrian Arab Republic" }, + { "SZ", "Eswatini" }, + { "TC", "Turks and Caicos Islands" }, + { 
"TD", "Chad" }, + { "TF", "French Southern Territories" }, + { "TG", "Togo" }, + { "TH", "Thailand" }, + { "TJ", "Tajikistan" }, + { "TK", "Tokelau" }, + { "TL", "Timor-Leste" }, + { "TM", "Turkmenistan" }, + { "TN", "Tunisia" }, + { "TO", "Tonga" }, + { "TR", "Türkiye" }, + { "TT", "Trinidad and Tobago" }, + { "TV", "Tuvalu" }, + { "TW", "Taiwan, Province of China" }, + { "TZ", "Tanzania, United Republic of" }, + { "UA", "Ukraine" }, + { "UG", "Uganda" }, + { "UM", "United States Minor Outlying Islands" }, + { "US", "United States of America" }, + { "UY", "Uruguay" }, + { "UZ", "Uzbekistan" }, + { "VA", "Holy See" }, + { "VC", "Saint Vincent and the Grenadines" }, + { "VE", "Venezuela, Bolivarian Republic of" }, + { "VG", "Virgin Islands (British)" }, + { "VI", "Virgin Islands (U.S.)" }, + { "VN", "Viet Nam" }, + { "VU", "Vanuatu" }, + { "WF", "Wallis and Futuna" }, + { "WS", "Samoa" }, + { "YE", "Yemen" }, + { "YT", "Mayotte" }, + { "ZA", "South Africa" }, + { "ZM", "Zambia" }, + { "ZW", "Zimbabwe" } + }; +const size_t country_table_size = sizeof (country_table) / sizeof (country_table[0]); diff --git a/gettext-tools/src/country-table.h b/gettext-tools/src/country-table.h new file mode 100644 index 000000000..0f80a6825 --- /dev/null +++ b/gettext-tools/src/country-table.h @@ -0,0 +1,45 @@ +/* Table of territories. + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* Written by Bruno Haible , 2025. */ + +#ifndef _COUNTRY_TABLE_H +#define _COUNTRY_TABLE_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct country_table_entry +{ + const char *code; + const char *english; +}; + +extern LIBGETTEXTSRC_DLL_VARIABLE struct country_table_entry country_table[]; +extern LIBGETTEXTSRC_DLL_VARIABLE const size_t country_table_size; + + +#ifdef __cplusplus +} +#endif + + +#endif /* _COUNTRY_TABLE_H */ diff --git a/gettext-tools/src/spit.c b/gettext-tools/src/spit.c new file mode 100644 index 000000000..df2b08a81 --- /dev/null +++ b/gettext-tools/src/spit.c @@ -0,0 +1,666 @@ +/* + * Copyright (C) 2025 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Written by Bruno Haible , 2025. + */ + +/* + * This program passes an input to an ollama instance and prints the response. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* We use JSON-C. + It is multithread-safe, as long as we don't use any of the json_global_* + API functions, nor the json_util_get_last_err function. + Documentation: + */ +#include + +/* We use libcurl. 
+ Documentation: + */ +#include + +#include +#include "options.h" +#include "closeout.h" +#include "progname.h" +#include "relocatable.h" +#include "basename-lgpl.h" +#include "xalloc.h" +#include "xvasprintf.h" +#include "read-file.h" +#include "full-write.h" +#include "spawn-pipe.h" +#include "wait-process.h" +#include "lang-table.h" +#include "country-table.h" +#include "propername.h" +#include "gettext.h" + +#define _(str) gettext (str) + + +/* Returns the English name of a language (lowercase ISO 639 code), + or NULL if unknown. */ +static const char * +englishname_of_language (const char *language) +{ + for (size_t i = 0; i < language_table_size; i++) + if (strcmp (language_table[i].code, language) == 0) + return language_table[i].english; + + return NULL; +} + +/* Returns the English name of a country (uppercase ISO 3166 code), + or NULL if unknown. */ +static const char * +englishname_of_country (const char *country) +{ + for (size_t i = 0; i < country_table_size; i++) + if (strcmp (country_table[i].code, country) == 0) + return country_table[i].english; + + return NULL; +} + +/* Returns a name or description of a catalog name. */ +static const char * +language_in_english (const char *catalogname) +{ + const char *underscore = strchr (catalogname, '_'); + if (underscore != NULL) + { + /* Treat a few cases specially. */ + for (size_t i = 0; i < language_variant_table_size; i++) + if (strcmp (language_variant_table[i].code, catalogname) == 0) + return language_variant_table[i].english; + + /* Decompose "ll_CC" into "ll" and "CC". 
*/ + char *language = xstrdup (catalogname); + language[underscore - catalogname] = '\0'; + + const char *country = underscore + 1; + + const char *english_language = englishname_of_language (language); + if (english_language != NULL) + { + const char *english_country = englishname_of_country (country); + if (english_country != NULL) + return xasprintf ("%s (as spoken in %s)", english_language, english_country); + else + return english_language; + } + else + return catalogname; + } + else + { + /* It's a simple language name. */ + const char *english_language = englishname_of_language (catalogname); + if (english_language != NULL) + return english_language; + else + return catalogname; + } +} + +static void +curl_die () +{ + error (EXIT_FAILURE, 0, "%s", _("curl error")); +} + +static void +process_response_line (const char *line, int out_fd) +{ + /* Note: As of json-c version 0.15, the jerrno value is unreliable. + See + and . + json-c version 0.17 introduces json_tokener_error_memory, but its value + changes in version 0.18. */ + struct json_object *j; +#if JSON_C_MAJOR_VERSION > 0 || (JSON_C_MAJOR_VERSION == 0 && JSON_C_MINOR_VERSION >= 18) + enum json_tokener_error jerrno = json_tokener_error_memory; + j = json_tokener_parse_verbose (line, &jerrno); + if (j == NULL && jerrno == json_tokener_error_memory) + xalloc_die (); +#else + j = json_tokener_parse (line); +#endif + /* Ignore an empty line. */ + if (j != NULL) + { + /* We expect a JSON object. */ + if (json_object_is_type (j, json_type_object)) + { + /* Output its "response" property. 
*/ + const char *prop = + json_object_get_string(json_object_object_get (j, "response")); + if (prop != NULL) + { + size_t prop_length = strlen (prop); + if (full_write (out_fd, prop, prop_length) < prop_length) + if (errno != EPIPE) + error (EXIT_FAILURE, errno, _("write to subprocess failed")); + } + } + } +} + +/* A libcurl header callback that determines, during curl_easy_perform, + whether the HTTP request returned an error. */ +static size_t +my_header_callback (char *buffer, size_t one, size_t n, void *userdata) +{ + bool *is_error_p = (bool *) userdata; +#if DEBUG + fprintf (stderr, "in my_header_callback: buffer = %.*s\n", (int) n, buffer); +#endif + if (n >= 5 && memcmp (buffer, "HTTP/", 5) == 0) + { + /* buffer contains a line of the form "HTTP/1.1 code description". + Extract the code. */ + char old = buffer[n - 1]; + buffer[n - 1] = '\0'; + int code; + if (sscanf (buffer, "%*s %d\n", &code) == 1) + { + if (code >= 400) + *is_error_p = true; + } + buffer[n - 1] = old; + } + return n; +} + +struct my_write_locals +{ + int out_fd; + bool is_error; + char *body; + size_t body_allocated; + size_t body_start; + size_t body_end; +}; + +/* Makes room for n more bytes in l->body. */ +static void +my_write_grow (struct my_write_locals *l, size_t n) +{ + if ((l->body_end - l->body_start) + n > l->body_allocated) + { + /* Grow the buffer. 
*/ + size_t new_allocated = (l->body_end - l->body_start) + n; + if (new_allocated < 2 * l->body_allocated) + new_allocated = 2 * l->body_allocated; + if (new_allocated < 1024) + new_allocated = 1024; + char *new_body; + if (l->body_start == 0 && l->body_end > 0) + { + new_body = (char *) realloc (l->body, new_allocated); + if (new_body == NULL) + xalloc_die (); + } + else + { + new_body = (char *) malloc (new_allocated); + if (new_body == NULL) + xalloc_die (); + memcpy (new_body, l->body + l->body_start, l->body_end - l->body_start); + free (l->body); + l->body_end = l->body_end - l->body_start; + l->body_start = 0; + } + l->body = new_body; + l->body_allocated = new_allocated; + } + else + { + /* We can keep the buffer, but may need to move the contents to the + front. */ + if (l->body_end + n > l->body_allocated) + { + memmove (l->body, l->body + l->body_start, l->body_end - l->body_start); + l->body_end = l->body_end - l->body_start; + l->body_start = 0; + } + } + /* Here l->body_end + n <= l->body_allocated. */ +} + +/* A libcurl write callback that processes a piece of response body, + depending on whether the HTTP request returned an error. */ +static size_t +my_write_callback (char *buffer, size_t one, size_t n, void *userdata) +{ + struct my_write_locals *l = (struct my_write_locals *) userdata; +#if DEBUG + fprintf (stderr, "in my_write_callback: buffer = %.*s\n", (int) n, buffer); +#endif + + /* Append the buffer's contents to the body. */ + my_write_grow (l, n); + memcpy (l->body + l->body_end, buffer, n); + l->body_end += n; + + if (!l->is_error) + { + /* Process entire lines that are in the buffer. */ + char *newline = (char *) memchr (l->body + l->body_end - n, '\n', n); + while (newline != NULL) + { + /* We have an entire line. 
*/ + *newline = '\0'; + process_response_line (l->body + l->body_start, l->out_fd); + l->body_start = (newline + 1) - l->body; + + newline = (char *) memchr (l->body + l->body_start, '\n', + l->body_end - l->body_start); + } + } + + return n; +} + +/* Make the HTTP POST request to the given URL, sending its output + to the file descriptor FD. */ +static void +do_request (const char *url, const char *payload_as_string, int fd) +{ + if (curl_global_init (CURL_GLOBAL_DEFAULT)) + curl_die (); + CURL *curl = curl_easy_init (); + if (!curl) + curl_die (); + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_URL, url); + + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_POST, 1L); + + { + struct curl_slist *headers = NULL; + /* Override the Content-Type header set by CURLOPT_POST. */ + headers = curl_slist_append(headers, "Content-Type: " "application/json"); + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_HTTPHEADER, headers); + } + + /* Set the payload. + Documentation: */ + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload_as_string); + + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 1L); + +#if DEBUG > 1 + /* For debugging: */ + /* Documentation: */ + curl_easy_setopt (curl, CURLOPT_VERBOSE, 1L); +#endif + +#if 0 + /* Not reliable, see . 
*/ + curl_easy_setopt (curl, CURLOPT_FAILONERROR, 1L); +#endif + + struct my_write_locals locals; + locals.out_fd = fd; + locals.is_error = false; + locals.body = NULL; + locals.body_allocated = 0; + locals.body_start = 0; + locals.body_end = 0; + + /* Documentation: + + */ + curl_easy_setopt (curl, CURLOPT_HEADERFUNCTION, my_header_callback); + curl_easy_setopt (curl, CURLOPT_HEADERDATA, &locals.is_error); + + /* Documentation: + + */ + curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, my_write_callback); + curl_easy_setopt (curl, CURLOPT_WRITEDATA, &locals); + + /* Documentation: */ + CURLcode ret = curl_easy_perform (curl); + if (ret != CURLE_OK) + error (EXIT_FAILURE, 0, _("curl error %u: %s"), ret, + /* Documentation: */ + curl_easy_strerror (ret)); + + /* Documentation: */ + long status_code; + curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &status_code); + if (status_code != 200) + fprintf (stderr, "Status: %ld\n", status_code); + if (locals.is_error != (status_code >= 400)) + /* The my_header_callback did not work right. */ + abort (); + if (status_code >= 400) + { + /* In this case, print the response body to stderr, not to fd. */ + fprintf (stderr, "Body: "); + fwrite (locals.body + locals.body_start, + 1, locals.body_end - locals.body_start, + stderr); + fprintf (stderr, "\n"); + exit (EXIT_FAILURE); + } + + /* Most lines have already been processed through my_write_callback. + Now process the last line (without terminating newline). */ + if (locals.body_end > locals.body_start) + { + my_write_grow (&locals, 1); + *(locals.body + locals.body_end) = '\0'; + process_response_line (locals.body + locals.body_start, locals.out_fd); + } +} + +/* Display usage information and exit. 
*/ +static void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try '%s --help' for more information.\n"), + program_name); + else + { + printf (_("\ +Usage: %s [OPTION...]\n"), + program_name); + printf ("\n"); + printf (_("\ +Passes standard input to a Large Language Model (LLM) instance and prints\n\ +the response.\n\ +With the %s option, it translates standard input to the specified language\n\ +through a Large Language Model (LLM) and prints the translation.\n"), + "--to"); + printf ("\n"); + printf (_("\ +Warning: The output might not be what you expect.\n\ +It might be of the wrong form, be of poor quality, or reflect some biases.\n")); + printf ("\n"); + printf (_("\ +Options:\n")); + printf (_("\ + --species=TYPE Specifies the type of LLM. The default and only\n\ + valid value is '%s'.\n"), + "ollama"); + printf (_("\ + --url=URL Specifies the URL of the server that runs the LLM.\n")); + printf (_("\ + -m, --model=MODEL Specifies the model to use.\n")); + printf (_("\ + --to=LANGUAGE Specifies the target language.\n")); + printf (_("\ + --prompt=TEXT Specifies the prompt to use before standard input.\n\ + This option overrides the --to option.\n")); + printf (_("\ + --postprocess=COMMAND Specifies a command to post-process the output.\n")); + printf ("\n"); + printf (_("\ +Informative output:\n")); + printf ("\n"); + printf (_("\ + -h, --help Display this help and exit.\n")); + printf (_("\ + -V, --version Output version information and exit.\n")); + printf ("\n"); + /* TRANSLATORS: The first placeholder is the web address of the Savannah + project of this package. The second placeholder is the bug-reporting + email address for this package. Please add _another line_ saying + "Report translation bugs to <...>\n" with the address for translation + bugs (typically your translation team's web or email address). 
*/ + printf (_("\ +Report bugs in the bug tracker at <%s>\n\ +or by email to <%s>.\n"), + "https://savannah.gnu.org/projects/gettext", + "bug-gettext@gnu.org"); + } + + exit (status); +} + +int +main (int argc, char **argv) +{ + /* Set program name for messages. */ + set_program_name (argv[0]); + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + bindtextdomain (PACKAGE, relocate (LOCALEDIR)); + bindtextdomain ("gnulib", relocate (GNULIB_LOCALEDIR)); + textdomain (PACKAGE); + + /* Ensure that write errors on stdout are detected. */ + atexit (close_stdout); + + /* Default values for command line options. */ + bool do_help = false; + bool do_version = false; + const char *species = "ollama"; + const char *url = "http://localhost:11434"; + const char *model = NULL; + const char *to_language = NULL; + const char *prompt = NULL; + const char *postprocess = NULL; + + /* Parse command line options. */ + BEGIN_ALLOW_OMITTING_FIELD_INITIALIZERS + static const struct program_option options[] = + { + { "help", 'h', no_argument }, + { "model", 'm', required_argument }, + { "postprocess", CHAR_MAX + 5, required_argument }, + { "prompt", CHAR_MAX + 4, required_argument }, + { "species", CHAR_MAX + 1, required_argument }, + { "to", CHAR_MAX + 3, required_argument }, + { "url", CHAR_MAX + 2, required_argument }, + { "version", 'V', no_argument }, + }; + END_ALLOW_OMITTING_FIELD_INITIALIZERS + start_options (argc, argv, options, MOVE_OPTIONS_FIRST, 0); + { + int opt; + while ((opt = get_next_option ()) != -1) + switch (opt) + { + case '\0': /* Long option with key == 0. 
*/ + break; + + case 'h': /* --help */ + do_help = true; + break; + + case 'V': /* --version */ + do_version = true; + break; + + case CHAR_MAX + 1: /* --species */ + species = optarg; + break; + + case CHAR_MAX + 2: /* --url */ + url = optarg; + break; + + case 'm': /* --model */ + model = optarg; + break; + + case CHAR_MAX + 3: /* --to */ + to_language = optarg; + break; + + case CHAR_MAX + 4: /* --prompt */ + prompt = optarg; + break; + + case CHAR_MAX + 5: /* --postprocess */ + postprocess = optarg; + break; + + default: + usage (EXIT_FAILURE); + break; + } + } + + /* Version information is requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", last_component (program_name), + PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +License GPLv3+: GNU GPL version 3 or later <%s>\n\ +This is free software: you are free to change and redistribute it.\n\ +There is NO WARRANTY, to the extent permitted by law.\n\ +"), + "2025", "https://gnu.org/licenses/gpl.html"); + printf (_("Written by %s.\n"), proper_name ("Bruno Haible")); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + /* Test for extraneous arguments. */ + if (optind != argc) + error (EXIT_FAILURE, 0, _("too many arguments")); + + /* Check --species option. */ + if (strcmp (species, "ollama") != 0) + error (EXIT_FAILURE, 0, _("invalid value for %s option: %s"), + "--species", species); + + /* Check --model option. */ + if (model == NULL) + error (EXIT_FAILURE, 0, _("missing %s option"), + "--model"); + + /* Sanitize URL. */ + if (!(strlen (url) > 0 && url[strlen (url) - 1] == '/')) + url = xasprintf ("%s/", url); + + /* Read the contents of standard input. */ + errno = 0; + size_t input_length; + char *input = fread_file (stdin, 0, &input_length); + if (input == NULL) + error (EXIT_FAILURE, errno, _("error reading standard input")); + + /* Compute a default prompt. 
*/ + if (prompt == NULL && to_language != NULL) + prompt = xasprintf ("Translate into %s:", language_in_english (to_language)); + + /* Prepend the prompt. */ + if (prompt != NULL) + input = xasprintf ("%s\n%s", prompt, input); + + /* Documentation of the ollama API: + */ + + url = xasprintf ("%sapi/generate", url); + + /* Compose the payload. */ + struct json_object *payload = json_object_new_object (); + if (payload == NULL) + xalloc_die (); + { + struct json_object *value = json_object_new_string (model); + if (value == NULL) + xalloc_die (); + if (json_object_object_add (payload, "model", value)) + xalloc_die (); + } + { + struct json_object *value = json_object_new_string (input); + if (value == NULL) + xalloc_die (); + if (json_object_object_add (payload, "prompt", value)) + xalloc_die (); + } + const char *payload_as_string = + json_object_to_json_string_ext (payload, JSON_C_TO_STRING_PLAIN + | JSON_C_TO_STRING_NOSLASHESCAPE); + if (payload_as_string == NULL) + xalloc_die (); + + /* Make the request to the ollama server. */ + if (postprocess != NULL) + { + /* Open a pipe to a subprocess. */ + const char *sub_argv[4]; + sub_argv[0] = BOURNE_SHELL; + sub_argv[1] = "-c"; + sub_argv[2] = postprocess; + sub_argv[3] = NULL; + int fd[1]; + pid_t child = create_pipe_out (BOURNE_SHELL, BOURNE_SHELL, sub_argv, NULL, + NULL, NULL, false, true, true, fd); + + /* Ignore SIGPIPE here. We don't care if the subprocess terminates + successfully without having read all of the input that we feed it. */ + void (*orig_sigpipe_handler)(int); + orig_sigpipe_handler = signal (SIGPIPE, SIG_IGN); + + do_request (url, payload_as_string, fd[0]); + + close (fd[0]); + + signal (SIGPIPE, orig_sigpipe_handler); + + /* Remove zombie process from process list, and retrieve exit status. 
*/ + int exitstatus = + wait_subprocess (child, BOURNE_SHELL, true, false, true, true, NULL); + + return exitstatus; + } + else + { + do_request (url, payload_as_string, STDOUT_FILENO); + + return EXIT_SUCCESS; + } +} + +/* + * Local Variables: + * run-command: "echo 'Translate into German: "Welcome to the GNU project!"' | ./spit --model=ministral-3:14b" + * End: + */ diff --git a/gettext-tools/src/spit.py.in b/gettext-tools/src/spit.py.in new file mode 100644 index 000000000..bf988baa1 --- /dev/null +++ b/gettext-tools/src/spit.py.in @@ -0,0 +1,800 @@ +#! /usr/bin/env python3 +# +# Copyright (C) 2001-2025 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Written by Bruno Haible , 2025. + +# This program passes an input to an ollama instance and prints the response. +# +# Dependencies: requests. 

import sys

# Documentation: https://docs.python.org/3/library/argparse.html
import argparse

# Documentation: https://requests.readthedocs.io/en/latest/
# The import is guarded so that 'spit --help' and 'spit --version' keep
# working when the module is not installed; do_request() reports the
# missing dependency when a request is actually attempted.
try:
    import requests
except ImportError:
    requests = None

# Documentation: https://docs.python.org/3/library/json.html
import json

# Documentation: https://docs.python.org/3/library/subprocess.html
import subprocess

# Converted from lang-table.c:
language_table = {
    "aa": "Afar",
    "ab": "Abkhazian",
    "ace": "Achinese",
    "ae": "Avestan",
    "af": "Afrikaans",
    "ak": "Akan",
    "am": "Amharic",
    "an": "Aragonese",
    "ang": "Old English",
    "ar": "Arabic",
    "arn": "Mapudungun",
    "as": "Assamese",
    "ast": "Asturian",
    "av": "Avaric",
    "awa": "Awadhi",
    "ay": "Aymara",
    "az": "Azerbaijani",
    "ba": "Bashkir",
    "bal": "Baluchi",
    "ban": "Balinese",
    "be": "Belarusian",
    "bej": "Beja",
    "bem": "Bemba",
    "bg": "Bulgarian",
    "bh": "Bihari",
    "bho": "Bhojpuri",
    "bi": "Bislama",
    "bik": "Bikol",
    "bin": "Bini",
    "bm": "Bambara",
    "bn": "Bengali",
    "bo": "Tibetan",
    "br": "Breton",
    "bs": "Bosnian",
    "bug": "Buginese",
    "ca": "Catalan",
    "ce": "Chechen",
    "ceb": "Cebuano",
    "ch": "Chamorro",
    "co": "Corsican",
    "cr": "Cree",
    "crh": "Crimean Tatar",
    "cs": "Czech",
    "csb": "Kashubian",
    "cu": "Church Slavic",
    "cv": "Chuvash",
    "cy": "Welsh",
    "da": "Danish",
    "de": "German",
    "din": "Dinka",
    "doi": "Dogri",
    "dsb": "Lower Sorbian",
    "dv": "Divehi",
    "dz": "Dzongkha",
    "ee": "Ewe",
    "el": "Greek",
    "en": "English",
    "eo": "Esperanto",
    "es": "Spanish",
    "et": "Estonian",
    "eu": "Basque",
    "fa": "Persian",
    "ff": "Fulah",
    "fi": "Finnish",
    "fil": "Filipino",
    "fj": "Fijian",
    "fo": "Faroese",
    "fon": "Fon",
    "fr": "French",
    "fur": "Friulian",
    "fy": "Western Frisian",
    "ga": "Irish",
    "gd": "Scottish Gaelic",
    "gl": "Galician",
    "gn": "Guarani",
    "gon": "Gondi",
    "gsw": "Swiss German", # can also be "Alsatian"
    "gu": "Gujarati",
    "gv": "Manx",
    "ha": "Hausa",
    "he": "Hebrew",
    "hi": "Hindi",
    "hil": "Hiligaynon",
    "hmn": "Hmong",
    "ho": "Hiri Motu",
    "hr": "Croatian",
    "hsb": "Upper Sorbian",
    "ht": "Haitian",
    "hu": "Hungarian",
    "hy": "Armenian",
    "hz": "Herero",
    "ia": "Interlingua",
    "id": "Indonesian",
    "ie": "Interlingue",
    "ig": "Igbo",
    "ii": "Sichuan Yi",
    "ik": "Inupiak",
    "ilo": "Iloko",
    "is": "Icelandic",
    "it": "Italian",
    "iu": "Inuktitut",
    "ja": "Japanese",
    "jab": "Hyam",
    "jv": "Javanese",
    "ka": "Georgian",
    "kab": "Kabyle",
    "kaj": "Jju",
    "kam": "Kamba",
    "kbd": "Kabardian",
    "kcg": "Tyap",
    "kdm": "Kagoma",
    "kg": "Kongo",
    "ki": "Kikuyu",
    "kj": "Kuanyama",
    "kk": "Kazakh",
    "kl": "Kalaallisut",
    "km": "Central Khmer",
    "kmb": "Kimbundu",
    "kn": "Kannada",
    "ko": "Korean",
    "kr": "Kanuri",
    "kru": "Kurukh",
    "ks": "Kashmiri",
    "ku": "Kurdish",
    "kv": "Komi",
    "kw": "Cornish",
    "ky": "Kirghiz",
    "kok": "Konkani",
    "la": "Latin",
    "lb": "Letzeburgesch",
    "lg": "Ganda",
    "li": "Limburgish",
    "ln": "Lingala",
    "lo": "Laotian",
    "lt": "Lithuanian",
    "lu": "Luba-Katanga",
    "lua": "Luba-Lulua",
    "luo": "Luo",
    "lv": "Latvian",
    "mad": "Madurese",
    "mag": "Magahi",
    "mai": "Maithili",
    "mak": "Makasar",
    "man": "Mandingo",
    "men": "Mende",
    "mg": "Malagasy",
    "mh": "Marshallese",
    "mi": "Maori",
    "min": "Minangkabau",
    "mk": "Macedonian",
    "ml": "Malayalam",
    "mn": "Mongolian",
    "mni": "Manipuri",
    "mo": "Moldavian",
    "moh": "Mohawk",
    "mos": "Mossi",
    "mr": "Marathi",
    "ms": "Malay",
    "mt": "Maltese",
    "mwr": "Marwari",
    "my": "Burmese",
    "myn": "Mayan",
    "na": "Nauru",
    "nap": "Neapolitan",
    "nah": "Nahuatl",
    "nb": "Norwegian Bokmal",
    "nd": "North Ndebele",
    "nds": "Low Saxon",
    "ne": "Nepali",
    "ng": "Ndonga",
    "nl": "Dutch",
    "nn": "Norwegian Nynorsk",
    "no": "Norwegian",
    "nr": "South Ndebele",
    "nso": "Northern Sotho",
    "nv": "Navajo",
    "ny": "Nyanja",
    "nym": "Nyamwezi",
    "nyn": "Nyankole",
    "oc": "Occitan",
    "oj": "Ojibwa",
    "om": "(Afan) Oromo",
    "or": "Oriya",
    "os": "Ossetian",
    "pa": "Punjabi",
    "pag": "Pangasinan",
    "pam": "Pampanga",
    "pap": "Papiamento",
    "pbb": "Páez",
    "pi": "Pali",
    "pl": "Polish",
    "ps": "Pashto",
    "pt": "Portuguese",
    "qu": "Quechua",
    "raj": "Rajasthani",
    "rm": "Romansh",
    "rn": "Kirundi",
    "ro": "Romanian",
    "ru": "Russian",
    "rw": "Kinyarwanda",
    "sa": "Sanskrit",
    "sah": "Yakut",
    "sas": "Sasak",
    "sat": "Santali",
    "sc": "Sardinian",
    "scn": "Sicilian",
    "sd": "Sindhi",
    "se": "Northern Sami",
    "sg": "Sango",
    "shn": "Shan",
    "si": "Sinhala",
    "sid": "Sidamo",
    "sk": "Slovak",
    "sl": "Slovenian",
    "sm": "Samoan",
    "sma": "Southern Sami",
    "smj": "Lule Sami",
    "smn": "Inari Sami",
    "sms": "Skolt Sami",
    "sn": "Shona",
    "so": "Somali",
    "sq": "Albanian",
    "sr": "Serbian",
    "srr": "Serer",
    "ss": "Siswati",
    "st": "Sesotho",
    "su": "Sundanese",
    "suk": "Sukuma",
    "sus": "Susu",
    "sv": "Swedish",
    "sw": "Swahili",
    "ta": "Tamil",
    "te": "Telugu",
    "tem": "Timne",
    "tet": "Tetum",
    "tg": "Tajik",
    "th": "Thai",
    "ti": "Tigrinya",
    "tiv": "Tiv",
    "tk": "Turkmen",
    "tl": "Tagalog",
    "tn": "Setswana",
    "to": "Tonga",
    "tr": "Turkish",
    "ts": "Tsonga",
    "tt": "Tatar",
    "tum": "Tumbuka",
    "tw": "Twi",
    "ty": "Tahitian",
    "ug": "Uighur",
    "uk": "Ukrainian",
    "umb": "Umbundu",
    "ur": "Urdu",
    "uz": "Uzbek",
    "ve": "Venda",
    "vi": "Vietnamese",
    "vo": "Volapuk",
    "wal": "Walamo",
    "war": "Waray",
    "wen": "Sorbian",
    "wo": "Wolof",
    "xh": "Xhosa",
    "yao": "Yao",
    "yi": "Yiddish",
    "yo": "Yoruba",
    "za": "Zhuang",
    "zh": "Chinese",
    "zu": "Zulu",
    "zap": "Zapotec",
}
language_variant_table = {
    "de_AT": "Austrian",
    "en_GB": "English (British)",
    "es_AR": "Argentinian",
    "es_IC": "Spanish (Canary Islands)",
    "pt_BR": "Brazilian Portuguese",
    "zh_CN": "Chinese (simplified)",
    "zh_HK": "Chinese (Hong Kong)",
    "zh_TW": "Chinese (traditional)",
}

# Converted from country-table.c:
country_table = {
    "AD": "Andorra",
    "AE": "United Arab Emirates",
    "AF": "Afghanistan",
    "AG": "Antigua and Barbuda",
    "AI": "Anguilla",
    "AL": "Albania",
    "AM": "Armenia",
    "AO": "Angola",
    "AQ": "Antarctica",
    "AR": "Argentina",
    "AS": "American Samoa",
    "AT": "Austria",
    "AU": "Australia",
    "AW": "Aruba",
    "AX": "Åland Islands",
    "AZ": "Azerbaijan",
    "BA": "Bosnia and Herzegovina",
    "BB": "Barbados",
    "BD": "Bangladesh",
    "BE": "Belgium",
    "BF": "Burkina Faso",
    "BG": "Bulgaria",
    "BH": "Bahrain",
    "BI": "Burundi",
    "BJ": "Benin",
    "BL": "Saint Barthélemy",
    "BM": "Bermuda",
    "BN": "Brunei Darussalam",
    "BO": "Bolivia, Plurinational State of",
    "BQ": "Bonaire, Sint Eustatius and Saba",
    "BR": "Brazil",
    "BS": "Bahamas",
    "BT": "Bhutan",
    "BV": "Bouvet Island",
    "BW": "Botswana",
    "BY": "Belarus",
    "BZ": "Belize",
    "CA": "Canada",
    "CC": "Cocos (Keeling) Islands",
    "CD": "Congo, Democratic Republic of the",
    "CF": "Central African Republic",
    "CG": "Congo",
    "CH": "Switzerland",
    "CI": "Côte d'Ivoire",
    "CK": "Cook Islands",
    "CL": "Chile",
    "CM": "Cameroon",
    "CN": "China",
    "CO": "Colombia",
    "CR": "Costa Rica",
    "CU": "Cuba",
    "CV": "Cabo Verde",
    "CW": "Curaçao",
    "CX": "Christmas Island",
    "CY": "Cyprus",
    "CZ": "Czechia",
    "DE": "Germany",
    "DJ": "Djibouti",
    "DK": "Denmark",
    "DM": "Dominica",
    "DO": "Dominican Republic",
    "DZ": "Algeria",
    "EC": "Ecuador",
    "EE": "Estonia",
    "EG": "Egypt",
    "EH": "Western Sahara",
    "ER": "Eritrea",
    "ES": "Spain",
    "ET": "Ethiopia",
    "FI": "Finland",
    "FJ": "Fiji",
    "FK": "Falkland Islands (Malvinas)",
    "FM": "Micronesia, Federated States of",
    "FO": "Faroe Islands",
    "FR": "France",
    "GA": "Gabon",
    "GB": "United Kingdom of Great Britain and Northern Ireland",
    "GD": "Grenada",
    "GE": "Georgia",
    "GF": "French Guiana",
    "GG": "Guernsey",
    "GH": "Ghana",
    "GI": "Gibraltar",
    "GL": "Greenland",
    "GM": "Gambia",
    "GN": "Guinea",
    "GP": "Guadeloupe",
    "GQ": "Equatorial Guinea",
    "GR": "Greece",
    "GS": "South Georgia and the South Sandwich Islands",
    "GT": "Guatemala",
    "GU": "Guam",
    "GW": "Guinea-Bissau",
    "GY": "Guyana",
    "HK": "Hong Kong",
    "HM": "Heard Island and McDonald Islands",
    "HN": "Honduras",
    "HR": "Croatia",
    "HT": "Haiti",
    "HU": "Hungary",
    "ID": "Indonesia",
    "IE": "Ireland",
    "IL": "Israel",
    "IM": "Isle of Man",
    "IN": "India",
    "IO": "British Indian Ocean Territory",
    "IQ": "Iraq",
    "IR": "Iran, Islamic Republic of",
    "IS": "Iceland",
    "IT": "Italy",
    "JE": "Jersey",
    "JM": "Jamaica",
    "JO": "Jordan",
    "JP": "Japan",
    "KE": "Kenya",
    "KG": "Kyrgyzstan",
    "KH": "Cambodia",
    "KI": "Kiribati",
    "KM": "Comoros",
    "KN": "Saint Kitts and Nevis",
    "KP": "Korea, Democratic People's Republic of",
    "KR": "Korea, Republic of",
    "KW": "Kuwait",
    "KY": "Cayman Islands",
    "KZ": "Kazakhstan",
    "LA": "Lao People's Democratic Republic",
    "LB": "Lebanon",
    "LC": "Saint Lucia",
    "LI": "Liechtenstein",
    "LK": "Sri Lanka",
    "LR": "Liberia",
    "LS": "Lesotho",
    "LT": "Lithuania",
    "LU": "Luxembourg",
    "LV": "Latvia",
    "LY": "Libya",
    "MA": "Morocco",
    "MC": "Monaco",
    "MD": "Moldova, Republic of",
    "ME": "Montenegro",
    "MF": "Saint Martin (French part)",
    "MG": "Madagascar",
    "MH": "Marshall Islands",
    "MK": "North Macedonia",
    "ML": "Mali",
    "MM": "Myanmar",
    "MN": "Mongolia",
    "MO": "Macao",
    "MP": "Northern Mariana Islands",
    "MQ": "Martinique",
    "MR": "Mauritania",
    "MS": "Montserrat",
    "MT": "Malta",
    "MU": "Mauritius",
    "MV": "Maldives",
    "MW": "Malawi",
    "MX": "Mexico",
    "MY": "Malaysia",
    "MZ": "Mozambique",
    "NA": "Namibia",
    "NC": "New Caledonia",
    "NE": "Niger",
    "NF": "Norfolk Island",
    "NG": "Nigeria",
    "NI": "Nicaragua",
    "NL": "Netherlands, Kingdom of the",
    "NO": "Norway",
    "NP": "Nepal",
    "NR": "Nauru",
    "NU": "Niue",
    "NZ": "New Zealand",
    "OM": "Oman",
    "PA": "Panama",
    "PE": "Peru",
    "PF": "French Polynesia",
    "PG": "Papua New Guinea",
    "PH": "Philippines",
    "PK": "Pakistan",
    "PL": "Poland",
    "PM": "Saint Pierre and Miquelon",
    "PN": "Pitcairn",
    "PR": "Puerto Rico",
    "PS": "Palestine, State of",
    "PT": "Portugal",
    "PW": "Palau",
    "PY": "Paraguay",
    "QA": "Qatar",
    "RE": "Réunion",
    "RO": "Romania",
    "RS": "Serbia",
    "RU": "Russian Federation",
    "RW": "Rwanda",
    "SA": "Saudi Arabia",
    "SB": "Solomon Islands",
    "SC": "Seychelles",
    "SD": "Sudan",
    "SE": "Sweden",
    "SG": "Singapore",
    "SH": "Saint Helena, Ascension and Tristan da Cunha",
    "SI": "Slovenia",
    "SJ": "Svalbard and Jan Mayen",
    "SK": "Slovakia",
    "SL": "Sierra Leone",
    "SM": "San Marino",
    "SN": "Senegal",
    "SO": "Somalia",
    "SR": "Suriname",
    "SS": "South Sudan",
    "ST": "Sao Tome and Principe",
    "SV": "El Salvador",
    "SX": "Sint Maarten (Dutch part)",
    "SY": "Syrian Arab Republic",
    "SZ": "Eswatini",
    "TC": "Turks and Caicos Islands",
    "TD": "Chad",
    "TF": "French Southern Territories",
    "TG": "Togo",
    "TH": "Thailand",
    "TJ": "Tajikistan",
    "TK": "Tokelau",
    "TL": "Timor-Leste",
    "TM": "Turkmenistan",
    "TN": "Tunisia",
    "TO": "Tonga",
    "TR": "Türkiye",
    "TT": "Trinidad and Tobago",
    "TV": "Tuvalu",
    "TW": "Taiwan, Province of China",
    "TZ": "Tanzania, United Republic of",
    "UA": "Ukraine",
    "UG": "Uganda",
    "UM": "United States Minor Outlying Islands",
    "US": "United States of America",
    "UY": "Uruguay",
    "UZ": "Uzbekistan",
    "VA": "Holy See",
    "VC": "Saint Vincent and the Grenadines",
    "VE": "Venezuela, Bolivarian Republic of",
    "VG": "Virgin Islands (British)",
    "VI": "Virgin Islands (U.S.)",
    "VN": "Viet Nam",
    "VU": "Vanuatu",
    "WF": "Wallis and Futuna",
    "WS": "Samoa",
    "YE": "Yemen",
    "YT": "Mayotte",
    "ZA": "South Africa",
    "ZM": "Zambia",
    "ZW": "Zimbabwe",
}

# Text printed by --version.  It starts directly with the program name, so
# that the first output line is the one that identifies the program.
# @VERSION@ is a placeholder in this template file.
version_text = '''\
spit (GNU gettext-tools) @VERSION@
Copyright (C) 2025 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Written by Bruno Haible.
'''

# Text printed by --help.
help_text = '''\
Usage: spit [OPTION...]

Passes standard input to a Large Language Model (LLM) instance and prints
the response.
With the --to option, it translates standard input to the specified language
through a Large Language Model (LLM) and prints the translation.

Warning: The output might not be what you expect.
It might be of the wrong form, be of poor quality, or reflect some biases.

Options:
      --species=TYPE          Specifies the type of LLM.  The default and only
                              valid value is 'ollama'.
      --url=URL               Specifies the URL of the server that runs the LLM.
  -m, --model=MODEL           Specifies the model to use.
      --to=LANGUAGE           Specifies the target language.
      --prompt=TEXT           Specifies the prompt to use before standard input.
                              This option overrides the --to option.
      --postprocess=COMMAND   Specifies a command to post-process the output.

Informative output:

  -h, --help                  Display this help and exit.
  -V, --version               Output version information and exit.

Report bugs in the bug tracker at <https://savannah.gnu.org/projects/gettext>
or by email to <bug-gettext@gnu.org>.
'''


def englishname_of_language(language):
    '''Returns the English name of a language (lowercase ISO 639 code),
       or None if unknown.'''
    return language_table.get(language)


def englishname_of_country(country):
    '''Returns the English name of a country (uppercase ISO 3166 code),
       or None if unknown.'''
    return country_table.get(country)


def language_in_english(catalogname):
    '''Returns a name or description of a catalog name.
       CATALOGNAME is either a plain language code "ll" or has the form
       "ll_CC".  If the language is unknown, CATALOGNAME is returned
       unchanged.'''
    # Decompose "ll_CC" into "ll" and "CC" at the first underscore.
    language, underscore, country = catalogname.partition('_')
    if not underscore:
        # It's a simple language name.
        english_language = englishname_of_language(catalogname)
        return catalogname if english_language is None else english_language
    # Treat a few cases specially.
    english = language_variant_table.get(catalogname)
    if english is not None:
        return english
    english_language = englishname_of_language(language)
    if english_language is None:
        return catalogname
    english_country = englishname_of_country(country)
    if english_country is None:
        return english_language
    return "%s (as spoken in %s)" % (english_language, english_country)


def write_response_parts(lines, stream):
    '''Decodes the streamed response LINES (an iterable of bytes objects,
       each holding one JSON object) and writes the 'response' member of
       each to the stream STREAM, flushing after each part.'''
    for line in lines:
        # Skip empty lines: they carry no JSON object, and json.loads would
        # raise an exception on them.
        if not line:
            continue
        part = json.loads(line.decode('utf-8'))
        print(part.get('response', ''), end='', flush=True, file=stream)


def do_request(url, payload, stream):
    '''Make the HTTP POST request to the given URL, sending its output
       to the stream STREAM.
       PAYLOAD is the request body, as a string in JSON syntax.
       Terminates the program with exit status 1 if the 'requests' module
       is not available or if the server answers with a status >= 400.'''
    if requests is None:
        sys.stderr.write('%s: the Python module \'requests\' is required but not installed\n' % 'spit')
        sys.exit(1)
    response = requests.post(url, data=payload, stream=True)
    if response.status_code != 200:
        print('Status:', response.status_code, file=sys.stderr)
        if response.status_code >= 400:
            print('Body:', response.text, file=sys.stderr)
            sys.exit(1)
    # Not needed any more:
    #response.raise_for_status()

    write_response_parts(response.iter_lines(), stream)


def main():
    '''Parses the command line, reads standard input, sends it (with an
       optional prompt prepended) to the ollama server, and prints the
       response, optionally piping it through a post-processing command.
       Exits with a nonzero status in case of a usage error, a failed
       request, or a failing post-processing command.'''
    parser = argparse.ArgumentParser(
        prog='spit',
        usage='spit --help',
        add_help=False)

    parser.add_argument('--species',
                        dest='species',
                        default='ollama')
    parser.add_argument('--url',
                        dest='url',
                        default='http://localhost:11434')
    parser.add_argument('--model', '-m',
                        dest='model',
                        default=None)
    parser.add_argument('--to',
                        dest='to_language',
                        default=None)
    parser.add_argument('--prompt',
                        dest='prompt',
                        default=None)
    parser.add_argument('--postprocess',
                        dest='postprocess',
                        default=None)
    parser.add_argument('--help', '--hel', '--he', '--h', '-h',
                        dest='help',
                        default=None,
                        action='store_true')
    parser.add_argument('--version', '--versio', '--versi', '--vers', '--ver', '--ve', '--v', '-V',
                        dest='version',
                        default=None,
                        action='store_true')
    # All other arguments are collected.
    parser.add_argument('non_option_arguments',
                        nargs='*')

    # Parse the given arguments. Don't signal an error if non-option arguments
    # occur between or after options.
    cmdargs, unhandled = parser.parse_known_args()

    # Handle --version, ignoring all other options.
    if cmdargs.version is not None:
        sys.stdout.write(version_text)
        sys.exit(0)

    # Handle --help, ignoring all other options.
    if cmdargs.help is not None:
        sys.stdout.write(help_text)
        sys.exit(0)

    # Report unhandled arguments.
    for arg in unhandled:
        if arg.startswith('-'):
            message = '%s: Unrecognized option \'%s\'.\n' % ('spit', arg)
            message += 'Try \'spit --help\' for more information.\n'
            sys.stderr.write(message)
            sys.exit(1)
    # By now, all unhandled arguments were non-options.
    cmdargs.non_option_arguments += unhandled

    # Test for extraneous arguments.
    if len(cmdargs.non_option_arguments) > 0:
        message = '%s: too many arguments\n' % 'spit'
        message += 'Try \'spit --help\' for more information.\n'
        sys.stderr.write(message)
        sys.exit(1)

    # Check --species option.
    if cmdargs.species != 'ollama':
        sys.stderr.write('%s: invalid value for --species option: %s\n' % ('spit', cmdargs.species))
        sys.exit(1)

    # Check --model option.
    if cmdargs.model is None:
        sys.stderr.write('%s: missing --model option\n' % 'spit')
        sys.exit(1)

    model = cmdargs.model
    to_language = cmdargs.to_language
    prompt = cmdargs.prompt
    postprocess = cmdargs.postprocess

    # Sanitize URL.
    url = cmdargs.url
    if not url.endswith('/'):
        url += '/'

    # Read the contents of standard input.
    text = sys.stdin.read()

    # Compute a default prompt.
    if prompt is None and to_language is not None:
        prompt = 'Translate into ' + language_in_english(to_language) + ':'

    # Prepend the prompt.
    if prompt is not None:
        text = prompt + '\n' + text

    # For debugging.
    #print(text)

    # Documentation of the ollama API (endpoint "POST /api/generate"):
    # <https://docs.ollama.com/api>

    url = url + 'api/generate'

    # We need the payload in JSON syntax (with double-quotes around the strings),
    # not in Python syntax (with single-quotes around the strings):
    payload = json.dumps({ 'model': model, 'prompt': text })

    # Make the request to the ollama server.
    if postprocess is not None:
        pipe = subprocess.Popen(["sh", "-c", postprocess],
                                stdin=subprocess.PIPE, text=True)
        try:
            do_request(url, payload, pipe.stdin)
        finally:
            pipe.stdin.close()
            status = pipe.wait()
        # Propagate a failure of the post-processing command, like the
        # C implementation does.  A negative status means that the command
        # was terminated by a signal.
        if status != 0:
            sys.exit(status if status > 0 else 1)
    else:
        do_request(url, payload, sys.stdout)


if __name__ == '__main__':
    main()