/* idn.c --- Command line interface to libidn.
* Copyright (C) 2003-2015 Simon Josefsson
*
* This file is part of GNU Libidn.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include
#include
#include
#include
#include
#include
/* Gnulib headers. */
#include "error.h"
#include "gettext.h"
#define _(String) dgettext (PACKAGE, String)
#define N_(String) gettext_noop (String)
#include "progname.h"
#include "version-etc.h"
/* Libidn headers. */
#include
#include
#include
#ifdef WITH_TLD
# include
#endif
#include "idn_cmd.h"
#define GREETING \
"Copyright 2002-2015 Simon Josefsson.\n" \
"GNU Libidn is free software with ABSOLUTELY NO WARRANTY. For more\n" \
"information about these matters, see .\n"
const char version_etc_copyright[] =
/* Do *not* mark this string for translation. %s is a copyright
symbol suitable for this locale, and %d is the copyright
year. */
"Copyright %s %d Simon Josefsson.";
static void
usage (int status)
{
if (status != EXIT_SUCCESS)
fprintf (stderr, _("Try `%s --help' for more information.\n"),
program_name);
else
{
printf (_("\
Usage: %s [OPTION]... [STRINGS]...\n\
"), program_name);
fputs (_("\
Internationalized Domain Name (IDN) convert STRINGS, or standard input.\n\
\n\
"), stdout);
fputs (_("\
Command line interface to the internationalized domain name library.\n\
\n\
All strings are expected to be encoded in the preferred charset used\n\
by your locale. Use `--debug' to find out what this charset is. You\n\
can override the charset used by setting environment variable CHARSET.\n\
\n\
To process a string that starts with `-', for example `-foo', use `--'\n\
to signal the end of parameters, as in `idn --quiet -a -- -foo'.\n\
\n\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
fputs (_("\
-h, --help Print help and exit\n\
-V, --version Print version and exit\n\
"), stdout);
fputs (_("\
-s, --stringprep Prepare string according to nameprep profile\n\
-d, --punycode-decode Decode Punycode\n\
-e, --punycode-encode Encode Punycode\n\
-a, --idna-to-ascii Convert to ACE according to IDNA (default mode)\n\
-u, --idna-to-unicode Convert from ACE according to IDNA\n\
"), stdout);
fputs (_("\
--allow-unassigned Toggle IDNA AllowUnassigned flag (default off)\n\
--usestd3asciirules Toggle IDNA UseSTD3ASCIIRules flag (default off)\n\
"), stdout);
fputs (_("\
--no-tld Don't check string for TLD specific rules\n\
Only for --idna-to-ascii and --idna-to-unicode\n\
"), stdout);
fputs (_("\
-n, --nfkc Normalize string according to Unicode v3.2 NFKC\n\
"), stdout);
fputs (_("\
-p, --profile=STRING Use specified stringprep profile instead\n\
Valid stringprep profiles: `Nameprep',\n\
`iSCSI', `Nodeprep', `Resourceprep', \n\
`trace', `SASLprep'\n\
"), stdout);
fputs (_("\
--debug Print debugging information\n\
--quiet Silent operation\n\
"), stdout);
emit_bug_reporting_address ();
}
exit (status);
}
int
main (int argc, char *argv[])
{
struct gengetopt_args_info args_info;
char readbuf[BUFSIZ];
char *p, *r;
uint32_t *q;
unsigned cmdn = 0;
int rc;
setlocale (LC_ALL, "");
set_program_name (argv[0]);
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
if (cmdline_parser (argc, argv, &args_info) != 0)
return EXIT_FAILURE;
if (args_info.version_given)
{
version_etc (stdout, "idn", PACKAGE_NAME, VERSION,
"Simon Josefsson", (char *) NULL);
return EXIT_SUCCESS;
}
if (args_info.help_given)
usage (EXIT_SUCCESS);
/* Backwards compatibility: -n has always been the documented short
form for --nfkc but, before v1.10, -k was the implemented short
form. We now accept both to avoid documentation changes. */
if (args_info.hidden_nfkc_given)
args_info.nfkc_given = 1;
if (!args_info.stringprep_given &&
!args_info.punycode_encode_given && !args_info.punycode_decode_given &&
!args_info.idna_to_ascii_given && !args_info.idna_to_unicode_given &&
!args_info.nfkc_given)
args_info.idna_to_ascii_given = 1;
if ((args_info.stringprep_given ? 1 : 0) +
(args_info.punycode_encode_given ? 1 : 0) +
(args_info.punycode_decode_given ? 1 : 0) +
(args_info.idna_to_ascii_given ? 1 : 0) +
(args_info.idna_to_unicode_given ? 1 : 0) +
(args_info.nfkc_given ? 1 : 0) != 1)
{
error (0, 0, _("only one of -s, -e, -d, -a, -u or -n can be specified"));
usage (EXIT_FAILURE);
}
if (!args_info.quiet_given
&& args_info.inputs_num == 0
&& isatty (fileno (stdin)))
fprintf (stderr, "%s %s\n" GREETING, PACKAGE, VERSION);
if (args_info.debug_given)
fprintf (stderr, _("Charset `%s'.\n"), stringprep_locale_charset ());
if (!args_info.quiet_given
&& args_info.inputs_num == 0
&& isatty (fileno (stdin)))
fprintf (stderr, _("Type each input string on a line by itself, "
"terminated by a newline character.\n"));
do
{
if (cmdn < args_info.inputs_num)
{
strncpy (readbuf, args_info.inputs[cmdn++], BUFSIZ - 1);
readbuf[BUFSIZ - 1] = '\0';
}
else if (fgets (readbuf, BUFSIZ, stdin) == NULL)
{
if (feof (stdin))
break;
error (EXIT_FAILURE, errno, _("input error"));
}
if (readbuf[strlen (readbuf) - 1] == '\n')
readbuf[strlen (readbuf) - 1] = '\0';
if (args_info.stringprep_given)
{
p = stringprep_locale_to_utf8 (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from %s to UTF-8"),
stringprep_locale_charset ());
q = stringprep_utf8_to_ucs4 (p, -1, NULL);
if (!q)
{
free (p);
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
}
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "input[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
free (q);
rc = stringprep_profile (p, &r,
args_info.profile_given ?
args_info.profile_arg : "Nameprep", 0);
free (p);
if (rc != STRINGPREP_OK)
error (EXIT_FAILURE, 0, _("stringprep_profile: %s"),
stringprep_strerror (rc));
q = stringprep_utf8_to_ucs4 (r, -1, NULL);
if (!q)
{
free (r);
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
}
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "output[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
free (q);
p = stringprep_utf8_to_locale (r);
free (r);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from UTF-8 to %s"),
stringprep_locale_charset ());
fprintf (stdout, "%s\n", p);
free (p);
}
if (args_info.punycode_encode_given)
{
size_t len, len2;
p = stringprep_locale_to_utf8 (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from %s to UTF-8"),
stringprep_locale_charset ());
q = stringprep_utf8_to_ucs4 (p, -1, &len);
free (p);
if (!q)
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
if (args_info.debug_given)
{
size_t i;
for (i = 0; i < len; i++)
fprintf (stderr, "input[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
len2 = BUFSIZ - 1;
rc = punycode_encode (len, q, NULL, &len2, readbuf);
free (q);
if (rc != PUNYCODE_SUCCESS)
error (EXIT_FAILURE, 0, _("punycode_encode: %s"),
punycode_strerror (rc));
readbuf[len2] = '\0';
p = stringprep_utf8_to_locale (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from UTF-8 to %s"),
stringprep_locale_charset ());
fprintf (stdout, "%s\n", p);
free (p);
}
if (args_info.punycode_decode_given)
{
size_t len;
len = BUFSIZ;
q = (uint32_t *) malloc (len * sizeof (q[0]));
if (!q)
error (EXIT_FAILURE, ENOMEM, N_("malloc"));
rc = punycode_decode (strlen (readbuf), readbuf, &len, q, NULL);
if (rc != PUNYCODE_SUCCESS)
{
free (q);
error (EXIT_FAILURE, 0, _("punycode_decode: %s"),
punycode_strerror (rc));
}
if (args_info.debug_given)
{
size_t i;
for (i = 0; i < len; i++)
fprintf (stderr, "output[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
q[len] = 0;
r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
free (q);
if (!r)
error (EXIT_FAILURE, 0,
_("could not convert from UCS-4 to UTF-8"));
p = stringprep_utf8_to_locale (r);
free (r);
if (!r)
error (EXIT_FAILURE, 0, _("could not convert from UTF-8 to %s"),
stringprep_locale_charset ());
fprintf (stdout, "%s\n", p);
free (p);
}
if (args_info.idna_to_ascii_given)
{
p = stringprep_locale_to_utf8 (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from %s to UTF-8"),
stringprep_locale_charset ());
q = stringprep_utf8_to_ucs4 (p, -1, NULL);
free (p);
if (!q)
error (EXIT_FAILURE, 0,
_("could not convert from UCS-4 to UTF-8"));
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "input[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
rc = idna_to_ascii_4z (q, &p,
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
IDNA_USE_STD3_ASCII_RULES : 0));
free (q);
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_ascii_4z: %s"),
idna_strerror (rc));
#ifdef WITH_TLD
if (args_info.tld_flag && !args_info.no_tld_flag)
{
size_t errpos;
rc = idna_to_unicode_8z4z (p, &q,
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
IDNA_USE_STD3_ASCII_RULES : 0));
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z (TLD): %s"),
idna_strerror (rc));
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "tld[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
rc = tld_check_4z (q, &errpos, NULL);
free (q);
if (rc == TLD_INVALID)
error (EXIT_FAILURE, 0, _("tld_check_4z (position %lu): %s"),
(unsigned long) errpos, tld_strerror (rc));
if (rc != TLD_SUCCESS)
error (EXIT_FAILURE, 0, _("tld_check_4z: %s"),
tld_strerror (rc));
}
#endif
if (args_info.debug_given)
{
size_t i;
for (i = 0; p[i]; i++)
fprintf (stderr, "output[%lu] = U+%04x\n",
(unsigned long) i, p[i]);
}
fprintf (stdout, "%s\n", p);
free (p);
}
if (args_info.idna_to_unicode_given)
{
p = stringprep_locale_to_utf8 (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from %s to UTF-8"),
stringprep_locale_charset ());
q = stringprep_utf8_to_ucs4 (p, -1, NULL);
if (!q)
{
free (p);
error (EXIT_FAILURE, 0,
_("could not convert from UCS-4 to UTF-8"));
}
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "input[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
free (q);
rc = idna_to_unicode_8z4z (p, &q,
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
IDNA_USE_STD3_ASCII_RULES : 0));
free (p);
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z: %s"),
idna_strerror (rc));
if (args_info.debug_given)
{
size_t i;
for (i = 0; q[i]; i++)
fprintf (stderr, "output[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
}
#ifdef WITH_TLD
if (args_info.tld_flag)
{
size_t errpos;
rc = tld_check_4z (q, &errpos, NULL);
if (rc == TLD_INVALID)
{
free (q);
error (EXIT_FAILURE, 0,
_("tld_check_4z (position %lu): %s"),
(unsigned long) errpos, tld_strerror (rc));
}
if (rc != TLD_SUCCESS)
{
free (q);
error (EXIT_FAILURE, 0, _("tld_check_4z: %s"),
tld_strerror (rc));
}
}
#endif
r = stringprep_ucs4_to_utf8 (q, -1, NULL, NULL);
free (q);
if (!r)
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
p = stringprep_utf8_to_locale (r);
free (r);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from UTF-8 to %s"),
stringprep_locale_charset ());
fprintf (stdout, "%s\n", p);
free (p);
}
if (args_info.nfkc_given)
{
p = stringprep_locale_to_utf8 (readbuf);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from %s to UTF-8"),
stringprep_locale_charset ());
if (args_info.debug_given)
{
size_t i;
q = stringprep_utf8_to_ucs4 (p, -1, NULL);
if (!q)
{
free (p);
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
}
for (i = 0; q[i]; i++)
fprintf (stderr, "input[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
free (q);
}
r = stringprep_utf8_nfkc_normalize (p, -1);
free (p);
if (!r)
error (EXIT_FAILURE, 0, _("could not do NFKC normalization"));
if (args_info.debug_given)
{
size_t i;
q = stringprep_utf8_to_ucs4 (r, -1, NULL);
if (!q)
{
free (r);
error (EXIT_FAILURE, 0,
_("could not convert from UTF-8 to UCS-4"));
}
for (i = 0; q[i]; i++)
fprintf (stderr, "output[%lu] = U+%04x\n",
(unsigned long) i, q[i]);
free (q);
}
p = stringprep_utf8_to_locale (r);
free (r);
if (!p)
error (EXIT_FAILURE, 0, _("could not convert from UTF-8 to %s"),
stringprep_locale_charset ());
fprintf (stdout, "%s\n", p);
free (p);
}
fflush (stdout);
}
while (!feof (stdin) && !ferror (stdin) && (args_info.inputs_num == 0 ||
cmdn < args_info.inputs_num));
return EXIT_SUCCESS;
}