git.cworth.org Git - tar/blob - lib/human.c

   1 /* human.c -- print human readable file size
   2
   3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4    2005, 2006, 2007 Free Software Foundation, Inc.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 /* Written by Paul Eggert and Larry McVoy.  */
  20
  21 #include <config.h>
  22
  23 #include "human.h"
  24
  25 #include <locale.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29
  30 #include <argmatch.h>
  31 #include <error.h>
  32 #include <intprops.h>
  33
  34 /* The maximum length of a suffix like "KiB".  */
  35 #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
  36
  37 static const char power_letter[] =
  38 {
  39   0,    /* not used */
  40   'K',  /* kibi ('k' for kilo is a special case) */
  41   'M',  /* mega or mebi */
  42   'G',  /* giga or gibi */
  43   'T',  /* tera or tebi */
  44   'P',  /* peta or pebi */
  45   'E',  /* exa or exbi */
  46   'Z',  /* zetta or 2**70 */
  47   'Y'   /* yotta or 2**80 */
  48 };
  49
  50
  51 /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
  52    possible, adjust VALUE according to the style.  */
  53
  54 static long double
  55 adjust_value (int inexact_style, long double value)
  56 {
  57   /* Do not use the floorl or ceill functions, as that would mean
  58      checking for their presence and possibly linking with the
  59      standard math library, which is a porting pain.  So leave the
  60      value alone if it is too large to easily round.  */
  61   if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
  62     {
  63       uintmax_t u = value;
  64       value = u + (inexact_style == human_ceiling && u != value);
  65     }
  66
  67   return value;
  68 }
  69
  70 /* Group the digits of NUMBER according to the grouping rules of the
  71    current locale.  NUMBER contains NUMBERLEN digits.  Modify the
  72    bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
  73    each byte inserted.  Return the starting address of the modified
  74    number.
  75
  76    To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
  77    lconv' from <locale.h>.  */
  78
  79 static char *
  80 group_number (char *number, size_t numberlen,
  81               char const *grouping, char const *thousands_sep)
  82 {
  83   register char *d;
  84   size_t grouplen = SIZE_MAX;
  85   size_t thousands_seplen = strlen (thousands_sep);
  86   size_t i = numberlen;
  87
  88   /* The maximum possible value for NUMBERLEN is the number of digits
  89      in the square of the largest uintmax_t, so double the size needed.  */
  90   char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
  91
  92   memcpy (buf, number, numberlen);
  93   d = number + numberlen;
  94
  95   for (;;)
  96     {
  97       unsigned char g = *grouping;
  98
  99       if (g)
 100         {
 101           grouplen = g < CHAR_MAX ? g : i;
 102           grouping++;
 103         }
 104
 105       if (i < grouplen)
 106         grouplen = i;
 107
 108       d -= grouplen;
 109       i -= grouplen;
 110       memcpy (d, buf + i, grouplen);
 111       if (i == 0)
 112         return d;
 113
 114       d -= thousands_seplen;
 115       memcpy (d, thousands_sep, thousands_seplen);
 116     }
 117 }
 118
 119 /* Convert N to a human readable format in BUF, using the options OPTS.
 120
 121    N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
 122    be nonnegative.
 123
 124    Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
 125    must be positive.
 126
 127    Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
 128    to determine whether to take the ceiling or floor of any result
 129    that cannot be expressed exactly.
 130
 131    If (OPTS & human_group_digits), group the thousands digits
 132    according to the locale, e.g., `1,000,000' in an American English
 133    locale.
 134
 135    If (OPTS & human_autoscale), deduce the output block size
 136    automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
 137    output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
 138    of 1000 otherwise.  For example, assuming powers of 1024, 8500
 139    would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
 140    so on.  Numbers smaller than the power aren't modified.
 141    human_autoscale is normally used together with human_SI.
 142
 143    If (OPTS & human_space_before_unit), use a space to separate the
 144    number from any suffix that is appended as described below.
 145
 146    If (OPTS & human_SI), append an SI prefix indicating which power is
 147    being used.  If in addition (OPTS & human_B), append "B" (if base
 148    1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
 149    human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
 150    power of 1024 or of 1000, depending on (OPTS &
 151    human_base_1024).  */
 152
 153 char *
 154 human_readable (uintmax_t n, char *buf, int opts,
 155                 uintmax_t from_block_size, uintmax_t to_block_size)
 156 {
 157   int inexact_style =
 158     opts & (human_round_to_nearest | human_floor | human_ceiling);
 159   unsigned int base = opts & human_base_1024 ? 1024 : 1000;
 160   uintmax_t amt;
 161   int tenths;
 162   int exponent = -1;
 163   int exponent_max = sizeof power_letter - 1;
 164   char *p;
 165   char *psuffix;
 166   char const *integerlim;
 167
 168   /* 0 means adjusted N == AMT.TENTHS;
 169      1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
 170      2 means adjusted N == AMT.TENTHS + 0.05;
 171      3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
 172   int rounding;
 173
 174   char const *decimal_point = ".";
 175   size_t decimal_pointlen = 1;
 176   char const *grouping = "";
 177   char const *thousands_sep = "";
 178   struct lconv const *l = localeconv ();
 179   size_t pointlen = strlen (l->decimal_point);
 180   if (0 < pointlen && pointlen <= MB_LEN_MAX)
 181     {
 182       decimal_point = l->decimal_point;
 183       decimal_pointlen = pointlen;
 184     }
 185   grouping = l->grouping;
 186   if (strlen (l->thousands_sep) <= MB_LEN_MAX)
 187     thousands_sep = l->thousands_sep;
 188
 189   psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
 190   p = psuffix;
 191
 192   /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
 193      units.  If this can be done exactly with integer arithmetic, do
 194      not use floating point operations.  */
 195   if (to_block_size <= from_block_size)
 196     {
 197       if (from_block_size % to_block_size == 0)
 198         {
 199           uintmax_t multiplier = from_block_size / to_block_size;
 200           amt = n * multiplier;
 201           if (amt / multiplier == n)
 202             {
 203               tenths = 0;
 204               rounding = 0;
 205               goto use_integer_arithmetic;
 206             }
 207         }
 208     }
 209   else if (from_block_size != 0 && to_block_size % from_block_size == 0)
 210     {
 211       uintmax_t divisor = to_block_size / from_block_size;
 212       uintmax_t r10 = (n % divisor) * 10;
 213       uintmax_t r2 = (r10 % divisor) * 2;
 214       amt = n / divisor;
 215       tenths = r10 / divisor;
 216       rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
 217       goto use_integer_arithmetic;
 218     }
 219
 220   {
 221     /* Either the result cannot be computed easily using uintmax_t,
 222        or from_block_size is zero.  Fall back on floating point.
 223        FIXME: This can yield answers that are slightly off.  */
 224
 225     long double dto_block_size = to_block_size;
 226     long double damt = n * (from_block_size / dto_block_size);
 227     size_t buflen;
 228     size_t nonintegerlen;
 229
 230     if (! (opts & human_autoscale))
 231       {
 232         sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
 233         buflen = strlen (buf);
 234         nonintegerlen = 0;
 235       }
 236     else
 237       {
 238         long double e = 1;
 239         exponent = 0;
 240
 241         do
 242           {
 243             e *= base;
 244             exponent++;
 245           }
 246         while (e * base <= damt && exponent < exponent_max);
 247
 248         damt /= e;
 249
 250         sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
 251         buflen = strlen (buf);
 252         nonintegerlen = decimal_pointlen + 1;
 253
 254         if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
 255             || ((opts & human_suppress_point_zero)
 256                 && buf[buflen - 1] == '0'))
 257           {
 258             sprintf (buf, "%.0Lf",
 259                      adjust_value (inexact_style, damt * 10) / 10);
 260             buflen = strlen (buf);
 261             nonintegerlen = 0;
 262           }
 263       }
 264
 265     p = psuffix - buflen;
 266     memmove (p, buf, buflen);
 267     integerlim = p + buflen - nonintegerlen;
 268   }
 269   goto do_grouping;
 270
 271  use_integer_arithmetic:
 272   {
 273     /* The computation can be done exactly, with integer arithmetic.
 274
 275        Use power of BASE notation if requested and if adjusted AMT is
 276        large enough.  */
 277
 278     if (opts & human_autoscale)
 279       {
 280         exponent = 0;
 281
 282         if (base <= amt)
 283           {
 284             do
 285               {
 286                 unsigned int r10 = (amt % base) * 10 + tenths;
 287                 unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
 288                 amt /= base;
 289                 tenths = r10 / base;
 290                 rounding = (r2 < base
 291                             ? (r2 + rounding) != 0
 292                             : 2 + (base < r2 + rounding));
 293                 exponent++;
 294               }
 295             while (base <= amt && exponent < exponent_max);
 296
 297             if (amt < 10)
 298               {
 299                 if (inexact_style == human_round_to_nearest
 300                     ? 2 < rounding + (tenths & 1)
 301                     : inexact_style == human_ceiling && 0 < rounding)
 302                   {
 303                     tenths++;
 304                     rounding = 0;
 305
 306                     if (tenths == 10)
 307                       {
 308                         amt++;
 309                         tenths = 0;
 310                       }
 311                   }
 312
 313                 if (amt < 10
 314                     && (tenths || ! (opts & human_suppress_point_zero)))
 315                   {
 316                     *--p = '0' + tenths;
 317                     p -= decimal_pointlen;
 318                     memcpy (p, decimal_point, decimal_pointlen);
 319                     tenths = rounding = 0;
 320                   }
 321               }
 322           }
 323       }
 324
 325     if (inexact_style == human_round_to_nearest
 326         ? 5 < tenths + (0 < rounding + (amt & 1))
 327         : inexact_style == human_ceiling && 0 < tenths + rounding)
 328       {
 329         amt++;
 330
 331         if ((opts & human_autoscale)
 332             && amt == base && exponent < exponent_max)
 333           {
 334             exponent++;
 335             if (! (opts & human_suppress_point_zero))
 336               {
 337                 *--p = '0';
 338                 p -= decimal_pointlen;
 339                 memcpy (p, decimal_point, decimal_pointlen);
 340               }
 341             amt = 1;
 342           }
 343       }
 344
 345     integerlim = p;
 346
 347     do
 348       {
 349         int digit = amt % 10;
 350         *--p = digit + '0';
 351       }
 352     while ((amt /= 10) != 0);
 353   }
 354
 355  do_grouping:
 356   if (opts & human_group_digits)
 357     p = group_number (p, integerlim - p, grouping, thousands_sep);
 358
 359   if (opts & human_SI)
 360     {
 361       if (exponent < 0)
 362         {
 363           uintmax_t power;
 364           exponent = 0;
 365           for (power = 1; power < to_block_size; power *= base)
 366             if (++exponent == exponent_max)
 367               break;
 368         }
 369
 370       if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
 371         *psuffix++ = ' ';
 372
 373       if (exponent)
 374         *psuffix++ = (! (opts & human_base_1024) && exponent == 1
 375                       ? 'k'
 376                       : power_letter[exponent]);
 377
 378       if (opts & human_B)
 379         {
 380           if ((opts & human_base_1024) && exponent)
 381             *psuffix++ = 'i';
 382           *psuffix++ = 'B';
 383         }
 384     }
 385
 386   *psuffix = '\0';
 387
 388   return p;
 389 }
 390
 391
 392 /* The default block size used for output.  This number may change in
 393    the future as disks get larger.  */
 394 #ifndef DEFAULT_BLOCK_SIZE
 395 # define DEFAULT_BLOCK_SIZE 1024
 396 #endif
 397
 398 static char const *const block_size_args[] = { "human-readable", "si", 0 };
 399 static int const block_size_opts[] =
 400   {
 401     human_autoscale + human_SI + human_base_1024,
 402     human_autoscale + human_SI
 403   };
 404
 405 static uintmax_t
 406 default_block_size (void)
 407 {
 408   return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
 409 }
 410
 411 static strtol_error
 412 humblock (char const *spec, uintmax_t *block_size, int *options)
 413 {
 414   int i;
 415   int opts = 0;
 416
 417   if (! spec
 418       && ! (spec = getenv ("BLOCK_SIZE"))
 419       && ! (spec = getenv ("BLOCKSIZE")))
 420     *block_size = default_block_size ();
 421   else
 422     {
 423       if (*spec == '\'')
 424         {
 425           opts |= human_group_digits;
 426           spec++;
 427         }
 428
 429       if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
 430         {
 431           opts |= block_size_opts[i];
 432           *block_size = 1;
 433         }
 434       else
 435         {
 436           char *ptr;
 437           strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
 438                                        "eEgGkKmMpPtTyYzZ0");
 439           if (e != LONGINT_OK)
 440             {
 441               *options = 0;
 442               return e;
 443             }
 444           for (; ! ('0' <= *spec && *spec <= '9'); spec++)
 445             if (spec == ptr)
 446               {
 447                 opts |= human_SI;
 448                 if (ptr[-1] == 'B')
 449                   opts |= human_B;
 450                 if (ptr[-1] != 'B' || ptr[-2] == 'i')
 451                   opts |= human_base_1024;
 452                 break;
 453               }
 454         }
 455     }
 456
 457   *options = opts;
 458   return LONGINT_OK;
 459 }
 460
 461 enum strtol_error
 462 human_options (char const *spec, int *opts, uintmax_t *block_size)
 463 {
 464   strtol_error e = humblock (spec, block_size, opts);
 465   if (*block_size == 0)
 466     {
 467       *block_size = default_block_size ();
 468       e = LONGINT_INVALID;
 469     }
 470   return e;
 471 }