Update snappy to version 1.0.5.

author José Fonseca <jose.r.fonseca@gmail.com>

Wed, 14 Mar 2012 23:51:45 +0000 (23:51 +0000)

committer José Fonseca <jose.r.fonseca@gmail.com>

Thu, 15 Mar 2012 00:10:15 +0000 (00:10 +0000)
author José Fonseca <jose.r.fonseca@gmail.com>
Wed, 14 Mar 2012 23:51:45 +0000 (23:51 +0000)
committer José Fonseca <jose.r.fonseca@gmail.com>
Thu, 15 Mar 2012 00:10:15 +0000 (00:10 +0000)
diff --git a/thirdparty/snappy/ChangeLog b/thirdparty/snappy/ChangeLog

index 5e7ccccb760863639e804550ebd172d9f6355b04..f79491b5c76b01d50227c57289e16fd49d069cfd 100644 (file)
--- a/thirdparty/snappy/ChangeLog
+++ b/thirdparty/snappy/ChangeLog
@@ -1,3 +1,419 @@
+------------------------------------------------------------------------
+r60 | snappy.mirrorbot@gmail.com | 2012-02-23 18:00:36 +0100 (Thu, 23 Feb 2012) | 57 lines
+
+For 32-bit platforms, do not try to accelerate multiple neighboring
+32-bit loads with a 64-bit load during compression (it's not a win).
+
+The main target for this optimization is ARM, but 32-bit x86 gets
+a small gain, too, although there is noise in the microbenchmarks.
+It's a no-op for 64-bit x86. It does not affect decompression.
+
+Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from
+Ubuntu/Linaro), -O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9
+-mthumb-interwork, minimum 1000 iterations:
+
+  Benchmark            Time(ns)    CPU(ns) Iterations
+  ---------------------------------------------------
+  BM_ZFlat/0            1158277    1160000       1000 84.2MB/s  html (23.57 %)    [ +4.3%]
+  BM_ZFlat/1           14861782   14860000       1000 45.1MB/s  urls (50.89 %)    [ +1.1%]
+  BM_ZFlat/2             393595     390000       1000 310.5MB/s  jpg (99.88 %)    [ +0.0%]
+  BM_ZFlat/3             650583     650000       1000 138.4MB/s  pdf (82.13 %)    [ +3.1%]
+  BM_ZFlat/4            4661480    4660000       1000 83.8MB/s  html4 (23.55 %)   [ +4.3%]
+  BM_ZFlat/5             491973     490000       1000 47.9MB/s  cp (48.12 %)      [ +2.0%]
+  BM_ZFlat/6             193575     192678       1038 55.2MB/s  c (42.40 %)       [ +9.0%]
+  BM_ZFlat/7              62343      62754       3187 56.5MB/s  lsp (48.37 %)     [ +2.6%]
+  BM_ZFlat/8           17708468   17710000       1000 55.5MB/s  xls (41.34 %)     [ -0.3%]
+  BM_ZFlat/9            3755345    3760000       1000 38.6MB/s  txt1 (59.81 %)    [ +8.2%]
+  BM_ZFlat/10           3324217    3320000       1000 36.0MB/s  txt2 (64.07 %)    [ +4.2%]
+  BM_ZFlat/11          10139932   10140000       1000 40.1MB/s  txt3 (57.11 %)    [ +6.4%]
+  BM_ZFlat/12          13532109   13530000       1000 34.0MB/s  txt4 (68.35 %)    [ +5.0%]
+  BM_ZFlat/13           4690847    4690000       1000 104.4MB/s  bin (18.21 %)    [ +4.1%]
+  BM_ZFlat/14            830682     830000       1000 43.9MB/s  sum (51.88 %)     [ +1.2%]
+  BM_ZFlat/15             84784      85011       2235 47.4MB/s  man (59.36 %)     [ +1.1%]
+  BM_ZFlat/16           1293254    1290000       1000 87.7MB/s  pb (23.15 %)      [ +2.3%]
+  BM_ZFlat/17           2775155    2780000       1000 63.2MB/s  gaviota (38.27 %) [+12.2%]
+
+Core i7 in 32-bit mode (only one run and 100 iterations, though, so noisy):
+
+  Benchmark            Time(ns)    CPU(ns) Iterations
+  ---------------------------------------------------
+  BM_ZFlat/0             227582     223464       3043 437.0MB/s  html (23.57 %)    [ +7.4%]
+  BM_ZFlat/1            2982430    2918455        233 229.4MB/s  urls (50.89 %)    [ +2.9%]
+  BM_ZFlat/2              46967      46658      15217 2.5GB/s  jpg (99.88 %)       [ +0.0%]
+  BM_ZFlat/3             115298     114864       5833 783.2MB/s  pdf (82.13 %)     [ +1.5%]
+  BM_ZFlat/4             913440     899743        778 434.2MB/s  html4 (23.55 %)   [ +0.3%]
+  BM_ZFlat/5             110302     108571       7000 216.1MB/s  cp (48.12 %)      [ +0.0%]
+  BM_ZFlat/6              44409      43372      15909 245.2MB/s  c (42.40 %)       [ +0.8%]
+  BM_ZFlat/7              15713      15643      46667 226.9MB/s  lsp (48.37 %)     [ +2.7%]
+  BM_ZFlat/8            2625539    2602230        269 377.4MB/s  xls (41.34 %)     [ +1.4%]
+  BM_ZFlat/9             808884     811429        875 178.8MB/s  txt1 (59.81 %)    [ -3.9%]
+  BM_ZFlat/10            709532     700000       1000 170.5MB/s  txt2 (64.07 %)    [ +0.0%]
+  BM_ZFlat/11           2177682    2162162        333 188.2MB/s  txt3 (57.11 %)    [ -1.4%]
+  BM_ZFlat/12           2849640    2840000        250 161.8MB/s  txt4 (68.35 %)    [ -1.4%]
+  BM_ZFlat/13            849760     835476        778 585.8MB/s  bin (18.21 %)     [ +1.2%]
+  BM_ZFlat/14            165940     164571       4375 221.6MB/s  sum (51.88 %)     [ +1.4%]
+  BM_ZFlat/15             20939      20571      35000 196.0MB/s  man (59.36 %)     [ +2.1%]
+  BM_ZFlat/16            239209     236544       2917 478.1MB/s  pb (23.15 %)      [ +4.2%]
+  BM_ZFlat/17            616206     610000       1000 288.2MB/s  gaviota (38.27 %) [ -1.6%]
+
+R=sanjay
+
+------------------------------------------------------------------------
+r59 | snappy.mirrorbot@gmail.com | 2012-02-21 18:02:17 +0100 (Tue, 21 Feb 2012) | 107 lines
+
+Enable the use of unaligned loads and stores for ARM-based architectures 
+where they are available (ARMv7 and higher). This gives a significant 
+speed boost on ARM, both for compression and decompression. 
+It should not affect x86 at all. 
+ 
+There are more changes possible to speed up ARM, but it might not be 
+that easy to do without hurting x86 or making the code uglier. 
+Also, we de not try to use NEON yet. 
+ 
+Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from Ubuntu/Linaro), 
+-O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9 -mthumb-interwork: 
+ 
+Benchmark            Time(ns)    CPU(ns) Iterations
+---------------------------------------------------
+BM_UFlat/0             524806     529100        378 184.6MB/s  html            [+33.6%]
+BM_UFlat/1            5139790    5200000        100 128.8MB/s  urls            [+28.8%]
+BM_UFlat/2              86540      84166       1901 1.4GB/s  jpg               [ +0.6%]
+BM_UFlat/3             215351     210176        904 428.0MB/s  pdf             [+29.8%]
+BM_UFlat/4            2144490    2100000        100 186.0MB/s  html4           [+33.3%]
+BM_UFlat/5             194482     190000       1000 123.5MB/s  cp              [+36.2%]
+BM_UFlat/6              91843      90175       2107 117.9MB/s  c               [+38.6%]
+BM_UFlat/7              28535      28426       6684 124.8MB/s  lsp             [+34.7%]
+BM_UFlat/8            9206600    9200000        100 106.7MB/s  xls             [+42.4%]
+BM_UFlat/9            1865273    1886792        106 76.9MB/s  txt1             [+32.5%]
+BM_UFlat/10           1576809    1587301        126 75.2MB/s  txt2             [+32.3%]
+BM_UFlat/11           4968450    4900000        100 83.1MB/s  txt3             [+32.7%]
+BM_UFlat/12           6673970    6700000        100 68.6MB/s  txt4             [+32.8%]
+BM_UFlat/13           2391470    2400000        100 203.9MB/s  bin             [+29.2%]
+BM_UFlat/14            334601     344827        522 105.8MB/s  sum             [+30.6%]
+BM_UFlat/15             37404      38080       5252 105.9MB/s  man             [+33.8%]
+BM_UFlat/16            535470     540540        370 209.2MB/s  pb              [+31.2%]
+BM_UFlat/17           1875245    1886792        106 93.2MB/s  gaviota          [+37.8%]
+BM_UValidate/0         178425     179533       1114 543.9MB/s  html            [ +2.7%]
+BM_UValidate/1        2100450    2000000        100 334.8MB/s  urls            [ +5.0%]
+BM_UValidate/2           1039       1044     172413 113.3GB/s  jpg             [ +3.4%]
+BM_UValidate/3          59423      59470       3363 1.5GB/s  pdf               [ +7.8%]
+BM_UValidate/4         760716     766283        261 509.8MB/s  html4           [ +6.5%]
+BM_ZFlat/0            1204632    1204819        166 81.1MB/s  html (23.57 %)   [+32.8%]
+BM_ZFlat/1           15656190   15600000        100 42.9MB/s  urls (50.89 %)   [+27.6%]
+BM_ZFlat/2             403336     410677        487 294.8MB/s  jpg (99.88 %)   [+16.5%]
+BM_ZFlat/3             664073     671140        298 134.0MB/s  pdf (82.13 %)   [+28.4%]
+BM_ZFlat/4            4961940    4900000        100 79.7MB/s  html4 (23.55 %)  [+30.6%]
+BM_ZFlat/5             500664     501253        399 46.8MB/s  cp (48.12 %)     [+33.4%]
+BM_ZFlat/6             217276     215982        926 49.2MB/s  c (42.40 %)      [+25.0%]
+BM_ZFlat/7              64122      65487       3054 54.2MB/s  lsp (48.37 %)    [+36.1%]
+BM_ZFlat/8           18045730   18000000        100 54.6MB/s  xls (41.34 %)    [+34.4%]
+BM_ZFlat/9            4051530    4000000        100 36.3MB/s  txt1 (59.81 %)   [+25.0%]
+BM_ZFlat/10           3451800    3500000        100 34.1MB/s  txt2 (64.07 %)   [+25.7%]
+BM_ZFlat/11          11052340   11100000        100 36.7MB/s  txt3 (57.11 %)   [+24.3%]
+BM_ZFlat/12          14538690   14600000        100 31.5MB/s  txt4 (68.35 %)   [+24.7%]
+BM_ZFlat/13           5041850    5000000        100 97.9MB/s  bin (18.21 %)    [+32.0%]
+BM_ZFlat/14            908840     909090        220 40.1MB/s  sum (51.88 %)    [+22.2%]
+BM_ZFlat/15             86921      86206       1972 46.8MB/s  man (59.36 %)    [+42.2%]
+BM_ZFlat/16           1312315    1315789        152 86.0MB/s  pb (23.15 %)     [+34.5%]
+BM_ZFlat/17           3173120    3200000        100 54.9MB/s  gaviota (38.27%) [+28.1%]
+
+
+The move from 64-bit to 32-bit operations for the copies also affected 32-bit x86;
+positive on the decompression side, and slightly negative on the compression side
+(unless that is noise; I only ran once):
+
+Benchmark              Time(ns)    CPU(ns) Iterations
+-----------------------------------------------------
+BM_UFlat/0                86279      86140       7778 1.1GB/s  html             [ +7.5%]
+BM_UFlat/1               839265     822622        778 813.9MB/s  urls           [ +9.4%]
+BM_UFlat/2                 9180       9143      87500 12.9GB/s  jpg             [ +1.2%]
+BM_UFlat/3                35080      35000      20000 2.5GB/s  pdf              [+10.1%]
+BM_UFlat/4               350318     345000       2000 1.1GB/s  html4            [ +7.0%]
+BM_UFlat/5                33808      33472      21212 701.0MB/s  cp             [ +9.0%]
+BM_UFlat/6                15201      15214      46667 698.9MB/s  c              [+14.9%]
+BM_UFlat/7                 4652       4651     159091 762.9MB/s  lsp            [ +7.5%]
+BM_UFlat/8              1285551    1282528        538 765.7MB/s  xls            [+10.7%]
+BM_UFlat/9               282510     281690       2414 514.9MB/s  txt1           [+13.6%]
+BM_UFlat/10              243494     239286       2800 498.9MB/s  txt2           [+14.4%]
+BM_UFlat/11              743625     740000       1000 550.0MB/s  txt3           [+14.3%]
+BM_UFlat/12              999441     989717        778 464.3MB/s  txt4           [+16.1%]
+BM_UFlat/13              412402     410076       1707 1.2GB/s  bin              [ +7.3%]
+BM_UFlat/14               54876      54000      10000 675.3MB/s  sum            [+13.0%]
+BM_UFlat/15                6146       6100     100000 660.8MB/s  man            [+14.8%]
+BM_UFlat/16               90496      90286       8750 1.2GB/s  pb               [ +4.0%]
+BM_UFlat/17              292650     292000       2500 602.0MB/s  gaviota        [+18.1%]
+BM_UValidate/0            49620      49699      14286 1.9GB/s  html             [ +0.0%]
+BM_UValidate/1           501371     500000       1000 1.3GB/s  urls             [ +0.0%]
+BM_UValidate/2              232        227    3043478 521.5GB/s  jpg            [ +1.3%]
+BM_UValidate/3            17250      17143      43750 5.1GB/s  pdf              [ -1.3%]
+BM_UValidate/4           198643     200000       3500 1.9GB/s  html4            [ -0.9%]
+BM_ZFlat/0               227128     229415       3182 425.7MB/s  html (23.57 %) [ -1.4%]
+BM_ZFlat/1              2970089    2960000        250 226.2MB/s  urls (50.89 %) [ -1.9%]
+BM_ZFlat/2                45683      44999      15556 2.6GB/s  jpg (99.88 %)    [ +2.2%]
+BM_ZFlat/3               114661     113136       6364 795.1MB/s  pdf (82.13 %)  [ -1.5%]
+BM_ZFlat/4               919702     914286        875 427.2MB/s  html4 (23.55%) [ -1.3%]
+BM_ZFlat/5               108189     108422       6364 216.4MB/s  cp (48.12 %)   [ -1.2%]
+BM_ZFlat/6                44525      44000      15909 241.7MB/s  c (42.40 %)    [ -2.9%]
+BM_ZFlat/7                15973      15857      46667 223.8MB/s  lsp (48.37 %)  [ +0.0%]
+BM_ZFlat/8              2677888    2639405        269 372.1MB/s  xls (41.34 %)  [ -1.4%]
+BM_ZFlat/9               800715     780000       1000 186.0MB/s  txt1 (59.81 %) [ -0.4%]
+BM_ZFlat/10              700089     700000       1000 170.5MB/s  txt2 (64.07 %) [ -2.9%]
+BM_ZFlat/11             2159356    2138365        318 190.3MB/s  txt3 (57.11 %) [ -0.3%]
+BM_ZFlat/12             2796143    2779923        259 165.3MB/s  txt4 (68.35 %) [ -1.4%]
+BM_ZFlat/13              856458     835476        778 585.8MB/s  bin (18.21 %)  [ -0.1%]
+BM_ZFlat/14              166908     166857       4375 218.6MB/s  sum (51.88 %)  [ -1.4%]
+BM_ZFlat/15               21181      20857      35000 193.3MB/s  man (59.36 %)  [ -0.8%]
+BM_ZFlat/16              244009     239973       2917 471.3MB/s  pb (23.15 %)   [ -1.4%]
+BM_ZFlat/17              596362     590000       1000 297.9MB/s  gaviota (38.27%) [ +0.0%]
+
+R=sanjay
+
+------------------------------------------------------------------------
+r58 | snappy.mirrorbot@gmail.com | 2012-02-11 23:11:22 +0100 (Sat, 11 Feb 2012) | 9 lines
+
+Lower the size allocated in the "corrupted input" unit test from 256 MB
+to 2 MB. This fixes issues with running the unit test on platforms with
+little RAM (e.g. some ARM boards).
+
+Also, reactivate the 2 MB test for 64-bit platforms; there's no good
+reason why it shouldn't be.
+
+R=sanjay
+
+------------------------------------------------------------------------
+r57 | snappy.mirrorbot@gmail.com | 2012-01-08 18:55:48 +0100 (Sun, 08 Jan 2012) | 2 lines
+
+Minor refactoring to accomodate changes in Google's internal code tree.
+
+------------------------------------------------------------------------
+r56 | snappy.mirrorbot@gmail.com | 2012-01-04 14:10:46 +0100 (Wed, 04 Jan 2012) | 19 lines
+
+Fix public issue r57: Fix most warnings with -Wall, mostly signed/unsigned
+warnings. There are still some in the unit test, but the main .cc file should
+be clean. We haven't enabled -Wall for the default build, since the unit test
+is still not clean.
+
+This also fixes a real bug in the open-source implementation of
+ReadFileToStringOrDie(); it would not detect errors correctly.
+
+I had to go through some pains to avoid performance loss as the types
+were changed; I think there might still be some with 32-bit if and only if LFS
+is enabled (ie., size_t is 64-bit), but for regular 32-bit and 64-bit I can't
+see any losses, and I've diffed the generated GCC assembler between the old and
+new code without seeing any significant choices. If anything, it's ever so
+slightly faster.
+
+This may or may not enable compression of very large blocks (>2^32 bytes)
+when size_t is 64-bit, but I haven't checked, and it is still not a supported
+case.
+
+------------------------------------------------------------------------
+r55 | snappy.mirrorbot@gmail.com | 2012-01-04 11:46:39 +0100 (Wed, 04 Jan 2012) | 6 lines
+
+Add a framing format description. We do not have any implementation of this at
+the current point, but there seems to be enough of a general interest in the
+topic (cf. public bug #34).
+
+R=csilvers,sanjay
+
+------------------------------------------------------------------------
+r54 | snappy.mirrorbot@gmail.com | 2011-12-05 22:27:26 +0100 (Mon, 05 Dec 2011) | 81 lines
+
+Speed up decompression by moving the refill check to the end of the loop.
+
+This seems to work because in most of the branches, the compiler can evaluate
+“ip_limit_ - ip” in a more efficient way than reloading ip_limit_ from memory
+(either by already having the entire expression in a register, or reconstructing
+it from “avail”, or something else). Memory loads, even from L1, are seemingly
+costly in the big picture at the current decompression speeds.
+
+Microbenchmarks (64-bit, opt mode):
+
+Westmere (Intel Core i7):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0       74492      74491     187894 1.3GB/s  html      [ +5.9%]
+  BM_UFlat/1      712268     712263      19644 940.0MB/s  urls    [ +3.8%]
+  BM_UFlat/2       10591      10590    1000000 11.2GB/s  jpg      [ -6.8%]
+  BM_UFlat/3       29643      29643     469915 3.0GB/s  pdf       [ +7.9%]
+  BM_UFlat/4      304669     304667      45930 1.3GB/s  html4     [ +4.8%]
+  BM_UFlat/5       28508      28507     490077 823.1MB/s  cp      [ +4.0%]
+  BM_UFlat/6       12415      12415    1000000 856.5MB/s  c       [ +8.6%]
+  BM_UFlat/7        3415       3415    4084723 1039.0MB/s  lsp    [+18.0%]
+  BM_UFlat/8      979569     979563      14261 1002.5MB/s  xls    [ +5.8%]
+  BM_UFlat/9      230150     230148      60934 630.2MB/s  txt1    [ +5.2%]
+  BM_UFlat/10     197167     197166      71135 605.5MB/s  txt2    [ +4.7%]
+  BM_UFlat/11     607394     607390      23041 670.1MB/s  txt3    [ +5.6%]
+  BM_UFlat/12     808502     808496      17316 568.4MB/s  txt4    [ +5.0%]
+  BM_UFlat/13     372791     372788      37564 1.3GB/s  bin       [ +3.3%]
+  BM_UFlat/14      44541      44541     313969 818.8MB/s  sum     [ +5.7%]
+  BM_UFlat/15       4833       4833    2898697 834.1MB/s  man     [ +4.8%]
+  BM_UFlat/16      79855      79855     175356 1.4GB/s  pb        [ +4.8%]
+  BM_UFlat/17     245845     245843      56838 715.0MB/s  gaviota [ +5.8%]
+
+Clovertown (Intel Core 2):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0      107911     107890     100000 905.1MB/s  html    [ +2.2%]
+  BM_UFlat/1     1011237    1011041      10000 662.3MB/s  urls    [ +2.5%]
+  BM_UFlat/2       26775      26770     523089 4.4GB/s  jpg       [ +0.0%]
+  BM_UFlat/3       48103      48095     290618 1.8GB/s  pdf       [ +3.4%]
+  BM_UFlat/4      437724     437644      31937 892.6MB/s  html4   [ +2.1%]
+  BM_UFlat/5       39607      39600     358284 592.5MB/s  cp      [ +2.4%]
+  BM_UFlat/6       18227      18224     768191 583.5MB/s  c       [ +2.7%]
+  BM_UFlat/7        5171       5170    2709437 686.4MB/s  lsp     [ +3.9%]
+  BM_UFlat/8     1560291    1559989       8970 629.5MB/s  xls     [ +3.6%]
+  BM_UFlat/9      335401     335343      41731 432.5MB/s  txt1    [ +3.0%]
+  BM_UFlat/10     287014     286963      48758 416.0MB/s  txt2    [ +2.8%]
+  BM_UFlat/11     888522     888356      15752 458.1MB/s  txt3    [ +2.9%]
+  BM_UFlat/12    1186600    1186378      10000 387.3MB/s  txt4    [ +3.1%]
+  BM_UFlat/13     572295     572188      24468 855.4MB/s  bin     [ +2.1%]
+  BM_UFlat/14      64060      64049     218401 569.4MB/s  sum     [ +4.1%]
+  BM_UFlat/15       7264       7263    1916168 555.0MB/s  man     [ +1.4%]
+  BM_UFlat/16     108853     108836     100000 1039.1MB/s  pb     [ +1.7%]
+  BM_UFlat/17     364289     364223      38419 482.6MB/s  gaviota [ +4.9%]
+
+Barcelona (AMD Opteron):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0      103900     103871     100000 940.2MB/s  html    [ +8.3%]
+  BM_UFlat/1     1000435    1000107      10000 669.5MB/s  urls    [ +6.6%]
+  BM_UFlat/2       24659      24652     567362 4.8GB/s  jpg       [ +0.1%]
+  BM_UFlat/3       48206      48193     291121 1.8GB/s  pdf       [ +5.0%]
+  BM_UFlat/4      421980     421850      33174 926.0MB/s  html4   [ +7.3%]
+  BM_UFlat/5       40368      40357     346994 581.4MB/s  cp      [ +8.7%]
+  BM_UFlat/6       19836      19830     708695 536.2MB/s  c       [ +8.0%]
+  BM_UFlat/7        6100       6098    2292774 581.9MB/s  lsp     [ +9.0%]
+  BM_UFlat/8     1693093    1692514       8261 580.2MB/s  xls     [ +8.0%]
+  BM_UFlat/9      365991     365886      38225 396.4MB/s  txt1    [ +7.1%]
+  BM_UFlat/10     311330     311238      44950 383.6MB/s  txt2    [ +7.6%]
+  BM_UFlat/11     975037     974737      14376 417.5MB/s  txt3    [ +6.9%]
+  BM_UFlat/12    1303558    1303175      10000 352.6MB/s  txt4    [ +7.3%]
+  BM_UFlat/13     517448     517290      27144 946.2MB/s  bin     [ +5.5%]
+  BM_UFlat/14      66537      66518     210352 548.3MB/s  sum     [ +7.5%]
+  BM_UFlat/15       7976       7974    1760383 505.6MB/s  man     [ +5.6%]
+  BM_UFlat/16     103121     103092     100000 1097.0MB/s  pb     [ +8.7%]
+  BM_UFlat/17     391431     391314      35733 449.2MB/s  gaviota [ +6.5%]
+
+R=sanjay
+
+------------------------------------------------------------------------
+r53 | snappy.mirrorbot@gmail.com | 2011-11-23 12:14:17 +0100 (Wed, 23 Nov 2011) | 88 lines
+
+Speed up decompression by making the fast path for literals faster.
+
+We do the fast-path step as soon as possible; in fact, as soon as we know the
+literal length. Since we usually hit the fast path, we can then skip the checks
+for long literals and available input space (beyond what the fast path check
+already does).
+
+Note that this changes the decompression Writer API; however, it does not
+change the ABI, since writers are always templatized and as such never
+cross compilation units. The new API is slightly more general, in that it
+doesn't hard-code the value 16. Note that we also take care to check
+for len <= 16 first, since the other two checks almost always succeed
+(so we don't want to waste time checking for them until we have to).
+
+The improvements are most marked on Nehalem, but are generally positive
+on other platforms as well. All microbenchmarks are 64-bit, opt.
+
+Clovertown (Core 2):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0      110226     110224     100000 886.0MB/s  html    [ +1.5%]
+  BM_UFlat/1     1036523    1036508      10000 646.0MB/s  urls    [ -0.8%]
+  BM_UFlat/2       26775      26775     522570 4.4GB/s  jpg       [ +0.0%]
+  BM_UFlat/3       49738      49737     280974 1.8GB/s  pdf       [ +0.3%]
+  BM_UFlat/4      446790     446792      31334 874.3MB/s  html4   [ +0.8%]
+  BM_UFlat/5       40561      40562     350424 578.5MB/s  cp      [ +1.3%]
+  BM_UFlat/6       18722      18722     746903 568.0MB/s  c       [ +1.4%]
+  BM_UFlat/7        5373       5373    2608632 660.5MB/s  lsp     [ +8.3%]
+  BM_UFlat/8     1615716    1615718       8670 607.8MB/s  xls     [ +2.0%]
+  BM_UFlat/9      345278     345281      40481 420.1MB/s  txt1    [ +1.4%]
+  BM_UFlat/10     294855     294855      47452 404.9MB/s  txt2    [ +1.6%]
+  BM_UFlat/11     914263     914263      15316 445.2MB/s  txt3    [ +1.1%]
+  BM_UFlat/12    1222694    1222691      10000 375.8MB/s  txt4    [ +1.4%]
+  BM_UFlat/13     584495     584489      23954 837.4MB/s  bin     [ -0.6%]
+  BM_UFlat/14      66662      66662     210123 547.1MB/s  sum     [ +1.2%]
+  BM_UFlat/15       7368       7368    1881856 547.1MB/s  man     [ +4.0%]
+  BM_UFlat/16     110727     110726     100000 1021.4MB/s  pb     [ +2.3%]
+  BM_UFlat/17     382138     382141      36616 460.0MB/s  gaviota [ -0.7%]
+
+Westmere (Core i7):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0       78861      78853     177703 1.2GB/s  html      [ +2.1%]
+  BM_UFlat/1      739560     739491      18912 905.4MB/s  urls    [ +3.4%]
+  BM_UFlat/2        9867       9866    1419014 12.0GB/s  jpg      [ +3.4%]
+  BM_UFlat/3       31989      31986     438385 2.7GB/s  pdf       [ +0.2%]
+  BM_UFlat/4      319406     319380      43771 1.2GB/s  html4     [ +1.9%]
+  BM_UFlat/5       29639      29636     472862 791.7MB/s  cp      [ +5.2%]
+  BM_UFlat/6       13478      13477    1000000 789.0MB/s  c       [ +2.3%]
+  BM_UFlat/7        4030       4029    3475364 880.7MB/s  lsp     [ +8.7%]
+  BM_UFlat/8     1036585    1036492      10000 947.5MB/s  xls     [ +6.9%]
+  BM_UFlat/9      242127     242105      57838 599.1MB/s  txt1    [ +3.0%]
+  BM_UFlat/10     206499     206480      67595 578.2MB/s  txt2    [ +3.4%]
+  BM_UFlat/11     641635     641570      21811 634.4MB/s  txt3    [ +2.4%]
+  BM_UFlat/12     848847     848769      16443 541.4MB/s  txt4    [ +3.1%]
+  BM_UFlat/13     384968     384938      36366 1.2GB/s  bin       [ +0.3%]
+  BM_UFlat/14      47106      47101     297770 774.3MB/s  sum     [ +4.4%]
+  BM_UFlat/15       5063       5063    2772202 796.2MB/s  man     [ +7.7%]
+  BM_UFlat/16      83663      83656     167697 1.3GB/s  pb        [ +1.8%]
+  BM_UFlat/17     260224     260198      53823 675.6MB/s  gaviota [ -0.5%]
+
+Barcelona (Opteron):
+
+  Benchmark     Time(ns)    CPU(ns) Iterations
+  --------------------------------------------
+  BM_UFlat/0      112490     112457     100000 868.4MB/s  html    [ -0.4%]
+  BM_UFlat/1     1066719    1066339      10000 627.9MB/s  urls    [ +1.0%]
+  BM_UFlat/2       24679      24672     563802 4.8GB/s  jpg       [ +0.7%]
+  BM_UFlat/3       50603      50589     277285 1.7GB/s  pdf       [ +2.6%]
+  BM_UFlat/4      452982     452849      30900 862.6MB/s  html4   [ -0.2%]
+  BM_UFlat/5       43860      43848     319554 535.1MB/s  cp      [ +1.2%]
+  BM_UFlat/6       21419      21413     653573 496.6MB/s  c       [ +1.0%]
+  BM_UFlat/7        6646       6645    2105405 534.1MB/s  lsp     [ +0.3%]
+  BM_UFlat/8     1828487    1827886       7658 537.3MB/s  xls     [ +2.6%]
+  BM_UFlat/9      391824     391714      35708 370.3MB/s  txt1    [ +2.2%]
+  BM_UFlat/10     334913     334816      41885 356.6MB/s  txt2    [ +1.7%]
+  BM_UFlat/11    1042062    1041674      10000 390.7MB/s  txt3    [ +1.1%]
+  BM_UFlat/12    1398902    1398456      10000 328.6MB/s  txt4    [ +1.7%]
+  BM_UFlat/13     545706     545530      25669 897.2MB/s  bin     [ -0.4%]
+  BM_UFlat/14      71512      71505     196035 510.0MB/s  sum     [ +1.4%]
+  BM_UFlat/15       8422       8421    1665036 478.7MB/s  man     [ +2.6%]
+  BM_UFlat/16     112053     112048     100000 1009.3MB/s  pb     [ -0.4%]
+  BM_UFlat/17     416723     416713      33612 421.8MB/s  gaviota [ -2.0%]
+
+R=sanjay
+
+------------------------------------------------------------------------
+r52 | snappy.mirrorbot@gmail.com | 2011-11-08 15:46:39 +0100 (Tue, 08 Nov 2011) | 5 lines
+
+Fix public issue #53: Update the README to the API we actually open-sourced
+with.
+
+R=sanjay
+
+------------------------------------------------------------------------
+r51 | snappy.mirrorbot@gmail.com | 2011-10-05 14:27:12 +0200 (Wed, 05 Oct 2011) | 5 lines
+
+In the format description, use a clearer example to emphasize that varints are
+stored in little-endian. Patch from Christian von Roques.
+
+R=csilvers
+
+------------------------------------------------------------------------
+r50 | snappy.mirrorbot@gmail.com | 2011-09-15 21:34:06 +0200 (Thu, 15 Sep 2011) | 4 lines
+
+Release Snappy 1.0.4.
+
+R=sanjay
+
  ------------------------------------------------------------------------
  r49 | snappy.mirrorbot@gmail.com | 2011-09-15 11:50:05 +0200 (Thu, 15 Sep 2011) | 5 lines
  
  ------------------------------------------------------------------------
  r49 | snappy.mirrorbot@gmail.com | 2011-09-15 11:50:05 +0200 (Thu, 15 Sep 2011) | 5 lines
  
@@ -173,8 +589,8 @@ Remove an unneeded goto in the decompressor; it turns out that the
  state of ip_ after decompression (or attempted decompresion) is
  completely irrelevant, so we don't need the trailer.
  
  state of ip_ after decompression (or attempted decompresion) is
  completely irrelevant, so we don't need the trailer.
  
-Performance is, as expected, mostly flat -- there's a curious ~3–5%
-loss in the “lsp” test, but that test case is so short it is hard to say
+Performance is, as expected, mostly flat -- there's a curious ~3-5%
+loss in the "lsp" test, but that test case is so short it is hard to say
  anything definitive about why (most likely, it's some sort of
  unrelated effect).
  
  anything definitive about why (most likely, it's some sort of
  unrelated effect).
  
@@ -195,7 +611,7 @@ performance, so changing Step() into a function that decodes as much as it can
  before it saves ip_ back and returns. (Note that Step() was already inlined,
  so it is not the manual inlining that buys the performance here.)
  
  before it saves ip_ back and returns. (Note that Step() was already inlined,
  so it is not the manual inlining that buys the performance here.)
  
-The wins are about 3–6% for Core 2, 6–13% on Core i7 and 5–12% on Opteron
+The wins are about 3-6% for Core 2, 6-13% on Core i7 and 5-12% on Opteron
  (for plain array-to-array decompression, in 64-bit opt mode).
  
  There is a tiny difference in the behavior here; if an invalid literal is
  (for plain array-to-array decompression, in 64-bit opt mode).
  
  There is a tiny difference in the behavior here; if an invalid literal is
diff --git a/thirdparty/snappy/NEWS b/thirdparty/snappy/NEWS

index 11d1e955b9789c193c7feb44e8fea761a4f70e9b..60bbd17da0bbc7225dd82d3abc5c3b536639156a 100644 (file)
--- a/thirdparty/snappy/NEWS
+++ b/thirdparty/snappy/NEWS
@@ -1,3 +1,34 @@
+Snappy v1.0.5, February 24th 2012:
+
+  * More speed improvements. Exactly how big will depend on
+    the architecture:
+
+    - 3–10% faster decompression for the base case (x86-64).
+
+    - ARMv7 and higher can now use unaligned accesses,
+      and will see about 30% faster decompression and
+      20–40% faster compression.
+
+    - 32-bit platforms (ARM and 32-bit x86) will see 2–5%
+      faster compression.
+
+    These are all cumulative (e.g., ARM gets all three speedups).
+
+  * Fixed an issue where the unit test would crash on system
+    with less than 256 MB address space available,
+    e.g. some embedded platforms.
+
+  * Added a framing format description, for use over e.g. HTTP,
+    or for a command-line compressor. We do not have any
+    implementations of this at the current point, but there seems
+    to be enough of a general interest in the topic.
+    Also make the format description slightly clearer.
+
+  * Remove some compile-time warnings in -Wall
+    (mostly signed/unsigned comparisons), for easier embedding
+    into projects that use -Wall -Werror.
+
+
  Snappy v1.0.4, September 15th 2011:
  
    * Speeded up the decompressor somewhat; typically about 2–8%
  Snappy v1.0.4, September 15th 2011:
  
    * Speeded up the decompressor somewhat; typically about 2–8%
diff --git a/thirdparty/snappy/README b/thirdparty/snappy/README

index df8f0e178e2b4c86f617256849b27cfa395f1f34..3bc8888f991b76bd5f6b69d273b2d3de34d1ad9d 100644 (file)
--- a/thirdparty/snappy/README
+++ b/thirdparty/snappy/README
@@ -76,11 +76,11 @@ your calling file, and link against the compiled library.
  
  There are many ways to call Snappy, but the simplest possible is
  
  
  There are many ways to call Snappy, but the simplest possible is
  
-  snappy::Compress(input, &output);
+  snappy::Compress(input.data(), input.size(), &output);
  
  and similarly
  
  
  and similarly
  
-  snappy::Uncompress(input, &output);
+  snappy::Uncompress(input.data(), input.size(), &output);
  
  where "input" and "output" are both instances of std::string.
  
  
  where "input" and "output" are both instances of std::string.
  
diff --git a/thirdparty/snappy/format_description.txt b/thirdparty/snappy/format_description.txt

index 43d7a98238c4b3518fd63c6fc9291a4d41493e16..20db66c1f26b3f1850d7105344bc43216cbda7fb 100644 (file)
--- a/thirdparty/snappy/format_description.txt
+++ b/thirdparty/snappy/format_description.txt
@@ -1,5 +1,5 @@
  Snappy compressed format description
  Snappy compressed format description
-Last revised: 2011-08-09
+Last revised: 2011-10-05
  
  
  This is not a formal specification, but should suffice to explain most
  
  
  This is not a formal specification, but should suffice to explain most
@@ -21,8 +21,8 @@ The stream starts with the uncompressed length (up to a maximum of 2^32 - 1),
  stored as a little-endian varint. Varints consist of a series of bytes,
  where the lower 7 bits are data and the upper bit is set iff there are
  more bytes to be read. In other words, an uncompressed length of 64 would
  stored as a little-endian varint. Varints consist of a series of bytes,
  where the lower 7 bits are data and the upper bit is set iff there are
  more bytes to be read. In other words, an uncompressed length of 64 would
-be stored as 0x40, and an uncompressed length of 2097151 (0x1FFFFF)
-would be stored as 0xFF 0xFF 0x7F.
+be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
+would be stored as 0xFE 0xFF 0x7F.
  
  
  2. The compressed stream itself
  
  
  2. The compressed stream itself
diff --git a/thirdparty/snappy/framing_format.txt b/thirdparty/snappy/framing_format.txt

new file mode 100644 (file)

index 0000000..08fda03
--- /dev/null
+++ b/thirdparty/snappy/framing_format.txt
@@ -0,0 +1,124 @@
+Snappy framing format description
+Last revised: 2011-12-15
+
+This format decribes a framing format for Snappy, allowing compressing to
+files or streams that can then more easily be decompressed without having
+to hold the entire stream in memory. It also provides data checksums to
+help verify integrity. It does not provide metadata checksums, so it does
+not protect against e.g. all forms of truncations.
+
+Implementation of the framing format is optional for Snappy compressors and
+decompressor; it is not part of the Snappy core specification.
+
+
+1. General structure
+
+The file consists solely of chunks, lying back-to-back with no padding
+in between. Each chunk consists first a single byte of chunk identifier,
+then a two-byte little-endian length of the chunk in bytes (from 0 to 65535,
+inclusive), and then the data if any. The three bytes of chunk header is not
+counted in the data length.
+
+The different chunk types are listed below. The first chunk must always
+be the stream identifier chunk (see section 4.1, below). The stream
+ends when the file ends -- there is no explicit end-of-file marker.
+
+
+2. File type identification
+
+The following identifiers for this format are recommended where appropriate.
+However, note that none have been registered officially, so this is only to
+be taken as a guideline. We use "Snappy framed" to distinguish between this
+format and raw Snappy data.
+
+  File extension:         .sz
+  MIME type:              application/x-snappy-framed
+  HTTP Content-Encoding:  x-snappy-framed
+
+
+3. Checksum format
+
+Some chunks have data protected by a checksum (the ones that do will say so
+explicitly). The checksums are always masked CRC-32Cs.
+
+A description of CRC-32C can be found in RFC 3720, section 12.1, with
+examples in section B.4.
+
+Checksums are not stored directly, but masked, as checksumming data and
+then its own checksum can be problematic. The masking is the same as used
+in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
+0xa282ead8 (using wraparound as normal for unsigned integers). This is
+equivalent to the following C code:
+
+  uint32_t mask_checksum(uint32_t x) {
+    return ((x >> 15) | (x << 17)) + 0xa282ead8;
+  }
+
+Note that the masking is reversible.
+
+The checksum is always stored as a four bytes long integer, in little-endian.
+
+
+4. Chunk types
+
+The currently supported chunk types are described below. The list may
+be extended in the future.
+
+
+4.1. Stream identifier (chunk type 0xff)
+
+The stream identifier is always the first element in the stream.
+It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that
+a valid Snappy framed stream always starts with the bytes
+
+  0xff 0x06 0x00 0x73 0x4e 0x61 0x50 0x70 0x59
+
+The stream identifier chunk can come multiple times in the stream besides
+the first; if such a chunk shows up, it should simply be ignored, assuming
+it has the right length and contents. This allows for easy concatenation of
+compressed files without the need for re-framing.
+
+
+4.2. Compressed data (chunk type 0x00)
+
+Compressed data chunks contain a normal Snappy compressed bitstream;
+see the compressed format specification. The compressed data is preceded by
+the CRC-32C (see section 3) of the _uncompressed_ data.
+
+Note that the data portion of the chunk, i.e., the compressed contents,
+can be at most 65531 bytes (2^16 - 1, minus the checksum).
+However, we place an additional restriction that the uncompressed data
+in a chunk must be no longer than 32768 bytes. This allows consumers to
+easily use small fixed-size buffers.
+
+
+4.3. Uncompressed data (chunk type 0x01)
+
+Uncompressed data chunks allow a compressor to send uncompressed,
+raw data; this is useful if, for instance, uncompressible or
+near-incompressible data is detected, and faster decompression is desired.
+
+As in the compressed chunks, the data is preceded by its own masked
+CRC-32C (see section 3).
+
+An uncompressed data chunk, like compressed data chunks, should contain
+no more than 32768 data bytes, so the maximum legal chunk length with the
+checksum is 32772.
+
+
+4.4. Reserved unskippable chunks (chunk types 0x02-0x7f)
+
+These are reserved for future expansion. A decoder that sees such a chunk
+should immediately return an error, as it must assume it cannot decode the
+stream correctly.
+
+Future versions of this specification may define meanings for these chunks.
+
+
+4.5. Reserved skippable chunks (chunk types 0x80-0xfe)
+
+These are also reserved for future expansion, but unlike the chunks
+described in 4.4, a decoder seeing these must skip them and continue
+decoding.
+
+Future versions of this specification may define meanings for these chunks.
diff --git a/thirdparty/snappy/snappy-sinksource.cc b/thirdparty/snappy/snappy-sinksource.cc

index 1017895f962d6ad536893f804a4cd0bf0bdd673e..5844552cb7e1bb7739cacb1d359360fa6a5620b4 100644 (file)
--- a/thirdparty/snappy/snappy-sinksource.cc
+++ b/thirdparty/snappy/snappy-sinksource.cc
@@ -68,5 +68,4 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
    return dest_;
  }
  
    return dest_;
  }
  
-
  }
  }
diff --git a/thirdparty/snappy/snappy-sinksource.h b/thirdparty/snappy/snappy-sinksource.h

index 430baeabb0ed98e3e0c3137f73a2fd574d65abf9..faabfa1e6f1fd8f397fc9c5ee8de966c52c0c880 100644 (file)
--- a/thirdparty/snappy/snappy-sinksource.h
+++ b/thirdparty/snappy/snappy-sinksource.h
@@ -60,6 +60,7 @@ class Sink {
    // The default implementation always returns the scratch buffer.
    virtual char* GetAppendBuffer(size_t length, char* scratch);
  
    // The default implementation always returns the scratch buffer.
    virtual char* GetAppendBuffer(size_t length, char* scratch);
  
+
   private:
    // No copying
    Sink(const Sink&);
   private:
    // No copying
    Sink(const Sink&);
diff --git a/thirdparty/snappy/snappy-stubs-internal.h b/thirdparty/snappy/snappy-stubs-internal.h

index 021528893357ae5b610788a8763e6b27bc1910fc..6033cdfb41a10df6424dbb478aaa1e149f7203cc 100644 (file)
--- a/thirdparty/snappy/snappy-stubs-internal.h
+++ b/thirdparty/snappy/snappy-stubs-internal.h
@@ -86,10 +86,9 @@ using namespace std;
  // version (anyone who wants to regenerate it can just do the call
  // themselves within main()).
  #define DEFINE_bool(flag_name, default_value, description) \
  // version (anyone who wants to regenerate it can just do the call
  // themselves within main()).
  #define DEFINE_bool(flag_name, default_value, description) \
-  bool FLAGS_ ## flag_name = default_value;
+  bool FLAGS_ ## flag_name = default_value
  #define DECLARE_bool(flag_name) \
  #define DECLARE_bool(flag_name) \
-  extern bool FLAGS_ ## flag_name;
-#define REGISTER_MODULE_INITIALIZER(name, code)
+  extern bool FLAGS_ ## flag_name
  
  namespace snappy {
  
  
  namespace snappy {
  
@@ -179,6 +178,8 @@ class LogMessageVoidify {
  
  // Potentially unaligned loads and stores.
  
  
  // Potentially unaligned loads and stores.
  
+// x86 and PowerPC can simply do these loads and stores native.
+
  #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
  
  #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
  #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
  
  #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
@@ -189,6 +190,47 @@ class LogMessageVoidify {
  #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
  #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
  
  #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
  #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
  
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
+
+#elif defined(__arm__) && \
+      !defined(__ARM_ARCH_5__) && \
+      !defined(__ARM_ARCH_5T__) && \
+      !defined(__ARM_ARCH_5TE__) && \
+      !defined(__ARM_ARCH_5TEJ__) && \
+      !defined(__ARM_ARCH_6__) && \
+      !defined(__ARM_ARCH_6J__) && \
+      !defined(__ARM_ARCH_6K__) && \
+      !defined(__ARM_ARCH_6Z__) && \
+      !defined(__ARM_ARCH_6ZK__) && \
+      !defined(__ARM_ARCH_6T2__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+
+// TODO(user): NEON supports unaligned 64-bit loads and stores.
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
  #else
  
  // These functions are provided for architectures that don't support
  #else
  
  // These functions are provided for architectures that don't support
@@ -226,6 +268,20 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
  
  #endif
  
  
  #endif
  
+// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
+// on some platforms, in particular ARM.
+inline void UnalignedCopy64(const void *src, void *dst) {
+  if (sizeof(void *) == 8) {
+    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
+  } else {
+    const char *src_char = reinterpret_cast<const char *>(src);
+    char *dst_char = reinterpret_cast<char *>(dst);
+
+    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
+    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
+  }
+}
+
  // The following guarantees declaration of the byte swap functions.
  #ifdef WORDS_BIGENDIAN
  
  // The following guarantees declaration of the byte swap functions.
  #ifdef WORDS_BIGENDIAN
  
diff --git a/thirdparty/snappy/snappy-stubs-public.h b/thirdparty/snappy/snappy-stubs-public.h

index d439cb4dee1cdd831939fcf6227acc4a8f7d4d26..9ee4ca52b357503b7d706008e30e5886af8658b2 100644 (file)
--- a/thirdparty/snappy/snappy-stubs-public.h
+++ b/thirdparty/snappy/snappy-stubs-public.h
@@ -46,7 +46,7 @@
  
  #define SNAPPY_MAJOR 1
  #define SNAPPY_MINOR 0
  
  #define SNAPPY_MAJOR 1
  #define SNAPPY_MINOR 0
-#define SNAPPY_PATCHLEVEL 4
+#define SNAPPY_PATCHLEVEL 5
  #define SNAPPY_VERSION \
      ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
  
  #define SNAPPY_VERSION \
      ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
  
diff --git a/thirdparty/snappy/snappy-test.cc b/thirdparty/snappy/snappy-test.cc

index 2c503886e47f4d66892d012cc6f0860d06392247..223cd92d5ba334761e2540cdfd1a472685ce9ab2 100644 (file)
--- a/thirdparty/snappy/snappy-test.cc
+++ b/thirdparty/snappy/snappy-test.cc
@@ -353,7 +353,6 @@ int ZLib::CompressAtMostOrAll(Bytef *dest, uLongf *destLen,
    // compression.
    err = deflate(&comp_stream_, flush_mode);
  
    // compression.
    err = deflate(&comp_stream_, flush_mode);
  
-  const uLong source_bytes_consumed = *sourceLen - comp_stream_.avail_in;
    *sourceLen = comp_stream_.avail_in;
  
    if ((err == Z_STREAM_END || err == Z_OK)
    *sourceLen = comp_stream_.avail_in;
  
    if ((err == Z_STREAM_END || err == Z_OK)
@@ -397,7 +396,6 @@ int ZLib::CompressChunkOrAll(Bytef *dest, uLongf *destLen,
  int ZLib::Compress(Bytef *dest, uLongf *destLen,
                     const Bytef *source, uLong sourceLen) {
    int err;
  int ZLib::Compress(Bytef *dest, uLongf *destLen,
                     const Bytef *source, uLong sourceLen) {
    int err;
-  const uLongf orig_destLen = *destLen;
    if ( (err=CompressChunkOrAll(dest, destLen, source, sourceLen,
                                 Z_FINISH)) != Z_OK )
      return err;
    if ( (err=CompressChunkOrAll(dest, destLen, source, sourceLen,
                                 Z_FINISH)) != Z_OK )
      return err;
diff --git a/thirdparty/snappy/snappy-test.h b/thirdparty/snappy/snappy-test.h

index 649f26e7ac1fd2608aeb23f93bf7ab8101525829..ef6a95539d1e454091f5a9be82f2d84b62073a79 100644 (file)
--- a/thirdparty/snappy/snappy-test.h
+++ b/thirdparty/snappy/snappy-test.h
@@ -135,7 +135,7 @@ namespace File {
      while (!feof(fp)) {
        char buf[4096];
        size_t ret = fread(buf, 1, 4096, fp);
      while (!feof(fp)) {
        char buf[4096];
        size_t ret = fread(buf, 1, 4096, fp);
-      if (ret == -1) {
+      if (ret == 0 && ferror(fp)) {
          perror("fread");
          exit(1);
        }
          perror("fread");
          exit(1);
        }
diff --git a/thirdparty/snappy/snappy.cc b/thirdparty/snappy/snappy.cc

index c79edb58a7fe50db0d35ff724d042f369fa99570..4d4eb42a4998e819bf91078c9878e5ff95d80608 100644 (file)
--- a/thirdparty/snappy/snappy.cc
+++ b/thirdparty/snappy/snappy.cc
@@ -140,12 +140,12 @@ const int kMaxIncrementCopyOverflow = 10;
  
  static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
    while (op - src < 8) {
  
  static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
    while (op - src < 8) {
-    UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+    UnalignedCopy64(src, op);
      len -= op - src;
      op += op - src;
    }
    while (len > 0) {
      len -= op - src;
      op += op - src;
    }
    while (len > 0) {
-    UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+    UnalignedCopy64(src, op);
      src += 8;
      op += 8;
      len -= 8;
      src += 8;
      op += 8;
      len -= 8;
@@ -172,8 +172,8 @@ static inline char* EmitLiteral(char* op,
      //   - The output will always have 32 spare bytes (see
      //     MaxCompressedLength).
      if (allow_fast_path && len <= 16) {
      //   - The output will always have 32 spare bytes (see
      //     MaxCompressedLength).
      if (allow_fast_path && len <= 16) {
-      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
-      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8));
+      UnalignedCopy64(literal, op);
+      UnalignedCopy64(literal + 8, op + 8);
        return op + len;
      }
    } else {
        return op + len;
      }
    } else {
@@ -194,13 +194,13 @@ static inline char* EmitLiteral(char* op,
    return op + len;
  }
  
    return op + len;
  }
  
-static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
+static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
    DCHECK_LE(len, 64);
    DCHECK_GE(len, 4);
    DCHECK_LT(offset, 65536);
  
    if ((len < 12) && (offset < 2048)) {
    DCHECK_LE(len, 64);
    DCHECK_GE(len, 4);
    DCHECK_LT(offset, 65536);
  
    if ((len < 12) && (offset < 2048)) {
-    int len_minus_4 = len - 4;
+    size_t len_minus_4 = len - 4;
      assert(len_minus_4 < 8);            // Must fit in 3 bits
      *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
      *op++ = offset & 0xff;
      assert(len_minus_4 < 8);            // Must fit in 3 bits
      *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
      *op++ = offset & 0xff;
@@ -212,7 +212,7 @@ static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
    return op;
  }
  
    return op;
  }
  
-static inline char* EmitCopy(char* op, int offset, int len) {
+static inline char* EmitCopy(char* op, size_t offset, int len) {
    // Emit 64 byte copies but make sure to keep at least four bytes reserved
    while (len >= 68) {
      op = EmitCopyLessThan64(op, offset, 64);
    // Emit 64 byte copies but make sure to keep at least four bytes reserved
    while (len >= 68) {
      op = EmitCopyLessThan64(op, offset, 64);
@@ -249,7 +249,7 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
    // compression, and if the input is short, we won't need that
    // many hash table entries anyway.
    assert(kMaxHashTableSize >= 256);
    // compression, and if the input is short, we won't need that
    // many hash table entries anyway.
    assert(kMaxHashTableSize >= 256);
-  int htsize = 256;
+  size_t htsize = 256;
    while (htsize < kMaxHashTableSize && htsize < input_size) {
      htsize <<= 1;
    }
    while (htsize < kMaxHashTableSize && htsize < input_size) {
      htsize <<= 1;
    }
@@ -272,16 +272,49 @@ uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  }
  }  // end namespace internal
  
  }
  }  // end namespace internal
  
-// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
+// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
  // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
  // empirically found that overlapping loads such as
  //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
  // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
  // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
  // empirically found that overlapping loads such as
  //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
  // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+//
+// We have different versions for 64- and 32-bit; ideally we would avoid the
+// two functions and just inline the UNALIGNED_LOAD64 call into
+// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
+// enough to avoid loading the value multiple times then. For 64-bit, the load
+// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
+// done at GetUint32AtOffset() time.
+
+#ifdef ARCH_K8
+
+typedef uint64 EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+  return UNALIGNED_LOAD64(ptr);
+}
+
  static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
-  DCHECK(0 <= offset && offset <= 4) << offset;
+  DCHECK_GE(offset, 0);
+  DCHECK_LE(offset, 4);
    return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
  }
  
    return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
  }
  
+#else
+
+typedef const char* EightBytesReference;
+
+static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+  return ptr;
+}
+
+static inline uint32 GetUint32AtOffset(const char* v, int offset) {
+  DCHECK_GE(offset, 0);
+  DCHECK_LE(offset, 4);
+  return UNALIGNED_LOAD32(v + offset);
+}
+
+#endif
+
  // Flat array compression that does not emit the "uncompressed length"
  // prefix. Compresses "input" string to the "*op" buffer.
  //
  // Flat array compression that does not emit the "uncompressed length"
  // prefix. Compresses "input" string to the "*op" buffer.
  //
@@ -304,14 +337,14 @@ char* CompressFragment(const char* input,
    CHECK_LE(input_size, kBlockSize);
    CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
    const int shift = 32 - Bits::Log2Floor(table_size);
    CHECK_LE(input_size, kBlockSize);
    CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
    const int shift = 32 - Bits::Log2Floor(table_size);
-  DCHECK_EQ(kuint32max >> shift, table_size - 1);
+  DCHECK_EQ(static_cast<int>(kuint32max >> shift), table_size - 1);
    const char* ip_end = input + input_size;
    const char* base_ip = ip;
    // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
    // [next_emit, ip_end) after the main loop.
    const char* next_emit = ip;
  
    const char* ip_end = input + input_size;
    const char* base_ip = ip;
    // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
    // [next_emit, ip_end) after the main loop.
    const char* next_emit = ip;
  
-  const int kInputMarginBytes = 15;
+  const size_t kInputMarginBytes = 15;
    if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
      const char* ip_limit = input + input_size - kInputMarginBytes;
  
    if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
      const char* ip_limit = input + input_size - kInputMarginBytes;
  
@@ -378,7 +411,7 @@ char* CompressFragment(const char* input,
        // though we don't yet know how big the literal will be.  We handle that
        // by proceeding to the next iteration of the main loop.  We also can exit
        // this loop via goto if we get close to exhausting the input.
        // though we don't yet know how big the literal will be.  We handle that
        // by proceeding to the next iteration of the main loop.  We also can exit
        // this loop via goto if we get close to exhausting the input.
-      uint64 input_bytes = 0;
+      EightBytesReference input_bytes;
        uint32 candidate_bytes = 0;
  
        do {
        uint32 candidate_bytes = 0;
  
        do {
@@ -387,7 +420,7 @@ char* CompressFragment(const char* input,
          const char* base = ip;
          int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
          ip += matched;
          const char* base = ip;
          int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
          ip += matched;
-        int offset = base - candidate;
+        size_t offset = base - candidate;
          DCHECK_EQ(0, memcmp(base, candidate, matched));
          op = EmitCopy(op, offset, matched);
          // We could immediately start working at ip now, but to improve
          DCHECK_EQ(0, memcmp(base, candidate, matched));
          op = EmitCopy(op, offset, matched);
          // We could immediately start working at ip now, but to improve
@@ -397,7 +430,7 @@ char* CompressFragment(const char* input,
          if (PREDICT_FALSE(ip >= ip_limit)) {
            goto emit_remainder;
          }
          if (PREDICT_FALSE(ip >= ip_limit)) {
            goto emit_remainder;
          }
-        input_bytes = UNALIGNED_LOAD64(insert_tail);
+        input_bytes = GetEightBytesAt(insert_tail);
          uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
          table[prev_hash] = ip - base_ip - 1;
          uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
          uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
          table[prev_hash] = ip - base_ip - 1;
          uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
@@ -435,12 +468,26 @@ char* CompressFragment(const char* input,
  //   bool CheckLength() const;
  //
  //   // Called repeatedly during decompression
  //   bool CheckLength() const;
  //
  //   // Called repeatedly during decompression
-//   bool Append(const char* ip, uint32 length, bool allow_fast_path);
-//   bool AppendFromSelf(uint32 offset, uint32 length);
-// };
+//   bool Append(const char* ip, size_t length);
+//   bool AppendFromSelf(uint32 offset, size_t length);
  //
  //
-// "allow_fast_path" is a parameter that says if there is at least 16
-// readable bytes in "ip". It is currently only used by SnappyArrayWriter.
+//   // The difference between TryFastAppend and Append is that TryFastAppend
+//   // is allowed to read up to <available> bytes from the input buffer,
+//   // whereas Append is allowed to read <length>.
+//   //
+//   // Also, TryFastAppend is allowed to return false, declining the append,
+//   // without it being a fatal error -- just "return false" would be
+//   // a perfectly legal implementation of TryFastAppend. The intention
+//   // is for TryFastAppend to allow a fast path in the common case of
+//   // a small append.
+//   //
+//   // NOTE(user): TryFastAppend must always return decline (return false)
+//   // if <length> is 61 or more, as in this case the literal length is not
+//   // decoded fully. In practice, this should not be a big problem,
+//   // as it is unlikely that one would implement a fast path accepting
+//   // this much data.
+//   bool TryFastAppend(const char* ip, size_t available, size_t length);
+// };
  
  // -----------------------------------------------------------------------
  // Lookup table for decompression code.  Generated by ComputeTable() below.
  
  // -----------------------------------------------------------------------
  // Lookup table for decompression code.  Generated by ComputeTable() below.
@@ -587,7 +634,6 @@ static void ComputeTable() {
      CHECK_EQ(dst[i], char_table[i]);
    }
  }
      CHECK_EQ(dst[i], char_table[i]);
    }
  }
-REGISTER_MODULE_INITIALIZER(snappy, ComputeTable());
  #endif /* !NDEBUG */
  
  // Helper class for decompression
  #endif /* !NDEBUG */
  
  // Helper class for decompression
@@ -655,29 +701,41 @@ class SnappyDecompressor {
    template <class Writer>
    void DecompressAllTags(Writer* writer) {
      const char* ip = ip_;
    template <class Writer>
    void DecompressAllTags(Writer* writer) {
      const char* ip = ip_;
-    for ( ;; ) {
-      if (ip_limit_ - ip < 5) {
-        ip_ = ip;
-        if (!RefillTag()) return;
-        ip = ip_;
-      }
  
  
+    // We could have put this refill fragment only at the beginning of the loop.
+    // However, duplicating it at the end of each branch gives the compiler more
+    // scope to optimize the <ip_limit_ - ip> expression based on the local
+    // context, which overall increases speed.
+    #define MAYBE_REFILL() \
+        if (ip_limit_ - ip < 5) { \
+          ip_ = ip; \
+          if (!RefillTag()) return; \
+          ip = ip_; \
+        }
+
+    MAYBE_REFILL();
+    for ( ;; ) {
        const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
  
        if ((c & 0x3) == LITERAL) {
        const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
  
        if ((c & 0x3) == LITERAL) {
-        uint32 literal_length = c >> 2;
-        if (PREDICT_FALSE(literal_length >= 60)) {
+        size_t literal_length = (c >> 2) + 1u;
+        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+          DCHECK_LT(literal_length, 61);
+          ip += literal_length;
+          MAYBE_REFILL();
+          continue;
+        }
+        if (PREDICT_FALSE(literal_length >= 61)) {
            // Long literal.
            // Long literal.
-          const uint32 literal_length_length = literal_length - 59;
+          const size_t literal_length_length = literal_length - 60;
            literal_length =
            literal_length =
-              LittleEndian::Load32(ip) & wordmask[literal_length_length];
+              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
            ip += literal_length_length;
          }
            ip += literal_length_length;
          }
-        ++literal_length;
  
  
-        uint32 avail = ip_limit_ - ip;
+        size_t avail = ip_limit_ - ip;
          while (avail < literal_length) {
          while (avail < literal_length) {
-          if (!writer->Append(ip, avail, false)) return;
+          if (!writer->Append(ip, avail)) return;
            literal_length -= avail;
            reader_->Skip(peeked_);
            size_t n;
            literal_length -= avail;
            reader_->Skip(peeked_);
            size_t n;
@@ -687,11 +745,11 @@ class SnappyDecompressor {
            if (avail == 0) return;  // Premature end of input
            ip_limit_ = ip + avail;
          }
            if (avail == 0) return;  // Premature end of input
            ip_limit_ = ip + avail;
          }
-        bool allow_fast_path = (avail >= 16);
-        if (!writer->Append(ip, literal_length, allow_fast_path)) {
+        if (!writer->Append(ip, literal_length)) {
            return;
          }
          ip += literal_length;
            return;
          }
          ip += literal_length;
+        MAYBE_REFILL();
        } else {
          const uint32 entry = char_table[c];
          const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        } else {
          const uint32 entry = char_table[c];
          const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
@@ -705,8 +763,11 @@ class SnappyDecompressor {
          if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
            return;
          }
          if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
            return;
          }
+        MAYBE_REFILL();
        }
      }
        }
      }
+
+#undef MAYBE_REFILL
    }
  };
  
    }
  };
  
@@ -777,6 +838,15 @@ static bool InternalUncompress(Source* r,
    SnappyDecompressor decompressor(r);
    uint32 uncompressed_len = 0;
    if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
    SnappyDecompressor decompressor(r);
    uint32 uncompressed_len = 0;
    if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+  return InternalUncompressAllTags(
+      &decompressor, writer, uncompressed_len, max_len);
+}
+
+template <typename Writer>
+static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
+                                      Writer* writer,
+                                      uint32 uncompressed_len,
+                                      uint32 max_len) {
    // Protect against possible DoS attack
    if (static_cast<uint64>(uncompressed_len) > max_len) {
      return false;
    // Protect against possible DoS attack
    if (static_cast<uint64>(uncompressed_len) > max_len) {
      return false;
@@ -785,8 +855,8 @@ static bool InternalUncompress(Source* r,
    writer->SetExpectedLength(uncompressed_len);
  
    // Process the entire input
    writer->SetExpectedLength(uncompressed_len);
  
    // Process the entire input
-  decompressor.DecompressAllTags(writer);
-  return (decompressor.eof() && writer->CheckLength());
+  decompressor->DecompressAllTags(writer);
+  return (decompressor->eof() && writer->CheckLength());
  }
  
  bool GetUncompressedLength(Source* source, uint32* result) {
  }
  
  bool GetUncompressedLength(Source* source, uint32* result) {
@@ -796,7 +866,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  
  size_t Compress(Source* reader, Sink* writer) {
    size_t written = 0;
  
  size_t Compress(Source* reader, Sink* writer) {
    size_t written = 0;
-  int N = reader->Available();
+  size_t N = reader->Available();
    char ulength[Varint::kMax32];
    char* p = Varint::Encode32(ulength, N);
    writer->Append(ulength, p-ulength);
    char ulength[Varint::kMax32];
    char* p = Varint::Encode32(ulength, N);
    writer->Append(ulength, p-ulength);
@@ -811,10 +881,10 @@ size_t Compress(Source* reader, Sink* writer) {
      size_t fragment_size;
      const char* fragment = reader->Peek(&fragment_size);
      DCHECK_NE(fragment_size, 0) << ": premature end of input";
      size_t fragment_size;
      const char* fragment = reader->Peek(&fragment_size);
      DCHECK_NE(fragment_size, 0) << ": premature end of input";
-    const int num_to_read = min(N, kBlockSize);
+    const size_t num_to_read = min(N, kBlockSize);
      size_t bytes_read = fragment_size;
  
      size_t bytes_read = fragment_size;
  
-    int pending_advance = 0;
+    size_t pending_advance = 0;
      if (bytes_read >= num_to_read) {
        // Buffer returned by reader is large enough
        pending_advance = num_to_read;
      if (bytes_read >= num_to_read) {
        // Buffer returned by reader is large enough
        pending_advance = num_to_read;
@@ -902,34 +972,42 @@ class SnappyArrayWriter {
      return op_ == op_limit_;
    }
  
      return op_ == op_limit_;
    }
  
-  inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+  inline bool Append(const char* ip, size_t len) {
      char* op = op_;
      char* op = op_;
-    const int space_left = op_limit_ - op;
-    if (allow_fast_path && len <= 16 && space_left >= 16) {
-      // Fast path, used for the majority (about 90%) of dynamic invocations.
-      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
-      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
-    } else {
-      if (space_left < len) {
-        return false;
-      }
-      memcpy(op, ip, len);
+    const size_t space_left = op_limit_ - op;
+    if (space_left < len) {
+      return false;
      }
      }
+    memcpy(op, ip, len);
      op_ = op + len;
      return true;
    }
  
      op_ = op + len;
      return true;
    }
  
-  inline bool AppendFromSelf(uint32 offset, uint32 len) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
      char* op = op_;
      char* op = op_;
-    const int space_left = op_limit_ - op;
+    const size_t space_left = op_limit_ - op;
+    if (len <= 16 && available >= 16 && space_left >= 16) {
+      // Fast path, used for the majority (about 95%) of invocations.
+      UnalignedCopy64(ip, op);
+      UnalignedCopy64(ip + 8, op + 8);
+      op_ = op + len;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool AppendFromSelf(size_t offset, size_t len) {
+    char* op = op_;
+    const size_t space_left = op_limit_ - op;
  
      if (op - base_ <= offset - 1u) {  // -1u catches offset==0
        return false;
      }
      if (len <= 16 && offset >= 8 && space_left >= 16) {
        // Fast path, used for the majority (70-80%) of dynamic invocations.
  
      if (op - base_ <= offset - 1u) {  // -1u catches offset==0
        return false;
      }
      if (len <= 16 && offset >= 8 && space_left >= 16) {
        // Fast path, used for the majority (70-80%) of dynamic invocations.
-      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset));
-      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8));
+      UnalignedCopy64(op - offset, op);
+      UnalignedCopy64(op - offset + 8, op + 8);
      } else {
        if (space_left >= len + kMaxIncrementCopyOverflow) {
          IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left >= len + kMaxIncrementCopyOverflow) {
          IncrementalCopyFastPath(op - offset, op, len);
@@ -985,11 +1063,14 @@ class SnappyDecompressionValidator {
    inline bool CheckLength() const {
      return expected_ == produced_;
    }
    inline bool CheckLength() const {
      return expected_ == produced_;
    }
-  inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+  inline bool Append(const char* ip, size_t len) {
      produced_ += len;
      return produced_ <= expected_;
    }
      produced_ += len;
      return produced_ <= expected_;
    }
-  inline bool AppendFromSelf(uint32 offset, uint32 len) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+    return false;
+  }
+  inline bool AppendFromSelf(size_t offset, size_t len) {
      if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
      produced_ += len;
      return produced_ <= expected_;
      if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
      produced_ += len;
      return produced_ <= expected_;
diff --git a/thirdparty/snappy/snappy.h b/thirdparty/snappy/snappy.h

index 8d6ef2294f55aadc92a60de8b2d9b11eed542498..8c2075fefa5d5835bbf4cb9510da85505a606410 100644 (file)
--- a/thirdparty/snappy/snappy.h
+++ b/thirdparty/snappy/snappy.h
@@ -144,10 +144,10 @@ namespace snappy {
    // decompression code should not rely on this guarantee since older
    // compression code may not obey it.
    static const int kBlockLog = 15;
    // decompression code should not rely on this guarantee since older
    // compression code may not obey it.
    static const int kBlockLog = 15;
-  static const int kBlockSize = 1 << kBlockLog;
+  static const size_t kBlockSize = 1 << kBlockLog;
  
    static const int kMaxHashTableBits = 14;
  
    static const int kMaxHashTableBits = 14;
-  static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
+  static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
  
  }  // end namespace snappy
  
  
  }  // end namespace snappy
  
diff --git a/thirdparty/snappy/snappy_unittest.cc b/thirdparty/snappy/snappy_unittest.cc

index 6fff333ad1398374e85d9df357eb479edba2f62f..f3b9c83c04935cdd96df1bef7da9efc1ee24a7dc 100644 (file)
--- a/thirdparty/snappy/snappy_unittest.cc
+++ b/thirdparty/snappy/snappy_unittest.cc
@@ -300,7 +300,7 @@ static bool Uncompress(const string& compressed, CompressorType comp,
            reinterpret_cast<const Bytef*>(compressed.data()),
            compressed.size());
        CHECK_EQ(Z_OK, ret);
            reinterpret_cast<const Bytef*>(compressed.data()),
            compressed.size());
        CHECK_EQ(Z_OK, ret);
-      CHECK_EQ(destlen, size);
+      CHECK_EQ(static_cast<uLongf>(size), destlen);
        break;
      }
  #endif  // ZLIB_VERSION
        break;
      }
  #endif  // ZLIB_VERSION
@@ -316,7 +316,7 @@ static bool Uncompress(const string& compressed, CompressorType comp,
            &destlen,
            NULL);
        CHECK_EQ(LZO_E_OK, ret);
            &destlen,
            NULL);
        CHECK_EQ(LZO_E_OK, ret);
-      CHECK_EQ(destlen, size);
+      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
        break;
      }
  #endif  // LZO_VERSION
        break;
      }
  #endif  // LZO_VERSION
@@ -591,22 +591,24 @@ TYPED_TEST(CorruptedTest, VerifyCorrupted) {
      // Another security check; check a crazy big length can't DoS us with an
      // over-allocation.
      // Currently this is done only for 32-bit builds.  On 64-bit builds,
      // Another security check; check a crazy big length can't DoS us with an
      // over-allocation.
      // Currently this is done only for 32-bit builds.  On 64-bit builds,
-    // where 3GBytes might be an acceptable allocation size, Uncompress()
+    // where 3 GB might be an acceptable allocation size, Uncompress()
      // attempts to decompress, and sometimes causes the test to run out of
      // memory.
      dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
      // attempts to decompress, and sometimes causes the test to run out of
      // memory.
      dest[0] = dest[1] = dest[2] = dest[3] = 0xff;
-    // This decodes to a really large size, i.e., 3221225471 bytes
+    // This decodes to a really large size, i.e., about 3 GB.
      dest[4] = 'k';
      CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
      CHECK(!Uncompress(TypeParam(dest), &uncmp));
      dest[4] = 'k';
      CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
      CHECK(!Uncompress(TypeParam(dest), &uncmp));
-    dest[0] = dest[1] = dest[2] = 0xff;
-    dest[3] = 0x7f;
-    CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
-    CHECK(!Uncompress(TypeParam(dest), &uncmp));
    } else {
      LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
    }
  
    } else {
      LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
    }
  
+  // This decodes to about 2 MB; much smaller, but should still fail.
+  dest[0] = dest[1] = dest[2] = 0xff;
+  dest[3] = 0x00;
+  CHECK(!IsValidCompressedBuffer(TypeParam(dest)));
+  CHECK(!Uncompress(TypeParam(dest), &uncmp));
+
    // try reading stuff in from a bad file.
    for (int i = 1; i <= 3; ++i) {
      string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str());
    // try reading stuff in from a bad file.
    for (int i = 1; i <= 3; ++i) {
      string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str());
author	José Fonseca <jose.r.fonseca@gmail.com>
	Wed, 14 Mar 2012 23:51:45 +0000 (23:51 +0000)
committer	José Fonseca <jose.r.fonseca@gmail.com>
	Thu, 15 Mar 2012 00:10:15 +0000 (00:10 +0000)
thirdparty/snappy/ChangeLog		patch \| blob \| history
thirdparty/snappy/NEWS		patch \| blob \| history
thirdparty/snappy/README		patch \| blob \| history
thirdparty/snappy/format_description.txt		patch \| blob \| history
thirdparty/snappy/framing_format.txt	[new file with mode: 0644]	patch \| blob
thirdparty/snappy/snappy-sinksource.cc		patch \| blob \| history
thirdparty/snappy/snappy-sinksource.h		patch \| blob \| history
thirdparty/snappy/snappy-stubs-internal.h		patch \| blob \| history
thirdparty/snappy/snappy-stubs-public.h		patch \| blob \| history
thirdparty/snappy/snappy-test.cc		patch \| blob \| history
thirdparty/snappy/snappy-test.h		patch \| blob \| history
thirdparty/snappy/snappy.cc		patch \| blob \| history
thirdparty/snappy/snappy.h		patch \| blob \| history
thirdparty/snappy/snappy_unittest.cc		patch \| blob \| history