git.cworth.org Git - apitrace/commitdiff
More writePNM optimizations.
author José Fonseca <jose.r.fonseca@gmail.com>
Tue, 6 Sep 2011 09:14:57 +0000 (10:14 +0100)
committer José Fonseca <jose.r.fonseca@gmail.com>
Tue, 6 Sep 2011 09:14:57 +0000 (10:14 +0100)
image_pnm.cpp

index 82c2dde81c6603ec8fceb1d9e8c21babf20f5fef..5397a1a604d78d7c4afc45ad38ef500f2688a1ce 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <assert.h>
 #include <string.h>
+#include <stdint.h>
 
 #include "image.hpp"
 
@@ -55,29 +56,55 @@ Image::writePNM(std::ostream &os, const char *comment) const {
             os.write((const char *)row, width*channels);
         }
     } else {
-        unsigned char *pixels = new unsigned char[width*3];
+        unsigned char *tmp = new unsigned char[width*3];
         if (channels == 4) {
             for (row = start(); row != end(); row += stride()) {
-                for (unsigned x = 0; x < width; ++x) {
-                    pixels[x*3 + 0] = row[x*4 + 0];
-                    pixels[x*3 + 1] = row[x*4 + 1];
-                    pixels[x*3 + 2] = row[x*4 + 2];
+                const uint32_t *src = (const uint32_t *)row;
+                uint32_t *dst = (uint32_t *)tmp;
+                unsigned x;
+                for (x = 0; x + 4 <= width; x += 4) {
+                    /*
+                     * It's much faster to access dwords than bytes.
+                     *
+                     * FIXME: Big-endian version.
+                     */
+
+                    uint32_t rgba0 = *src++ & 0xffffff;
+                    uint32_t rgba1 = *src++ & 0xffffff;
+                    uint32_t rgba2 = *src++ & 0xffffff;
+                    uint32_t rgba3 = *src++ & 0xffffff;
+                    uint32_t rgb0 = rgba0
+                                  | (rgba1 << 24);
+                    uint32_t rgb1 = (rgba1 >> 8)
+                                  | (rgba2 << 16);
+                    uint32_t rgb2 = (rgba2 >> 16)
+                                  | (rgba3 << 8);
+                    *dst++ = rgb0;
+                    *dst++ = rgb1;
+                    *dst++ = rgb2;
+                }
+                for (; x < width; ++x) {
+                    tmp[x*3 + 0] = row[x*4 + 0];
+                    tmp[x*3 + 1] = row[x*4 + 1];
+                    tmp[x*3 + 2] = row[x*4 + 2];
                 }
-                os.write((const char *)pixels, width*3);
+                os.write((const char *)tmp, width*3);
             }
         } else if (channels == 2) {
             for (row = start(); row != end(); row += stride()) {
+                const unsigned char *src = row;
+                unsigned char *dst = tmp;
                 for (unsigned x = 0; x < width; ++x) {
-                    pixels[x*3 + 0] = row[x*2 + 0];
-                    pixels[x*3 + 1] = row[x*2 + 1];
-                    pixels[x*3 + 2] = 0;
+                    *dst++ = *src++;
+                    *dst++ = *src++;
+                    *dst++ = 0;
                 }
-                os.write((const char *)pixels, width*3);
+                os.write((const char *)tmp, width*3);
             }
         } else {
             assert(0);
         }
-        delete [] pixels;
+        delete [] tmp;
     }
 }