From a87ba12da5df5ad4a3c6f1ef1b42a8a9d2e8eacc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 6 Sep 2011 10:14:57 +0100 Subject: [PATCH] More writePNM optimizations. --- image_pnm.cpp | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/image_pnm.cpp b/image_pnm.cpp index 82c2dde..5397a1a 100644 --- a/image_pnm.cpp +++ b/image_pnm.cpp @@ -27,6 +27,7 @@ #include #include +#include #include "image.hpp" @@ -55,29 +56,55 @@ Image::writePNM(std::ostream &os, const char *comment) const { os.write((const char *)row, width*channels); } } else { - unsigned char *pixels = new unsigned char[width*3]; + unsigned char *tmp = new unsigned char[width*3]; if (channels == 4) { for (row = start(); row != end(); row += stride()) { - for (unsigned x = 0; x < width; ++x) { - pixels[x*3 + 0] = row[x*4 + 0]; - pixels[x*3 + 1] = row[x*4 + 1]; - pixels[x*3 + 2] = row[x*4 + 2]; + const uint32_t *src = (const uint32_t *)row; + uint32_t *dst = (uint32_t *)tmp; + unsigned x; + for (x = 0; x + 4 <= width; x += 4) { + /* + * It's much faster to access dwords than bytes. + * + * FIXME: Big-endian version. + */ + + uint32_t rgba0 = *src++ & 0xffffff; + uint32_t rgba1 = *src++ & 0xffffff; + uint32_t rgba2 = *src++ & 0xffffff; + uint32_t rgba3 = *src++ & 0xffffff; + uint32_t rgb0 = rgba0 + | (rgba1 << 24); + uint32_t rgb1 = (rgba1 >> 8) + | (rgba2 << 16); + uint32_t rgb2 = (rgba2 >> 16) + | (rgba3 << 8); + *dst++ = rgb0; + *dst++ = rgb1; + *dst++ = rgb2; + } + for (; x < width; ++x) { + tmp[x*3 + 0] = row[x*4 + 0]; + tmp[x*3 + 1] = row[x*4 + 1]; + tmp[x*3 + 2] = row[x*4 + 2]; } - os.write((const char *)pixels, width*3); + os.write((const char *)tmp, width*3); } } else if (channels == 2) { for (row = start(); row != end(); row += stride()) { + const unsigned char *src = row; + unsigned char *dst = tmp; for (unsigned x = 0; x < width; ++x) { - pixels[x*3 + 0] = row[x*2 + 0]; - pixels[x*3 + 1] = row[x*2 + 1]; - pixels[x*3 + 2] = 0; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = 0; } - os.write((const char *)pixels, width*3); + os.write((const char *)tmp, width*3); } } else { assert(0); } - delete [] pixels; + delete [] tmp; } } -- 2.43.0