- for (unsigned x = 0; x < width; ++x) {
- pixels[x*3 + 0] = row[x*4 + 0];
- pixels[x*3 + 1] = row[x*4 + 1];
- pixels[x*3 + 2] = row[x*4 + 2];
+ const uint32_t *src = (const uint32_t *)row;
+ uint32_t *dst = (uint32_t *)tmp;
+ unsigned x;
+ for (x = 0; x + 4 <= width; x += 4) {
+ /*
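+ * Pack four RGBA pixels (four dwords in) into three dwords of RGB, dropping the top byte of each; it's much faster to access dwords than bytes.
+ *
+ * FIXME: The masks and shifts below assume little-endian byte order; add a big-endian version.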
+ */
+
+ uint32_t rgba0 = *src++ & 0xffffff;
+ uint32_t rgba1 = *src++ & 0xffffff;
+ uint32_t rgba2 = *src++ & 0xffffff;
+ uint32_t rgba3 = *src++ & 0xffffff;
+ uint32_t rgb0 = rgba0
+ | (rgba1 << 24);
+ uint32_t rgb1 = (rgba1 >> 8)
+ | (rgba2 << 16);
+ uint32_t rgb2 = (rgba2 >> 16)
+ | (rgba3 << 8);
+ *dst++ = rgb0;
+ *dst++ = rgb1;
+ *dst++ = rgb2;
+ }
+ for (; x < width; ++x) {
+ tmp[x*3 + 0] = row[x*4 + 0];
+ tmp[x*3 + 1] = row[x*4 + 1];
+ tmp[x*3 + 2] = row[x*4 + 2];