- unsigned char *tmp = new unsigned char[width*3];
- if (channels == 4) {
- for (row = start(); row != end(); row += stride()) {
- const uint32_t *src = (const uint32_t *)row;
- uint32_t *dst = (uint32_t *)tmp;
- unsigned x;
- for (x = 0; x + 4 <= width; x += 4) {
- /*
- * It's much faster to access dwords than bytes.
- *
- * FIXME: Big-endian version.
- */
-
- uint32_t rgba0 = *src++ & 0xffffff;
- uint32_t rgba1 = *src++ & 0xffffff;
- uint32_t rgba2 = *src++ & 0xffffff;
- uint32_t rgba3 = *src++ & 0xffffff;
- uint32_t rgb0 = rgba0
- | (rgba1 << 24);
- uint32_t rgb1 = (rgba1 >> 8)
- | (rgba2 << 16);
- uint32_t rgb2 = (rgba2 >> 16)
- | (rgba3 << 8);
- *dst++ = rgb0;
- *dst++ = rgb1;
- *dst++ = rgb2;
+ /*
+ * Need to add/remove channels, one pixel at a time.
+ */
+
+ unsigned char *tmp = new unsigned char[width*bytesPerPixel];
+
+ if (channelType == TYPE_UNORM8) {
+ /*
+ * Optimized path for 8bit unorms.
+ */
+
+ if (channels == 4) {
+ for (row = start(); row != end(); row += stride()) {
+ const uint32_t *src = (const uint32_t *)row;
+ uint32_t *dst = (uint32_t *)tmp;
+ unsigned x;
+ for (x = 0; x + 4 <= width; x += 4) {
+ /*
+ * It's much faster to access dwords than bytes.
+ *
+ * FIXME: Big-endian version.
+ */
+
+ uint32_t rgba0 = *src++ & 0xffffff;
+ uint32_t rgba1 = *src++ & 0xffffff;
+ uint32_t rgba2 = *src++ & 0xffffff;
+ uint32_t rgba3 = *src++ & 0xffffff;
+ uint32_t rgb0 = rgba0
+ | (rgba1 << 24);
+ uint32_t rgb1 = (rgba1 >> 8)
+ | (rgba2 << 16);
+ uint32_t rgb2 = (rgba2 >> 16)
+ | (rgba3 << 8);
+ *dst++ = rgb0;
+ *dst++ = rgb1;
+ *dst++ = rgb2;
+ }
+ for (; x < width; ++x) {
+ tmp[x*3 + 0] = row[x*4 + 0];
+ tmp[x*3 + 1] = row[x*4 + 1];
+ tmp[x*3 + 2] = row[x*4 + 2];
+ }
+ os.write((const char *)tmp, width*3);