git.cworth.org Git - vogl/blob - src/voglcore/vogl_threaded_resampler.cpp
Initial vogl checkin
[vogl] / src / voglcore / vogl_threaded_resampler.cpp
1 /**************************************************************************
2  *
3  * Copyright 2013-2014 RAD Game Tools and Valve Software
4  * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  *
25  **************************************************************************/
26
27 // File: vogl_threaded_resampler.cpp
28 #include "vogl_core.h"
29 #include "vogl_threaded_resampler.h"
30 #include "vogl_resample_filters.h"
31 #include "vogl_threading.h"
32
33 namespace vogl
34 {
35     threaded_resampler::threaded_resampler(task_pool &tp)
36         : m_pTask_pool(&tp),
37           m_pParams(NULL),
38           m_pX_contribs(NULL),
39           m_pY_contribs(NULL),
40           m_bytes_per_pixel(0)
41     {
42     }
43
44     threaded_resampler::~threaded_resampler()
45     {
46         free_contrib_lists();
47     }
48
49     void threaded_resampler::free_contrib_lists()
50     {
51         if (m_pX_contribs)
52         {
53             vogl_free(m_pX_contribs->p);
54             m_pX_contribs->p = NULL;
55
56             vogl_free(m_pX_contribs);
57             m_pX_contribs = NULL;
58         }
59
60         if (m_pY_contribs)
61         {
62             vogl_free(m_pY_contribs->p);
63             m_pY_contribs->p = NULL;
64
65             vogl_free(m_pY_contribs);
66             m_pY_contribs = NULL;
67         }
68     }
69
70     void threaded_resampler::resample_x_task(uint64_t data, void *pData_ptr)
71     {
72         VOGL_NOTE_UNUSED(pData_ptr);
73         const uint thread_index = (uint)data;
74
75         for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++)
76         {
77             if (m_pTask_pool->get_num_threads())
78             {
79                 if ((src_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
80                     continue;
81             }
82
83             const Resampler::Contrib_List *pContribs = m_pX_contribs;
84             const Resampler::Contrib_List *pContribs_end = m_pX_contribs + m_pParams->m_dst_width;
85
86             switch (m_pParams->m_fmt)
87             {
88                 case cPF_Y_F32:
89                 {
90                     const float *pSrc = reinterpret_cast<const float *>(static_cast<const uint8 *>(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y);
91                     vec4F *pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y;
92
93                     do
94                     {
95                         const Resampler::Contrib *p = pContribs->p;
96                         const Resampler::Contrib *p_end = pContribs->p + pContribs->n;
97
98                         vec4F s(0.0f);
99
100                         while (p != p_end)
101                         {
102                             const uint src_pixel = p->pixel;
103                             const float src_weight = p->weight;
104
105                             s[0] += pSrc[src_pixel] * src_weight;
106
107                             p++;
108                         }
109
110                         *pDst++ = s;
111                         pContribs++;
112                     } while (pContribs != pContribs_end);
113
114                     break;
115                 }
116                 case cPF_RGBX_F32:
117                 {
118                     const vec4F *pSrc = reinterpret_cast<const vec4F *>(static_cast<const uint8 *>(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y);
119                     vec4F *pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y;
120
121                     do
122                     {
123                         const Resampler::Contrib *p = pContribs->p;
124                         const Resampler::Contrib *p_end = pContribs->p + pContribs->n;
125
126                         vec4F s(0.0f);
127
128                         while (p != p_end)
129                         {
130                             const float src_weight = p->weight;
131
132                             const vec4F &src_pixel = pSrc[p->pixel];
133
134                             s[0] += src_pixel[0] * src_weight;
135                             s[1] += src_pixel[1] * src_weight;
136                             s[2] += src_pixel[2] * src_weight;
137
138                             p++;
139                         }
140
141                         *pDst++ = s;
142                         pContribs++;
143                     } while (pContribs != pContribs_end);
144
145                     break;
146                 }
147                 case cPF_RGBA_F32:
148                 {
149                     const vec4F *pSrc = reinterpret_cast<const vec4F *>(static_cast<const uint8 *>(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y);
150                     vec4F *pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y;
151
152                     do
153                     {
154                         Resampler::Contrib *p = pContribs->p;
155                         Resampler::Contrib *p_end = pContribs->p + pContribs->n;
156
157                         vec4F s(0.0f);
158
159                         while (p != p_end)
160                         {
161                             const float src_weight = p->weight;
162
163                             const vec4F &src_pixel = pSrc[p->pixel];
164
165                             s[0] += src_pixel[0] * src_weight;
166                             s[1] += src_pixel[1] * src_weight;
167                             s[2] += src_pixel[2] * src_weight;
168                             s[3] += src_pixel[3] * src_weight;
169
170                             p++;
171                         }
172
173                         *pDst++ = s;
174                         pContribs++;
175                     } while (pContribs != pContribs_end);
176
177                     break;
178                 }
179                 default:
180                     break;
181             }
182         }
183     }
184
185     void threaded_resampler::resample_y_task(uint64_t data, void *pData_ptr)
186     {
187         VOGL_NOTE_UNUSED(pData_ptr);
188
189         const uint thread_index = (uint)data;
190
191         vogl::vector<vec4F> tmp(m_pParams->m_dst_width);
192
193         for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++)
194         {
195             if (m_pTask_pool->get_num_threads())
196             {
197                 if ((dst_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
198                     continue;
199             }
200
201             const Resampler::Contrib_List &contribs = m_pY_contribs[dst_y];
202
203             const vec4F *pSrc;
204
205             if (contribs.n == 1)
206             {
207                 pSrc = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[0].pixel;
208             }
209             else
210             {
211                 for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++)
212                 {
213                     const vec4F *p = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[src_y_iter].pixel;
214                     const float weight = contribs.p[src_y_iter].weight;
215
216                     if (!src_y_iter)
217                     {
218                         for (uint i = 0; i < m_pParams->m_dst_width; i++)
219                             tmp[i] = p[i] * weight;
220                     }
221                     else
222                     {
223                         for (uint i = 0; i < m_pParams->m_dst_width; i++)
224                             tmp[i] += p[i] * weight;
225                     }
226                 }
227
228                 pSrc = tmp.get_ptr();
229             }
230
231             const vec4F *pSrc_end = pSrc + m_pParams->m_dst_width;
232
233             const float l = m_pParams->m_sample_low;
234             const float h = m_pParams->m_sample_high;
235
236             switch (m_pParams->m_fmt)
237             {
238                 case cPF_Y_F32:
239                 {
240                     float *pDst = reinterpret_cast<float *>(static_cast<uint8 *>(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y);
241
242                     do
243                     {
244                         *pDst++ = math::clamp((*pSrc)[0], l, h);
245
246                         pSrc++;
247
248                     } while (pSrc != pSrc_end);
249
250                     break;
251                 }
252                 case cPF_RGBX_F32:
253                 {
254                     vec4F *pDst = reinterpret_cast<vec4F *>(static_cast<uint8 *>(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y);
255
256                     do
257                     {
258                         (*pDst)[0] = math::clamp((*pSrc)[0], l, h);
259                         (*pDst)[1] = math::clamp((*pSrc)[1], l, h);
260                         (*pDst)[2] = math::clamp((*pSrc)[2], l, h);
261                         (*pDst)[3] = h;
262
263                         pSrc++;
264                         pDst++;
265
266                     } while (pSrc != pSrc_end);
267
268                     break;
269                 }
270                 case cPF_RGBA_F32:
271                 {
272                     vec4F *pDst = reinterpret_cast<vec4F *>(static_cast<uint8 *>(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y);
273
274                     do
275                     {
276                         (*pDst)[0] = math::clamp((*pSrc)[0], l, h);
277                         (*pDst)[1] = math::clamp((*pSrc)[1], l, h);
278                         (*pDst)[2] = math::clamp((*pSrc)[2], l, h);
279                         (*pDst)[3] = math::clamp((*pSrc)[3], l, h);
280
281                         pSrc++;
282                         pDst++;
283
284                     } while (pSrc != pSrc_end);
285
286                     break;
287                 }
288                 default:
289                     break;
290             }
291         }
292     }
293
294     bool threaded_resampler::resample(const params &p)
295     {
296         free_contrib_lists();
297
298         m_pParams = &p;
299
300         VOGL_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height);
301         VOGL_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height);
302
303         switch (p.m_fmt)
304         {
305             case cPF_Y_F32:
306                 m_bytes_per_pixel = 4;
307                 break;
308             case cPF_RGBX_F32:
309             case cPF_RGBA_F32:
310                 m_bytes_per_pixel = 16;
311                 break;
312             default:
313                 VOGL_ASSERT(false);
314                 return false;
315         }
316
317         int filter_index = find_resample_filter(p.m_Pfilter_name);
318         if (filter_index < 0)
319             return false;
320
321         const resample_filter &filter = g_resample_filters[filter_index];
322
323         m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_x_scale, p.m_x_ofs);
324         if (!m_pX_contribs)
325             return false;
326
327         m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, p.m_y_ofs);
328         if (!m_pY_contribs)
329             return false;
330
331         if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height))
332             return false;
333
334         for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
335             m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, NULL);
336         m_pTask_pool->join();
337
338         for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
339             m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, NULL);
340         m_pTask_pool->join();
341
342         m_tmp_img.clear();
343         free_contrib_lists();
344
345         return true;
346     }
347
348 } // namespace vogl