+++ /dev/null
-/*
- * Command line: opannotate --source
- *
- * Interpretation of command line:
- * Output annotated source file with samples
- * Output all files
- *
- * CPU: Core 2, speed 2133.49 MHz (estimated)
- * Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00
- * (Unhalted core cycles) count 100000
- */
-/*
- * Total samples for file : "/home/cworth/src/xorg/driver/xf86-video-intel/src/i965_render.c"
- *
- * 881083 31.4445
- */
-...
- :Bool
- :i965_prepare_composite(int op, PicturePtr pSrcPicture,
- : PicturePtr pMaskPicture, PicturePtr pDstPicture,
- : PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
- 304 0.0108 :{ /* i965_prepare_composite total: 830728 29.6474 */
- 1017 0.0363 : ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
- 184 0.0066 : I830Ptr pI830 = I830PTR(pScrn);
- : CARD32 src_offset, src_pitch;
- : CARD32 mask_offset = 0, mask_pitch = 0;
- : CARD32 dst_format, dst_offset, dst_pitch;
- : Bool rotation_program = FALSE;
- :
- 22 7.9e-04 : IntelEmitInvarientState(pScrn);
- 103 0.0037 : *pI830->last_3d = LAST_3D_RENDER;
- :
- 153 0.0055 : src_offset = intel_get_pixmap_offset(pSrc);
- 65 0.0023 : src_pitch = intel_get_pixmap_pitch(pSrc);
- 160 0.0057 : dst_offset = intel_get_pixmap_offset(pDst);
- 72 0.0026 : dst_pitch = intel_get_pixmap_pitch(pDst);
- 25 8.9e-04 : if (pMask) {
- 27 9.6e-04 : mask_offset = intel_get_pixmap_offset(pMask);
- 28 1.0e-03 : mask_pitch = intel_get_pixmap_pitch(pMask);
- : }
- 245 0.0087 : pI830->scale_units[0][0] = pSrc->drawable.width;
- 54 0.0019 : pI830->scale_units[0][1] = pSrc->drawable.height;
- :
- 64 0.0023 : pI830->transform[0] = pSrcPicture->transform;
- :
- : if (!pMask) {
- 6 2.1e-04 : pI830->transform[1] = NULL;
- 56 0.0020 : pI830->scale_units[1][0] = -1;
- 2 7.1e-05 : pI830->scale_units[1][1] = -1;
- 16 5.7e-04 : if (pI830->transform[0] &&
- : i965_check_rotation_transform(pI830->transform[0]))
- : rotation_program = TRUE;
- : } else {
- 44 0.0016 : pI830->transform[1] = pMaskPicture->transform;
- 125 0.0045 : pI830->scale_units[1][0] = pMask->drawable.width;
- 99 0.0035 : pI830->scale_units[1][1] = pMask->drawable.height;
- : }
- :
- : /* setup 3d pipeline state */
- :
- 30 0.0011 : binding_table_entries = 2; /* default no mask */
- :
- : /* Wait for sync before we start setting up our new state */
- :#if 0
- : i830WaitSync(pScrn);
- :#endif
- :
- : /* Set up our layout of state in framebuffer. First the general state: */
- : next_offset = 0;
- 31 0.0011 : vs_offset = ALIGN(next_offset, 64);
- : next_offset = vs_offset + sizeof(*vs_state);
- :
- 5 1.8e-04 : sf_offset = ALIGN(next_offset, 32);
- : next_offset = sf_offset + sizeof(*sf_state);
- :
- 14 5.0e-04 : wm_offset = ALIGN(next_offset, 32);
- : next_offset = wm_offset + sizeof(*wm_state);
- :
- 25 8.9e-04 : wm_scratch_offset = ALIGN(next_offset, 1024);
- : next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
- :
- 31 0.0011 : cc_offset = ALIGN(next_offset, 32);
- : next_offset = cc_offset + sizeof(*cc_state);
- :
- : /* keep current sf_kernel, which will send one setup urb entry to
- : * PS kernel
- : */
- 6 2.1e-04 : sf_kernel_offset = ALIGN(next_offset, 64);
- : if (pMask)
- 14 5.0e-04 : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
- : else if (rotation_program)
- : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation);
- : else
- : next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
- :
- 34 0.0012 : ps_kernel_offset = ALIGN(next_offset, 64);
- : if (pMask) {
- : if (pMaskPicture->componentAlpha &&
- : PICT_FORMAT_RGB(pMaskPicture->format)) {
- : if (i965_blend_op[op].src_alpha) {
- : next_offset = ps_kernel_offset +
- : sizeof(ps_kernel_static_maskca_srcalpha);
- : } else {
- : next_offset = ps_kernel_offset +
- : sizeof(ps_kernel_static_maskca);
- : }
- : } else
- 114 0.0041 : next_offset = ps_kernel_offset +
- : sizeof(ps_kernel_static_masknoca);
- : } else if (rotation_program) {
- : next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation);
- : } else {
- 99 0.0035 : next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
- : }
- :
- 198 0.0071 : sip_kernel_offset = ALIGN(next_offset, 64);
- : next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
- :
- : /* needed? */
- 97 0.0035 : cc_viewport_offset = ALIGN(next_offset, 32);
- : next_offset = cc_viewport_offset + sizeof(*cc_viewport);
- :
- : /* for texture sampler */
- 72 0.0026 : src_sampler_offset = ALIGN(next_offset, 32);
- 30 0.0011 : next_offset = src_sampler_offset + sizeof(*src_sampler_state);
- :
- 112 0.0040 : if (pMask) {
- 14 5.0e-04 : mask_sampler_offset = ALIGN(next_offset, 32);
- 31 0.0011 : next_offset = mask_sampler_offset + sizeof(*mask_sampler_state);
- : }
- : /* Align VB to native size of elements, for safety */
- 337 0.0120 : vb_offset = ALIGN(next_offset, 8);
- : next_offset = vb_offset + vb_size;
- :
- : /* And then the general state: */
- 184 0.0066 : dest_surf_offset = ALIGN(next_offset, 32);
- : next_offset = dest_surf_offset + sizeof(*dest_surf_state);
- :
- 136 0.0049 : src_surf_offset = ALIGN(next_offset, 32);
- 390 0.0139 : next_offset = src_surf_offset + sizeof(*src_surf_state);
- :
- 55 0.0020 : if (pMask) {
- 28 1.0e-03 : mask_surf_offset = ALIGN(next_offset, 32);
- 28 1.0e-03 : next_offset = mask_surf_offset + sizeof(*mask_surf_state);
- 9 3.2e-04 : binding_table_entries = 3;
- : }
- :
- 84 0.0030 : binding_table_offset = ALIGN(next_offset, 32);
- : next_offset = binding_table_offset + (binding_table_entries * 4);
- :
- 155 0.0055 : default_color_offset = ALIGN(next_offset, 32);
- 43 0.0015 : next_offset = default_color_offset + sizeof(*default_color_state);
- :
- 36 0.0013 : total_state_size = next_offset;
- : assert(total_state_size < pI830->exa_965_state->size);
- :
- 103 0.0037 : state_base_offset = pI830->exa_965_state->offset;
- 84 0.0030 : state_base_offset = ALIGN(state_base_offset, 64);
- 173 0.0062 : state_base = (char *)(pI830->FbBase + state_base_offset);
- :
- 12 4.3e-04 : vs_state = (void *)(state_base + vs_offset);
- 36 0.0013 : sf_state = (void *)(state_base + sf_offset);
- 43 0.0015 : wm_state = (void *)(state_base + wm_offset);
- 38 0.0014 : cc_state = (void *)(state_base + cc_offset);
- 29 0.0010 : sf_kernel = (void *)(state_base + sf_kernel_offset);
- 79 0.0028 : ps_kernel = (void *)(state_base + ps_kernel_offset);
- 31 0.0011 : sip_kernel = (void *)(state_base + sip_kernel_offset);
- :
- 63 0.0022 : cc_viewport = (void *)(state_base + cc_viewport_offset);
- :
- 25 8.9e-04 : dest_surf_state = (void *)(state_base + dest_surf_offset);
- 64 0.0023 : src_surf_state = (void *)(state_base + src_surf_offset);
- 37 0.0013 : if (pMask)
- 17 6.1e-04 : mask_surf_state = (void *)(state_base + mask_surf_offset);
- :
- 104 0.0037 : src_sampler_state = (void *)(state_base + src_sampler_offset);
- : if (pMask)
- 20 7.1e-04 : mask_sampler_state = (void *)(state_base + mask_sampler_offset);
- :
- 55 0.0020 : binding_table = (void *)(state_base + binding_table_offset);
- :
- 42 0.0015 : vb = (void *)(state_base + vb_offset);
- :
- 65 0.0023 : default_color_state = (void*)(state_base + default_color_offset);
- :
- : /* Set up a default static partitioning of the URB, which is supposed to
- : * allow anything we would want to do, at potentially lower performance.
- : */
- :#define URB_CS_ENTRY_SIZE 0
- :#define URB_CS_ENTRIES 0
- :
- :#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
- :#define URB_VS_ENTRIES 8 // we needs at least 8 entries
- :
- :#define URB_GS_ENTRY_SIZE 0
- :#define URB_GS_ENTRIES 0
- :
- :#define URB_CLIP_ENTRY_SIZE 0
- :#define URB_CLIP_ENTRIES 0
- :
- :#define URB_SF_ENTRY_SIZE 2
- :#define URB_SF_ENTRIES 1
- :
- 25 8.9e-04 : urb_vs_start = 0;
- 21 7.5e-04 : urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
- 40 0.0014 : urb_gs_start = urb_vs_start + urb_vs_size;
- 58 0.0021 : urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
- 58 0.0021 : urb_clip_start = urb_gs_start + urb_gs_size;
- 50 0.0018 : urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
- 46 0.0016 : urb_sf_start = urb_clip_start + urb_clip_size;
- 52 0.0019 : urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
- 42 0.0015 : urb_cs_start = urb_sf_start + urb_sf_size;
- 43 0.0015 : urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
- :
- : /* Because we only have a single static buffer for our state currently,
- : * we have to sync before updating it every time.
- : */
- :#if 0
- : i830WaitSync(pScrn);
- :#endif
- :
- 274 0.0098 : memset (cc_viewport, 0, sizeof (*cc_viewport));
- 124 0.0044 : cc_viewport->min_depth = -1.e35;
- 122 0.0044 : cc_viewport->max_depth = 1.e35;
- :
- : /* Color calculator state */
- 861 0.0307 : memset(cc_state, 0, sizeof(*cc_state));
- 18559 0.6623 : cc_state->cc0.stencil_enable = 0; /* disable stencil */
- 17836 0.6365 : cc_state->cc2.depth_test = 0; /* disable depth test */
- 12306 0.4392 : cc_state->cc2.logicop_enable = 0; /* disable logic op */
- : cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */
- 7308 0.2608 : cc_state->cc3.blend_enable = 1; /* enable color blend */
- 10 3.6e-04 : cc_state->cc3.alpha_test = 0; /* disable alpha test */
- 9645 0.3442 : cc_state->cc4.cc_viewport_state_offset = (state_base_offset +
- : cc_viewport_offset) >> 5;
- 7354 0.2625 : cc_state->cc5.dither_enable = 0; /* disable dither */
- 926 0.0330 : cc_state->cc5.logicop_func = 0xc; /* COPY */
- 2780 0.0992 : cc_state->cc5.statistics_enable = 1;
- 63 0.0022 : cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
- 32 0.0011 : i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
- : &src_blend, &dst_blend);
- : /* XXX: alpha blend factor should be same as color, but check
- : * for CA case in future
- : */
- 14089 0.5028 : cc_state->cc5.ia_src_blend_factor = src_blend;
- 301 0.0107 : cc_state->cc5.ia_dest_blend_factor = dst_blend;
- 13845 0.4941 : cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
- 31 0.0011 : cc_state->cc6.src_blend_factor = src_blend;
- 2361 0.0843 : cc_state->cc6.dest_blend_factor = dst_blend;
- : cc_state->cc6.clamp_post_alpha_blend = 1;
- 4466 0.1594 : cc_state->cc6.clamp_pre_alpha_blend = 1;
- 359 0.0128 : cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */
- :
- : /* Upload system kernel */
- 86 0.0031 : memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
- :
- : /* Set up the state buffer for the destination surface */
- 332 0.0118 : memset(dest_surf_state, 0, sizeof(*dest_surf_state));
- 15291 0.5457 : dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- 67 0.0024 : dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
- 39 0.0014 : i965_get_dest_format(pDstPicture, &dst_format);
- 14430 0.5150 : dest_surf_state->ss0.surface_format = dst_format;
- :
- : dest_surf_state->ss0.writedisable_alpha = 0;
- 361 0.0129 : dest_surf_state->ss0.writedisable_red = 0;
- : dest_surf_state->ss0.writedisable_green = 0;
- : dest_surf_state->ss0.writedisable_blue = 0;
- 14076 0.5024 : dest_surf_state->ss0.color_blend = 1;
- : dest_surf_state->ss0.vert_line_stride = 0;
- : dest_surf_state->ss0.vert_line_stride_ofs = 0;
- : dest_surf_state->ss0.mipmap_layout_mode = 0;
- : dest_surf_state->ss0.render_cache_read_mode = 0;
- :
- 36 0.0013 : dest_surf_state->ss1.base_addr = dst_offset;
- 420 0.0150 : dest_surf_state->ss2.height = pDst->drawable.height - 1;
- 14567 0.5199 : dest_surf_state->ss2.width = pDst->drawable.width - 1;
- : dest_surf_state->ss2.mip_count = 0;
- 398 0.0142 : dest_surf_state->ss2.render_target_rotation = 0;
- 11691 0.4172 : dest_surf_state->ss3.pitch = dst_pitch - 1;
- :
- : /* Set up the source surface state buffer */
- 66 0.0024 : memset(src_surf_state, 0, sizeof(*src_surf_state));
- 13897 0.4960 : src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- 14657 0.5231 : src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
- :
- : src_surf_state->ss0.writedisable_alpha = 0;
- 25 8.9e-04 : src_surf_state->ss0.writedisable_red = 0;
- : src_surf_state->ss0.writedisable_green = 0;
- : src_surf_state->ss0.writedisable_blue = 0;
- 14981 0.5346 : src_surf_state->ss0.color_blend = 1;
- : src_surf_state->ss0.vert_line_stride = 0;
- : src_surf_state->ss0.vert_line_stride_ofs = 0;
- : src_surf_state->ss0.mipmap_layout_mode = 0;
- 1 3.6e-05 : src_surf_state->ss0.render_cache_read_mode = 0;
- :
- 26 9.3e-04 : src_surf_state->ss1.base_addr = src_offset;
- 127 0.0045 : src_surf_state->ss2.width = pSrc->drawable.width - 1;
- 6454 0.2303 : src_surf_state->ss2.height = pSrc->drawable.height - 1;
- : src_surf_state->ss2.mip_count = 0;
- 15025 0.5362 : src_surf_state->ss2.render_target_rotation = 0;
- 243 0.0087 : src_surf_state->ss3.pitch = src_pitch - 1;
- :
- : /* setup mask surface */
- : if (pMask) {
- 48 0.0017 : memset(mask_surf_state, 0, sizeof(*mask_surf_state));
- 7037 0.2511 : mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- 7619 0.2719 : mask_surf_state->ss0.surface_format =
- : i965_get_card_format(pMaskPicture);
- :
- : mask_surf_state->ss0.writedisable_alpha = 0;
- 25 8.9e-04 : mask_surf_state->ss0.writedisable_red = 0;
- : mask_surf_state->ss0.writedisable_green = 0;
- : mask_surf_state->ss0.writedisable_blue = 0;
- 7789 0.2780 : mask_surf_state->ss0.color_blend = 1;
- : mask_surf_state->ss0.vert_line_stride = 0;
- : mask_surf_state->ss0.vert_line_stride_ofs = 0;
- : mask_surf_state->ss0.mipmap_layout_mode = 0;
- : mask_surf_state->ss0.render_cache_read_mode = 0;
- :
- 15 5.4e-04 : mask_surf_state->ss1.base_addr = mask_offset;
- 37 0.0013 : mask_surf_state->ss2.width = pMask->drawable.width - 1;
- 3281 0.1171 : mask_surf_state->ss2.height = pMask->drawable.height - 1;
- : mask_surf_state->ss2.mip_count = 0;
- 7624 0.2721 : mask_surf_state->ss2.render_target_rotation = 0;
- 71 0.0025 : mask_surf_state->ss3.pitch = mask_pitch - 1;
- : }
- :
- : /* Set up a binding table for our surfaces. Only the PS will use it */
- 94 0.0034 : binding_table[0] = state_base_offset + dest_surf_offset;
- 29 0.0010 : binding_table[1] = state_base_offset + src_surf_offset;
- : if (pMask)
- 2 7.1e-05 : binding_table[2] = state_base_offset + mask_surf_offset;
- :
- : /* PS kernel use this sampler */
- 111 0.0040 : memset(src_sampler_state, 0, sizeof(*src_sampler_state));
- 24 8.6e-04 : src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
- 12445 0.4441 : switch(pSrcPicture->filter) {
- : case PictFilterNearest:
- 30 0.0011 : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
- 14939 0.5332 : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
- : break;
- : case PictFilterBilinear:
- 5 1.8e-04 : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- 5 1.8e-04 : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
- : break;
- : default:
- : I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
- : }
- :
- 15145 0.5405 : memset(default_color_state, 0, sizeof(*default_color_state));
- : default_color_state->color[0] = 0.0; /* R */
- 41 0.0015 : default_color_state->color[1] = 0.0; /* G */
- : default_color_state->color[2] = 0.0; /* B */
- : default_color_state->color[3] = 0.0; /* A */
- :
- 69 0.0025 : src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
- :
- 6323 0.2257 : if (!pSrcPicture->repeat) {
- 88 0.0031 : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- 5650 0.2016 : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- 4660 0.1663 : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
- 532 0.0190 : src_sampler_state->ss2.default_color_pointer =
- : (state_base_offset + default_color_offset) >> 5;
- : } else {
- 33 0.0012 : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- 343 0.0122 : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- 9699 0.3461 : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- : }
- 8398 0.2997 : src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
- :
- 3 1.1e-04 : if (pMask) {
- 104 0.0037 : memset(mask_sampler_state, 0, sizeof(*mask_sampler_state));
- 5 1.8e-04 : mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
- 8123 0.2899 : switch(pMaskPicture->filter) {
- : case PictFilterNearest:
- 22 7.9e-04 : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
- 7801 0.2784 : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
- : break;
- : case PictFilterBilinear:
- : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
- : break;
- : default:
- : I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter);
- : }
- :
- 7750 0.2766 : if (!pMaskPicture->repeat) {
- 85 0.0030 : mask_sampler_state->ss1.r_wrap_mode =
- : BRW_TEXCOORDMODE_CLAMP_BORDER;
- 7668 0.2737 : mask_sampler_state->ss1.s_wrap_mode =
- : BRW_TEXCOORDMODE_CLAMP_BORDER;
- 6142 0.2192 : mask_sampler_state->ss1.t_wrap_mode =
- : BRW_TEXCOORDMODE_CLAMP_BORDER;
- 31 0.0011 : mask_sampler_state->ss2.default_color_pointer =
- : (state_base_offset + default_color_offset)>>5;
- : } else {
- : mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- 1 3.6e-05 : mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- 4 1.4e-04 : mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
- : }
- : mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
- : }
- :
- : /* Set up the vertex shader to be disabled (passthrough) */
- 619 0.0221 : memset(vs_state, 0, sizeof(*vs_state));
- 15697 0.5602 : vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
- 6724 0.2400 : vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
- 8547 0.3050 : vs_state->vs6.vs_enable = 0;
- : vs_state->vs6.vert_cache_disable = 1;
- :
- : /* Set up the SF kernel to do coord interp: for each attribute,
- : * calculate dA/dx and dA/dy. Hand these interpolation coefficients
- : * back to SF which then hands pixels off to WM.
- : */
- : if (pMask)
- 303 0.0108 : memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static));
- 13 4.6e-04 : else if (rotation_program)
- : memcpy(sf_kernel, sf_kernel_static_rotation,
- : sizeof (sf_kernel_static_rotation));
- : else
- 60 0.0021 : memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
- :
- 558 0.0199 : memset(sf_state, 0, sizeof(*sf_state));
- 27418 0.9785 : sf_state->thread0.kernel_start_pointer =
- : (state_base_offset + sf_kernel_offset) >> 6;
- 14701 0.5247 : sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
- 16615 0.5930 : sf_state->sf1.single_program_flow = 1;
- 6135 0.2189 : sf_state->sf1.binding_table_entry_count = 0;
- : sf_state->sf1.thread_priority = 0;
- 1449 0.0517 : sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
- : sf_state->sf1.illegal_op_exception_enable = 1;
- 12439 0.4439 : sf_state->sf1.mask_stack_exception_enable = 1;
- 14092 0.5029 : sf_state->sf1.sw_exception_enable = 1;
- 31133 1.1111 : sf_state->thread2.per_thread_scratch_space = 0;
- : /* scratch space is not used in our kernel */
- 40 0.0014 : sf_state->thread2.scratch_space_base_pointer = 0;
- : sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
- 13800 0.4925 : sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
- 988 0.0353 : sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
- : /* don't smash vertex header, read start from dw8 */
- 2292 0.0818 : sf_state->thread3.urb_entry_read_offset = 1;
- 2819 0.1006 : sf_state->thread3.dispatch_grf_start_reg = 3;
- 20 7.1e-04 : sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
- 1265 0.0451 : sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
- 16395 0.5851 : sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
- 324 0.0116 : sf_state->thread4.stats_enable = 1;
- 978 0.0349 : sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
- 13430 0.4793 : sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
- 1120 0.0400 : sf_state->sf6.scissor = 0;
- 38 0.0014 : sf_state->sf7.trifan_pv = 2;
- 140 0.0050 : sf_state->sf6.dest_org_vbias = 0x8;
- 11266 0.4021 : sf_state->sf6.dest_org_hbias = 0x8;
- :
- : /* Set up the PS kernel (dispatched by WM) */
- 22 7.9e-04 : if (pMask) {
- 26 9.3e-04 : if (pMaskPicture->componentAlpha &&
- : PICT_FORMAT_RGB(pMaskPicture->format)) {
- 38 0.0014 : if (i965_blend_op[op].src_alpha)
- 148 0.0053 : memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha,
- : sizeof (ps_kernel_static_maskca_srcalpha));
- : else
- 48 0.0017 : memcpy(ps_kernel, ps_kernel_static_maskca,
- : sizeof (ps_kernel_static_maskca));
- : } else
- 23 8.2e-04 : memcpy(ps_kernel, ps_kernel_static_masknoca,
- : sizeof (ps_kernel_static_masknoca));
- 74 0.0026 : } else if (rotation_program) {
- 63 0.0022 : memcpy(ps_kernel, ps_kernel_static_rotation,
- : sizeof (ps_kernel_static_rotation));
- : } else {
- 10 3.6e-04 : memcpy(ps_kernel, ps_kernel_static_nomask,
- : sizeof (ps_kernel_static_nomask));
- : }
- :
- 549 0.0196 : memset(wm_state, 0, sizeof (*wm_state));
- 29001 1.0350 : wm_state->thread0.kernel_start_pointer =
- : (state_base_offset + ps_kernel_offset) >> 6;
- 12982 0.4633 : wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
- : wm_state->thread1.single_program_flow = 1;
- : if (!pMask)
- 3871 0.1382 : wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
- : else
- 4843 0.1728 : wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
- :
- 8578 0.3061 : wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
- : wm_scratch_offset)>>10;
- 13261 0.4733 : wm_state->thread2.per_thread_scratch_space = 0;
- : wm_state->thread3.const_urb_entry_read_length = 0;
- 5646 0.2015 : wm_state->thread3.const_urb_entry_read_offset = 0;
- : /* Each pair of attributes (src/mask coords) is one URB entry */
- : if (pMask)
- 2958 0.1056 : wm_state->thread3.urb_entry_read_length = 2;
- : else
- 2386 0.0852 : wm_state->thread3.urb_entry_read_length = 1;
- 30 0.0011 : wm_state->thread3.urb_entry_read_offset = 0;
- : /* wm kernel use urb from 3, see wm_program in compiler module */
- 13080 0.4668 : wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
- :
- 12997 0.4638 : wm_state->wm4.stats_enable = 1; /* statistic */
- 79 0.0028 : wm_state->wm4.sampler_state_pointer = (state_base_offset +
- : src_sampler_offset) >> 5;
- 10829 0.3865 : wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
- 40 0.0014 : wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
- 2 7.1e-05 : wm_state->wm5.thread_dispatch_enable = 1;
- : /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
- : * start point
- : */
- 93 0.0033 : wm_state->wm5.enable_16_pix = 1;
- : wm_state->wm5.enable_8_pix = 0;
- 10969 0.3915 : wm_state->wm5.early_depth_test = 1;
- :
- : /* Begin the long sequence of commands needed to set up the 3D
- : * rendering pipe
- : */
- : {
- 111 0.0040 : BEGIN_LP_RING(2);
- 21 7.5e-04 : OUT_RING(MI_FLUSH |
- : MI_STATE_INSTRUCTION_CACHE_FLUSH |
- : BRW_MI_GLOBAL_SNAPSHOT_RESET);
- 39 0.0014 : OUT_RING(MI_NOOP);
- 239 0.0085 : ADVANCE_LP_RING();
- : }
- : {
- 5017 0.1790 : BEGIN_LP_RING(12);
- :
- : /* Match Mesa driver setup */
- 57 0.0020 : OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
- :
- 130 0.0046 : OUT_RING(BRW_CS_URB_STATE | 0);
- 136 0.0049 : OUT_RING((0 << 4) | /* URB Entry Allocation Size */
- : (0 << 0)); /* Number of URB Entries */
- :
- : /* Zero out the two base address registers so all offsets are
- : * absolute.
- : */
- 71 0.0025 : OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
- 79 0.0028 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
- 68 0.0024 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
- 167 0.0060 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
- : /* general state max addr, disabled */
- 62 0.0022 : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY);
- : /* media object state max addr, disabled */
- 60 0.0021 : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY);
- :
- : /* Set system instruction pointer */
- 159 0.0057 : OUT_RING(BRW_STATE_SIP | 0);
- 223 0.0080 : OUT_RING(state_base_offset + sip_kernel_offset);
- 137 0.0049 : OUT_RING(MI_NOOP);
- 1438 0.0513 : ADVANCE_LP_RING();
- : }
- : {
- 16676 0.5951 : BEGIN_LP_RING(26);
- : /* Pipe control */
- 53 0.0019 : OUT_RING(BRW_PIPE_CONTROL |
- : BRW_PIPE_CONTROL_NOWRITE |
- : BRW_PIPE_CONTROL_IS_FLUSH |
- : 2);
- 181 0.0065 : OUT_RING(0); /* Destination address */
- 62 0.0022 : OUT_RING(0); /* Immediate data low DW */
- 127 0.0045 : OUT_RING(0); /* Immediate data high DW */
- :
- : /* Binding table pointers */
- 77 0.0027 : OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
- 60 0.0021 : OUT_RING(0); /* vs */
- 90 0.0032 : OUT_RING(0); /* gs */
- 57 0.0020 : OUT_RING(0); /* clip */
- 160 0.0057 : OUT_RING(0); /* sf */
- : /* Only the PS uses the binding table */
- 200 0.0071 : OUT_RING(state_base_offset + binding_table_offset); /* ps */
- :
- : /* The drawing rectangle clipping is always on. Set it to values that
- : * shouldn't do any clipping.
- : */
- 282 0.0101 : OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
- 156 0.0056 : OUT_RING(0x00000000); /* ymin, xmin */
- 640 0.0228 : OUT_RING(DRAW_YMAX(pDst->drawable.height - 1) |
- : DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */
- 463 0.0165 : OUT_RING(0x00000000); /* yorigin, xorigin */
- :
- : /* skip the depth buffer */
- : /* skip the polygon stipple */
- : /* skip the polygon stipple offset */
- : /* skip the line stipple */
- :
- : /* Set the pointers to the 3d pipeline state */
- 132 0.0047 : OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
- 529 0.0189 : OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
- 181 0.0065 : OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
- 3012 0.1075 : OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
- 4257 0.1519 : OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
- 423 0.0151 : OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
- 224 0.0080 : OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
- :
- : /* URB fence */
- 31 0.0011 : OUT_RING(BRW_URB_FENCE |
- : UF0_CS_REALLOC |
- : UF0_SF_REALLOC |
- : UF0_CLIP_REALLOC |
- : UF0_GS_REALLOC |
- : UF0_VS_REALLOC |
- : 1);
- 522 0.0186 : OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
- : ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
- : ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
- 466 0.0166 : OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
- : ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
- :
- : /* Constant buffer state */
- 180 0.0064 : OUT_RING(BRW_CS_URB_STATE | 0);
- 31 0.0011 : OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) |
- : (URB_CS_ENTRIES << 0));
- 1134 0.0405 : ADVANCE_LP_RING();
- : }
- : {
- 10119 0.3611 : int nelem = pMask ? 3: 2;
- 1022 0.0365 : BEGIN_LP_RING(pMask?12:10);
- : /* Set up the pointer to our vertex buffer */
- 64 0.0023 : OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3);
- 61 0.0022 : OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
- : VB0_VERTEXDATA |
- : ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
- 119 0.0042 : OUT_RING(state_base_offset + vb_offset);
- 35 0.0012 : OUT_RING(3);
- 94 0.0034 : OUT_RING(0); // ignore for VERTEXDATA, but still there
- :
- : /* Set up our vertex elements, sourced from the single vertex buffer.
- : */
- 64 0.0023 : OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
- : /* vertex coordinates */
- 65 0.0023 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- : VE0_VALID |
- : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- : (0 << VE0_OFFSET_SHIFT));
- 33 0.0012 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- : (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- : /* u0, v0 */
- 45 0.0016 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- : VE0_VALID |
- : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- : (8 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
- 151 0.0054 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
- : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
- : (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
- : /* u1, v1 */
- 42 0.0015 : if (pMask) {
- 35 0.0012 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- : VE0_VALID |
- : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- : (16 << VE0_OFFSET_SHIFT));
- 31 0.0011 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
- : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
- : (10 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- : }
- :
- 1064 0.0380 : ADVANCE_LP_RING();
- : }
- :
- :#ifdef I830DEBUG
- : ErrorF("try to sync to show any errors...");
- : I830Sync(pScrn);
- :#endif
- : return TRUE;
- 16868 0.6020 :}