From 899df8c7fc2a62d35189acbbcacf22a12416ef69 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Thu, 21 Jun 2007 15:40:37 -0700 Subject: [PATCH] Add opanotate_i965_prepare_composite blog entry --- .../opannotate_i965_prepare_composite.mdwn | 80 + .../i965_prepare_composite.assembly_annotate | 2036 +++++++++++++++++ .../i965_prepare_composite.source_annotate | 670 ++++++ 3 files changed, 2786 insertions(+) create mode 100644 src/exa/opannotate_i965_prepare_composite.mdwn create mode 100644 src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.assembly_annotate create mode 100644 src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.source_annotate diff --git a/src/exa/opannotate_i965_prepare_composite.mdwn b/src/exa/opannotate_i965_prepare_composite.mdwn new file mode 100644 index 0000000..7ff167e --- /dev/null +++ b/src/exa/opannotate_i965_prepare_composite.mdwn @@ -0,0 +1,80 @@ +[[meta title="Using opannotate to make sense of profiles"]] + +[[tag exa performance xorg]] + +After I recently posted some surprising +[[profiles|mozilla_i965_profiles]], I received useful feedback from +Michel Dänzer, Adam Jackson, and Eric Anholt. There was general +agreement that the i965_prepare_composite function is generally stupid +about acting synchronously in order to reuse a single state buffer, +and that this shouldn't be too hard to optimize. Options include using +a ring of buffers and synchronizing only when wrapping around and also +optimizing to not send redundant data. + +I had mentioned earlier that I had tried eliminating the `i830WaitSync` +calls, but hadn't noticed any performance change. Well, one problem +was that I had edited the files on the wrong machine, (I'm still not a +true X hacker since I'm not totally in the groove of the two-machine +debugging yet). 
It certainly did make a difference when I removed +these calls from the code actually executing, (all the text appears in +arbitrary colors, giving me more psychedelia than I actually need on +my desktop). But the performance really didn't improve at all. + +Then I received a very helpful email from Roland Dreier, (thanks +Roland!), cluing me in to opannotate. The results I had posted before +were from opreport which gives profiling reports with function-level +granularity. The opannotate utility gives a similar report, but at the +granularity of either lines of +[[source_code|i965_prepare_composite.source_annotate]] or +[[assembly_instructions|i965_prepare_composite.assembly_annotate]]. + +So these reports make it clear that sometimes there is more going on +than meets the eye by simple examination of the source code. For +example, much of the i965_prepare_composite function looks like simple +assignments such as these: + + memset (cc_viewport, 0, sizeof (*cc_viewport)); + cc_viewport->min_depth = -1.e35; + cc_viewport->max_depth = 1.e35; + + /* Color calculator state */ + memset(cc_state, 0, sizeof(*cc_state)); + cc_state->cc0.stencil_enable = 0; /* disable stencil */ + cc_state->cc2.depth_test = 0; /* disable depth test */ + +But now take a look at the same assignments annotated by +opannotate. 
The first two columns are sample counts and percentage of +total time attributed to each line of code, (recall that we're trying +to determine why `i965_prepare_composite` is using more than 25% of +the total time in the test): + + 274 0.0098 : memset (cc_viewport, 0, sizeof (*cc_viewport)); + 124 0.0044 : cc_viewport->min_depth = -1.e35; + 122 0.0044 : cc_viewport->max_depth = 1.e35; + : + : /* Color calculator state */ + 861 0.0307 : memset(cc_state, 0, sizeof(*cc_state)); + 18559 0.6623 : cc_state->cc0.stencil_enable = 0; /* disable stencil */ + 17836 0.6365 : cc_state->cc2.depth_test = 0; /* disable depth test */ + +Clearly, not all assignments are created equal as the final two +assignments are a couple of orders of magnitude slower than the first +two. For a closer look, here's a chunk of the annotated assembly code +showing some very expensive operations: + + : cc_state->cc2.depth_test = 0; /* disable depth test */ + : cc_state->cc2.logicop_enable = 0; /* disable logic op */ + : cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */ + : cc_state->cc3.blend_enable = 1; /* enable color blend */ + 1 3.6e-05 : 33277: movzbl 0xd(%ecx),%eax + 18168 0.6484 : 3327b: andb $0x7f,0x3(%ecx) + 17836 0.6365 : 3327f: andb $0x7f,0x9(%ecx) + 12306 0.4392 : 33283: andb $0xfe,0x8(%ecx) + 7307 0.2608 : 33287: or $0x30,%eax + +So, we've got some bitfields being used here. Is this uncached memory +that's causing it to be so expensive? + +If I'm as fortunate as I was with my last post, hopefully someone will +drop a handy note into my inbox telling me how to make this function +go blisteringly fast. I'm really looking forward to that. 
diff --git a/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.assembly_annotate b/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.assembly_annotate new file mode 100644 index 0000000..f753928 --- /dev/null +++ b/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.assembly_annotate @@ -0,0 +1,2036 @@ +/* + * Command line: opannotate --source --assembly + * + * Interpretation of command line: + * Output annotated assembly listing with samples + * + * CPU: Core 2, speed 2133.49 MHz (estimated) + * Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000 + */ + : + :/home/cworth/opt/xorg/lib/xorg/modules/drivers/intel_drv.so: file format elf32-i386 + : + :Disassembly of section .init: + :Disassembly of section .plt: + :Disassembly of section .text: + : +00032e90 : /* i965_prepare_composite total: 830728 29.6474 */ + :Bool + :i965_prepare_composite(int op, PicturePtr pSrcPicture, + : PicturePtr pMaskPicture, PicturePtr pDstPicture, + : PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) + :{ + 79 0.0028 : 32e90: push %ebp + 56 0.0020 : 32e91: mov %esp,%ebp + : 32e93: push %edi + 33 0.0012 : 32e94: push %esi + 4 1.4e-04 : 32e95: push %ebx + 10 3.6e-04 : 32e96: sub $0x8c,%esp + : ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; + 24 8.6e-04 : 32e9c: mov 0xc(%ebp),%edx + 34 0.0012 : 32e9f: call 8c27 <__i686.get_pc_thunk.bx> + 88 0.0031 : 32ea4: add $0x12968,%ebx + 24 8.6e-04 : 32eaa: mov (%edx),%eax + 32 0.0011 : 32eac: mov 0xffffff70(%ebx),%edx + 276 0.0099 : 32eb2: mov 0x10(%eax),%eax + 23 8.2e-04 : 32eb5: mov (%eax),%eax + 86 0.0031 : 32eb7: shl $0x2,%eax + 20 7.1e-04 : 32eba: add (%edx),%eax + 252 0.0090 : 32ebc: mov (%eax),%eax + 280 0.0100 : 32ebe: mov %eax,0xffffff98(%ebp) + : I830Ptr pI830 = I830PTR(pScrn); + 54 0.0019 : 32ec1: mov 0xf8(%eax),%ecx + 130 0.0046 : 32ec7: mov %ecx,0xffffff9c(%ebp) + : CARD32 src_offset, src_pitch; + : 
CARD32 mask_offset = 0, mask_pitch = 0; + : CARD32 dst_format, dst_offset, dst_pitch; + : Bool rotation_program = FALSE; + : + : IntelEmitInvarientState(pScrn); + 22 7.9e-04 : 32eca: mov %eax,(%esp) + : 32ecd: call 75e8 + : *pI830->last_3d = LAST_3D_RENDER; + 32 0.0011 : 32ed2: mov 0xffffff9c(%ebp),%esi + 17 6.1e-04 : 32ed5: mov 0xc30(%esi),%eax + 54 0.0019 : 32edb: movl $0x2,(%eax) + : + : src_offset = intel_get_pixmap_offset(pSrc); + 70 0.0025 : 32ee1: mov 0x18(%ebp),%eax + 68 0.0024 : 32ee4: mov %eax,(%esp) + 11 3.9e-04 : 32ee7: call 8558 + : src_pitch = intel_get_pixmap_pitch(pSrc); + 21 7.5e-04 : 32eec: mov 0x18(%ebp),%edx + 4 1.4e-04 : 32eef: mov %eax,0xffffffa0(%ebp) + 11 3.9e-04 : 32ef2: mov %edx,(%esp) + 29 0.0010 : 32ef5: call 7728 + : dst_offset = intel_get_pixmap_offset(pDst); + 34 0.0012 : 32efa: mov 0x20(%ebp),%ecx + 4 1.4e-04 : 32efd: mov %eax,0xffffffa4(%ebp) + 16 5.7e-04 : 32f00: mov %ecx,(%esp) + 109 0.0039 : 32f03: call 8558 + : dst_pitch = intel_get_pixmap_pitch(pDst); + 27 9.6e-04 : 32f08: mov 0x20(%ebp),%esi + 1 3.6e-05 : 32f0b: mov %eax,0xffffffb0(%ebp) + 7 2.5e-04 : 32f0e: mov %esi,(%esp) + 38 0.0014 : 32f11: call 7728 + : if (pMask) { + 25 8.9e-04 : 32f16: mov 0x1c(%ebp),%ecx + : 32f19: test %ecx,%ecx + : 32f1b: mov %eax,0xffffffb4(%ebp) + : 32f1e: je 338d0 + : mask_offset = intel_get_pixmap_offset(pMask); + 20 7.1e-04 : 32f24: mov 0x1c(%ebp),%eax + 5 1.8e-04 : 32f27: mov %eax,(%esp) + 2 7.1e-05 : 32f2a: call 8558 + : mask_pitch = intel_get_pixmap_pitch(pMask); + 12 4.3e-04 : 32f2f: mov 0x1c(%ebp),%edx + : 32f32: mov %eax,0xffffffa8(%ebp) + : 32f35: mov %edx,(%esp) + 16 5.7e-04 : 32f38: call 7728 + : } + : pI830->scale_units[0][0] = pSrc->drawable.width; + 18 6.4e-04 : 32f3d: mov 0x18(%ebp),%ecx + : 32f40: mov %eax,0xffffffac(%ebp) + 8 2.9e-04 : 32f43: movzwl 0xc(%ecx),%eax + 4 1.4e-04 : 32f47: push %eax + 15 5.4e-04 : 32f48: mov 0xffffff9c(%ebp),%esi + 18 6.4e-04 : 32f4b: fildl (%esp) + 23 8.2e-04 : 32f4e: fstps 0x264(%esi) + : 
pI830->scale_units[0][1] = pSrc->drawable.height; + 16 5.7e-04 : 32f54: movzwl 0xe(%ecx),%eax + 4 1.4e-04 : 32f58: mov %eax,(%esp) + : 32f5b: fildl (%esp) + 6 2.1e-04 : 32f5e: fstps 0x268(%esi) + : + : pI830->transform[0] = pSrcPicture->transform; + 17 6.1e-04 : 32f64: mov 0xc(%ebp),%edx + 1 3.6e-05 : 32f67: mov 0x40(%edx),%eax + 37 0.0013 : 32f6a: mov %eax,0x274(%esi) + : + : if (!pMask) { + : pI830->transform[1] = NULL; + : pI830->scale_units[1][0] = -1; + : pI830->scale_units[1][1] = -1; + : if (pI830->transform[0] && + : i965_check_rotation_transform(pI830->transform[0])) + : rotation_program = TRUE; + : } else { + : pI830->transform[1] = pMaskPicture->transform; + 8 2.9e-04 : 32f70: mov 0x10(%ebp),%ecx + 15 5.4e-04 : 32f73: mov 0x40(%ecx),%eax + 21 7.5e-04 : 32f76: mov %eax,0x278(%esi) + : pI830->scale_units[1][0] = pMask->drawable.width; + 8 2.9e-04 : 32f7c: mov 0x1c(%ebp),%esi + : 32f7f: movzwl 0xc(%esi),%eax + 32 0.0011 : 32f83: mov %eax,(%esp) + 1 3.6e-05 : 32f86: mov 0xffffff9c(%ebp),%eax + 3 1.1e-04 : 32f89: fildl (%esp) + 81 0.0029 : 32f8c: fstps 0x26c(%eax) + : pI830->scale_units[1][1] = pMask->drawable.height; + 10 3.6e-04 : 32f92: movzwl 0xe(%esi),%eax + 4 1.4e-04 : 32f96: mov %eax,(%esp) + 9 3.2e-04 : 32f99: mov 0xffffff9c(%ebp),%edx + 5 1.8e-04 : 32f9c: fildl (%esp) + 71 0.0025 : 32f9f: add $0x4,%esp + : 32fa2: fstps 0x270(%edx) + : } + : + : /* setup 3d pipeline state */ + : + : binding_table_entries = 2; /* default no mask */ + 12 4.3e-04 : 32fa8: movl $0x2,0x2a04(%ebx) + : + : /* Wait for sync before we start setting up our new state */ + :#if 0 + : i830WaitSync(pScrn); + :#endif + : + : /* Set up our layout of state in framebuffer. 
First the general state: */ + : next_offset = 0; + : vs_offset = ALIGN(next_offset, 64); + 4 1.4e-04 : 32fb2: movl $0x0,0x2a1c(%ebx) + : next_offset = vs_offset + sizeof(*vs_state); + : + : sf_offset = ALIGN(next_offset, 32); + 3 1.1e-04 : 32fbc: movl $0x20,0x2a20(%ebx) + : next_offset = sf_offset + sizeof(*sf_state); + : + : wm_offset = ALIGN(next_offset, 32); + 11 3.9e-04 : 32fc6: movl $0x40,0x2a24(%ebx) + : next_offset = wm_offset + sizeof(*wm_state); + : + : wm_scratch_offset = ALIGN(next_offset, 1024); + 6 2.1e-04 : 32fd0: movl $0x400,0x2a40(%ebx) + : next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS; + : + : cc_offset = ALIGN(next_offset, 32); + 30 0.0011 : 32fda: movl $0x8400,0x2a28(%ebx) + : next_offset = cc_offset + sizeof(*cc_state); + : + : /* keep current sf_kernel, which will send one setup urb entry to + : * PS kernel + : */ + : sf_kernel_offset = ALIGN(next_offset, 64); + : if (pMask) + : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask); + 14 5.0e-04 : 32fe4: movl $0x85d0,0x2a4c(%ebx) + 1 3.6e-05 : 32fee: movl $0x8440,0x2a34(%ebx) + : else if (rotation_program) + : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation); + : else + : next_offset = sf_kernel_offset + sizeof (sf_kernel_static); + : + : ps_kernel_offset = ALIGN(next_offset, 64); + 6 2.1e-04 : 32ff8: movl $0x8600,0x2a38(%ebx) + : if (pMask) { + : if (pMaskPicture->componentAlpha && + : PICT_FORMAT_RGB(pMaskPicture->format)) { + : if (i965_blend_op[op].src_alpha) { + : next_offset = ps_kernel_offset + + : sizeof(ps_kernel_static_maskca_srcalpha); + : } else { + : next_offset = ps_kernel_offset + + : sizeof(ps_kernel_static_maskca); + : } + : } else + : next_offset = ps_kernel_offset + + 94 0.0034 : 33002: movl $0x8bf0,0x2a4c(%ebx) + 12 4.3e-04 : 3300c: movl $0x8600,0xffffffc0(%ebp) + 8 2.9e-04 : 33013: movl $0x0,0xffffffb8(%ebp) + : sizeof(ps_kernel_static_masknoca); + : } else if (rotation_program) { + : next_offset = ps_kernel_offset + sizeof 
(ps_kernel_static_rotation); + : } else { + : next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask); + : } + : + : sip_kernel_offset = ALIGN(next_offset, 64); + 33 0.0012 : 3301a: mov 0x2a4c(%ebx),%ecx + : next_offset = sip_kernel_offset + sizeof (sip_kernel_static); + : + : /* needed? */ + : cc_viewport_offset = ALIGN(next_offset, 32); + : next_offset = cc_viewport_offset + sizeof(*cc_viewport); + : + : /* for texture sampler */ + : src_sampler_offset = ALIGN(next_offset, 32); + : next_offset = src_sampler_offset + sizeof(*src_sampler_state); + : + : if (pMask) { + 69 0.0025 : 33020: mov 0x1c(%ebp),%edx + 93 0.0033 : 33023: add $0x3f,%ecx + 8 2.9e-04 : 33026: and $0xffffffc0,%ecx + 22 7.9e-04 : 33029: mov %ecx,0xffffff88(%ebp) + 42 0.0015 : 3302c: mov %ecx,0x2a3c(%ebx) + 65 0.0023 : 33032: add $0xa0,%ecx + 1 3.6e-05 : 33038: mov %ecx,0xffffff8c(%ebp) + 31 0.0011 : 3303b: mov %ecx,0x2a30(%ebx) + 35 0.0012 : 33041: add $0x20,%ecx + 2 7.1e-05 : 33044: mov %ecx,%eax + : 33046: add $0x10,%eax + 16 5.7e-04 : 33049: test %edx,%edx + 18 6.4e-04 : 3304b: mov %ecx,0xffffffd0(%ebp) + 19 6.8e-04 : 3304e: mov %ecx,0x2a14(%ebx) + 28 1.0e-03 : 33054: mov %eax,0x2a4c(%ebx) + 27 9.6e-04 : 3305a: je 3306e + : mask_sampler_offset = ALIGN(next_offset, 32); + 1 3.6e-05 : 3305c: add $0x10,%eax + 13 4.6e-04 : 3305f: mov %eax,0x2a18(%ebx) + : next_offset = mask_sampler_offset + sizeof(*mask_sampler_state); + 31 0.0011 : 33065: add $0x10,%eax + : 33068: mov %eax,0x2a4c(%ebx) + : } + : /* Align VB to native size of elements, for safety */ + : vb_offset = ALIGN(next_offset, 8); + 41 0.0015 : 3306e: mov 0x2a4c(%ebx),%esi + 211 0.0075 : 33074: add $0x7,%esi + 22 7.9e-04 : 33077: and $0xfffffff8,%esi + : next_offset = vb_offset + vb_size; + : + : /* And then the general state: */ + : dest_surf_offset = ALIGN(next_offset, 32); + 32 0.0011 : 3307a: mov %esi,%ecx + 36 0.0013 : 3307c: add $0x5f,%ecx + 27 9.6e-04 : 3307f: and $0xffffffe0,%ecx + : next_offset = dest_surf_offset + 
sizeof(*dest_surf_state); + : + : src_surf_offset = ALIGN(next_offset, 32); + 75 0.0027 : 33082: lea 0x20(%ecx),%eax + 31 0.0011 : 33085: mov %eax,0xffffffd4(%ebp) + 30 0.0011 : 33088: mov %eax,0x2a0c(%ebx) + : next_offset = src_surf_offset + sizeof(*src_surf_state); + 40 0.0014 : 3308e: add $0x14,%eax + 350 0.0125 : 33091: mov %eax,0x2a4c(%ebx) + : + : if (pMask) { + 26 9.3e-04 : 33097: mov 0x1c(%ebp),%eax + 33 0.0012 : 3309a: mov %esi,0xffffff90(%ebp) + 30 0.0011 : 3309d: mov %esi,0x2a2c(%ebx) + 89 0.0032 : 330a3: mov %ecx,0x2a08(%ebx) + 29 0.0010 : 330a9: test %eax,%eax + : 330ab: je 330cc + : mask_surf_offset = ALIGN(next_offset, 32); + 1 3.6e-05 : 330ad: mov 0xffffffd4(%ebp),%eax + : next_offset = mask_surf_offset + sizeof(*mask_surf_state); + : binding_table_entries = 3; + 9 3.2e-04 : 330b0: movl $0x3,0x2a04(%ebx) + 13 4.6e-04 : 330ba: add $0x20,%eax + 14 5.0e-04 : 330bd: mov %eax,0x2a10(%ebx) + 28 1.0e-03 : 330c3: add $0x14,%eax + : 330c6: mov %eax,0x2a4c(%ebx) + : } + : + : binding_table_offset = ALIGN(next_offset, 32); + 25 8.9e-04 : 330cc: mov 0x2a4c(%ebx),%edi + : next_offset = binding_table_offset + (binding_table_entries * 4); + : + : default_color_offset = ALIGN(next_offset, 32); + 101 0.0036 : 330d2: mov 0x2a04(%ebx),%eax + : next_offset = default_color_offset + sizeof(*default_color_state); + : + : total_state_size = next_offset; + : assert(total_state_size < pI830->exa_965_state->size); + : + : state_base_offset = pI830->exa_965_state->offset; + 39 0.0014 : 330d8: mov 0xffffff9c(%ebp),%edx + 4 1.4e-04 : 330db: add $0x1f,%edi + 28 1.0e-03 : 330de: and $0xffffffe0,%edi + 26 9.3e-04 : 330e1: lea 0x1f(%edi,%eax,4),%esi + 28 1.0e-03 : 330e5: and $0xffffffe0,%esi + 35 0.0012 : 330e8: lea 0x10(%esi),%eax + 27 9.6e-04 : 330eb: mov %edi,0x2a44(%ebx) + : 330f1: mov %esi,0x2a48(%ebx) + 8 2.9e-04 : 330f7: mov %eax,0x2a4c(%ebx) + 36 0.0013 : 330fd: mov %eax,0x2a50(%ebx) + 64 0.0023 : 33103: mov 0x68(%edx),%eax + : state_base_offset = ALIGN(state_base_offset, 
64); + 35 0.0012 : 33106: mov (%eax),%edx + : state_base = (char *)(pI830->FbBase + state_base_offset); + 157 0.0056 : 33108: mov 0xffffff9c(%ebp),%eax + 10 3.6e-04 : 3310b: add $0x3f,%edx + 23 8.2e-04 : 3310e: and $0xffffffc0,%edx + 16 5.7e-04 : 33111: mov %edx,0x2a58(%ebx) + 14 5.0e-04 : 33117: add 0x8(%eax),%edx + : + : vs_state = (void *)(state_base + vs_offset); + : sf_state = (void *)(state_base + sf_offset); + 11 3.9e-04 : 3311a: lea 0x20(%edx),%eax + 25 8.9e-04 : 3311d: mov %eax,0x29e4(%ebx) + : wm_state = (void *)(state_base + wm_offset); + 39 0.0014 : 33123: lea 0x40(%edx),%eax + 4 1.4e-04 : 33126: mov %eax,0x29e8(%ebx) + : cc_state = (void *)(state_base + cc_offset); + 34 0.0012 : 3312c: lea 0x8400(%edx),%eax + 4 1.4e-04 : 33132: mov %eax,0x29ec(%ebx) + : sf_kernel = (void *)(state_base + sf_kernel_offset); + 29 0.0010 : 33138: lea 0x8440(%edx),%eax + : 3313e: mov %eax,0x29f4(%ebx) + : ps_kernel = (void *)(state_base + ps_kernel_offset); + 54 0.0019 : 33144: mov 0xffffffc0(%ebp),%eax + 2 7.1e-05 : 33147: mov %edx,0x2a54(%ebx) + 12 4.3e-04 : 3314d: mov %edx,0x29e0(%ebx) + 10 3.6e-04 : 33153: add %edx,%eax + 15 5.4e-04 : 33155: mov %eax,0x29f8(%ebx) + : sip_kernel = (void *)(state_base + sip_kernel_offset); + 11 3.9e-04 : 3315b: mov 0xffffff88(%ebp),%eax + 14 5.0e-04 : 3315e: add %edx,%eax + 6 2.1e-04 : 33160: mov %eax,0x29fc(%ebx) + : + : cc_viewport = (void *)(state_base + cc_viewport_offset); + 27 9.6e-04 : 33166: mov 0xffffff8c(%ebp),%eax + 6 2.1e-04 : 33169: add %edx,%eax + 3 1.1e-04 : 3316b: mov %eax,0xffffff94(%ebp) + 27 9.6e-04 : 3316e: mov %eax,0x29f0(%ebx) + : + : dest_surf_state = (void *)(state_base + dest_surf_offset); + 25 8.9e-04 : 33174: lea (%edx,%ecx,1),%eax + : src_surf_state = (void *)(state_base + src_surf_offset); + 4 1.4e-04 : 33177: mov 0xffffffd4(%ebp),%ecx + : 3317a: mov %eax,0x29c8(%ebx) + 12 4.3e-04 : 33180: lea (%edx,%ecx,1),%eax + 48 0.0017 : 33183: mov %eax,0x29cc(%ebx) + : if (pMask) + 27 9.6e-04 : 33189: mov 0x1c(%ebp),%eax 
+ 2 7.1e-05 : 3318c: test %eax,%eax + 8 2.9e-04 : 3318e: je 34240 + : mask_surf_state = (void *)(state_base + mask_surf_offset); + : + : src_sampler_state = (void *)(state_base + src_sampler_offset); + : 33194: mov 0xffffffd0(%ebp),%ecx + 8 2.9e-04 : 33197: mov %edx,%eax + 8 2.9e-04 : 33199: add 0x2a10(%ebx),%eax + 1 3.6e-05 : 3319f: mov %eax,0x29d0(%ebx) + 8 2.9e-04 : 331a5: lea (%edx,%ecx,1),%eax + 1 3.6e-05 : 331a8: mov %eax,0x29d4(%ebx) + : if (pMask) + : mask_sampler_state = (void *)(state_base + mask_sampler_offset); + 14 5.0e-04 : 331ae: mov %edx,%eax + 2 7.1e-05 : 331b0: add 0x2a18(%ebx),%eax + 4 1.4e-04 : 331b6: mov %eax,0x29d8(%ebx) + : + : binding_table = (void *)(state_base + binding_table_offset); + : + : vb = (void *)(state_base + vb_offset); + 21 7.5e-04 : 331bc: mov 0xffffff90(%ebp),%ecx + 33 0.0012 : 331bf: lea (%edx,%edi,1),%eax + 22 7.9e-04 : 331c2: mov %eax,0x2a00(%ebx) + : + : default_color_state = (void*)(state_base + default_color_offset); + : + : /* Set up a default static partitioning of the URB, which is supposed to + : * allow anything we would want to do, at potentially lower performance. 
+ : */ + :#define URB_CS_ENTRY_SIZE 0 + :#define URB_CS_ENTRIES 0 + : + :#define URB_VS_ENTRY_SIZE 1 // each 512-bit row + :#define URB_VS_ENTRIES 8 // we needs at least 8 entries + : + :#define URB_GS_ENTRY_SIZE 0 + :#define URB_GS_ENTRIES 0 + : + :#define URB_CLIP_ENTRY_SIZE 0 + :#define URB_CLIP_ENTRIES 0 + : + :#define URB_SF_ENTRY_SIZE 2 + :#define URB_SF_ENTRIES 1 + : + : urb_vs_start = 0; + 25 8.9e-04 : 331c8: movl $0x0,0x29a0(%ebx) + : urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + 21 7.5e-04 : 331d2: movl $0x8,0x29a4(%ebx) + 19 6.8e-04 : 331dc: lea (%edx,%ecx,1),%eax + : urb_gs_start = urb_vs_start + urb_vs_size; + : urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + : urb_clip_start = urb_gs_start + urb_gs_size; + : urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + : urb_sf_start = urb_clip_start + urb_clip_size; + : urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + : urb_cs_start = urb_sf_start + urb_sf_size; + : urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + : + : /* Because we only have a single static buffer for our state currently, + : * we have to sync before updating it every time. 
+ : */ + :#if 0 + : i830WaitSync(pScrn); + :#endif + : + : memset (cc_viewport, 0, sizeof (*cc_viewport)); + : cc_viewport->min_depth = -1.e35; + : cc_viewport->max_depth = 1.e35; + : + : /* Color calculator state */ + : memset(cc_state, 0, sizeof(*cc_state)); + : 331df: mov $0x8,%ecx + 2 7.1e-05 : 331e4: mov %eax,0x2a5c(%ebx) + 32 0.0011 : 331ea: lea (%edx,%esi,1),%eax + 1 3.6e-05 : 331ed: mov 0xffffff94(%ebp),%esi + 33 0.0012 : 331f0: mov %eax,0x29dc(%ebx) + 40 0.0014 : 331f6: movl $0x8,0x29a8(%ebx) + 58 0.0021 : 33200: movl $0x0,0x29ac(%ebx) + 58 0.0021 : 3320a: movl $0x8,0x29b0(%ebx) + 50 0.0018 : 33214: movl $0x0,0x29b4(%ebx) + 46 0.0016 : 3321e: movl $0x8,0x29b8(%ebx) + 52 0.0019 : 33228: movl $0x2,0x29bc(%ebx) + 42 0.0015 : 33232: movl $0xa,0x29c0(%ebx) + 43 0.0015 : 3323c: movl $0x0,0x29c4(%ebx) + 92 0.0033 : 33246: movl $0x0,(%esi) + 181 0.0065 : 3324c: movl $0x0,0x4(%esi) + 87 0.0031 : 33253: mov 0x29f0(%ebx),%eax + 114 0.0041 : 33259: cld + 37 0.0013 : 3325a: movl $0xf99a130c,(%eax) + 122 0.0044 : 33260: movl $0x799a130c,0x4(%eax) + 41 0.0015 : 33267: mov 0x29ec(%ebx),%edi + 15 5.4e-04 : 3326d: xor %eax,%eax + 691 0.0247 : 3326f: rep stos %eax,%es:(%edi) + : cc_state->cc0.stencil_enable = 0; /* disable stencil */ + 391 0.0140 : 33271: mov 0x29ec(%ebx),%ecx + : cc_state->cc2.depth_test = 0; /* disable depth test */ + : cc_state->cc2.logicop_enable = 0; /* disable logic op */ + : cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */ + : cc_state->cc3.blend_enable = 1; /* enable color blend */ + 1 3.6e-05 : 33277: movzbl 0xd(%ecx),%eax + 18168 0.6484 : 3327b: andb $0x7f,0x3(%ecx) + 17836 0.6365 : 3327f: andb $0x7f,0x9(%ecx) + 12306 0.4392 : 33283: andb $0xfe,0x8(%ecx) + 7307 0.2608 : 33287: or $0x30,%eax + : cc_state->cc3.alpha_test = 0; /* disable alpha test */ + : 3328a: and $0xfffffff7,%eax + 10 3.6e-04 : 3328d: mov %al,0xd(%ecx) + : cc_state->cc4.cc_viewport_state_offset = (state_base_offset + + : 33290: mov 0x10(%ecx),%eax + 9589 0.3422 
: 33293: mov 0x2a30(%ebx),%edx + : 33299: add 0x2a58(%ebx),%edx + 1 3.6e-05 : 3329f: and $0x1f,%eax + 26 9.3e-04 : 332a2: and $0xffffffe0,%edx + 6 2.1e-04 : 332a5: or %edx,%eax + 23 8.2e-04 : 332a7: mov %eax,0x10(%ecx) + : cc_viewport_offset) >> 5; + : cc_state->cc5.dither_enable = 0; /* disable dither */ + : cc_state->cc5.logicop_func = 0xc; /* COPY */ + 30 0.0011 : 332aa: movzbl 0x16(%ecx),%eax + 7354 0.2625 : 332ae: andb $0x7f,0x17(%ecx) + 839 0.0299 : 332b2: and $0xfffffff0,%eax + 31 0.0011 : 332b5: or $0xc,%eax + 26 9.3e-04 : 332b8: mov %al,0x16(%ecx) + : cc_state->cc5.statistics_enable = 1; + 28 1.0e-03 : 332bb: movzbl 0x15(%ecx),%eax + 2752 0.0982 : 332bf: or $0xffffff80,%eax + : cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + 55 0.0020 : 332c2: and $0xffffff8f,%eax + 8 2.9e-04 : 332c5: mov %al,0x15(%ecx) + : i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, + 29 0.0010 : 332c8: mov 0x14(%ebp),%eax + 4 1.4e-04 : 332cb: mov 0x8(%ebp),%esi + 3 1.1e-04 : 332ce: mov 0x8(%eax),%edx + 2 7.1e-05 : 332d1: shl $0x4,%esi + 25 8.9e-04 : 332d4: mov 0xffffb0fc(%ebx,%esi,1),%edi + 30 0.0011 : 332db: mov 0xffffb100(%ebx,%esi,1),%eax + : 332e2: and $0xf0,%dh + : 332e5: mov %edi,0x2a60(%ebx) + 26 9.3e-04 : 332eb: mov %eax,0x2a64(%ebx) + 8 2.9e-04 : 332f1: jne 33302 + : 332f3: mov 0xffffb0f4(%ebx,%esi,1),%eax + : 332fa: test %eax,%eax + 21 7.5e-04 : 332fc: jne 33880 + 24 8.6e-04 : 33302: mov 0x10(%ebp),%edi + 5 1.8e-04 : 33305: test %edi,%edi + : 33307: je 33316 + : 33309: mov 0x10(%ebp),%edx + 19 6.8e-04 : 3330c: testb $0x1,0x19(%edx) + 4 1.4e-04 : 33310: jne 33790 + : &src_blend, &dst_blend); + : /* XXX: alpha blend factor should be same as color, but check + : * for CA case in future + : */ + : cc_state->cc5.ia_src_blend_factor = src_blend; + 42 0.0015 : 33316: movzbl 0x2a60(%ebx),%edx + 22 7.9e-04 : 3331d: movzwl 0x14(%ecx),%eax + 13919 0.4967 : 33321: and $0x1f,%edx + : 33324: and $0x1f,%edx + : 33327: and $0xf07f,%ax + 36 0.0013 : 3332b: shl 
$0x7,%edx + : 3332e: or %edx,%eax + 70 0.0025 : 33330: mov %ax,0x14(%ecx) + : cc_state->cc5.ia_dest_blend_factor = dst_blend; + 37 0.0013 : 33334: movzbl 0x2a64(%ebx),%edx + : 3333b: movzbl 0x14(%ecx),%eax + : cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; + 13845 0.4941 : 3333f: andb $0x1f,0x1b(%ecx) + 215 0.0077 : 33343: and $0x1f,%edx + : 33346: shl $0x2,%edx + 32 0.0011 : 33349: and $0xffffff83,%eax + 2 7.1e-05 : 3334c: or %edx,%eax + 15 5.4e-04 : 3334e: mov %al,0x14(%ecx) + : cc_state->cc6.src_blend_factor = src_blend; + 31 0.0011 : 33351: movzbl 0x2a60(%ebx),%eax + : 33358: and $0x1f,%eax + : 3335b: mov %al,0x1b(%ecx) + : cc_state->cc6.dest_blend_factor = dst_blend; + 2 7.1e-05 : 3335e: movzbl 0x2a64(%ebx),%edx + 34 0.0012 : 33365: movzbl 0x1a(%ecx),%eax + 2280 0.0814 : 33369: and $0x1f,%edx + : 3336c: shl $0x3,%edx + : 3336f: and $0x7,%eax + 27 9.6e-04 : 33372: or %edx,%eax + 18 6.4e-04 : 33374: mov %al,0x1a(%ecx) + : cc_state->cc6.clamp_post_alpha_blend = 1; + : cc_state->cc6.clamp_pre_alpha_blend = 1; + 15 5.4e-04 : 33377: movzbl 0x18(%ecx),%eax + 4451 0.1588 : 3337b: or $0x3,%eax + : cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ + 25 8.9e-04 : 3337e: and $0xfffffff3,%eax + 334 0.0119 : 33381: mov %al,0x18(%ecx) + : + : /* Upload system kernel */ + : memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static)); + 39 0.0014 : 33384: lea 0xffffb1d4(%ebx),%eax + : 3338a: mov %eax,0x4(%esp) + 23 8.2e-04 : 3338e: mov 0x29fc(%ebx),%eax + : 33394: movl $0xa0,0x8(%esp) + 15 5.4e-04 : 3339c: mov %eax,(%esp) + 9 3.2e-04 : 3339f: call 7d88 + : + : /* Set up the state buffer for the destination surface */ + : memset(dest_surf_state, 0, sizeof(*dest_surf_state)); + 47 0.0017 : 333a4: mov 0x29c8(%ebx),%eax + : dest_surf_state->ss0.surface_type = BRW_SURFACE_2D; + : dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + : i965_get_dest_format(pDstPicture, &dst_format); + 2 7.1e-05 : 333aa: lea 0xfffffff0(%ebp),%edx + : 333ad: 
movl $0x0,(%eax) + 83 0.0030 : 333b3: movl $0x0,0x4(%eax) + 34 0.0012 : 333ba: movl $0x0,0x8(%eax) + 144 0.0051 : 333c1: movl $0x0,0xc(%eax) + 24 8.6e-04 : 333c8: movl $0x0,0x10(%eax) + 34 0.0012 : 333cf: mov 0x29c8(%ebx),%esi + 13 4.6e-04 : 333d5: movzbl 0x3(%esi),%eax + 15244 0.5440 : 333d9: and $0x17,%eax + 39 0.0014 : 333dc: or $0x20,%eax + 28 1.0e-03 : 333df: mov %al,0x3(%esi) + 36 0.0013 : 333e2: mov 0x14(%ebp),%eax + 1 3.6e-05 : 333e5: call 32810 + : dest_surf_state->ss0.surface_format = dst_format; + 86 0.0031 : 333ea: movzwl 0xfffffff0(%ebp),%edx + 26 9.3e-04 : 333ee: movzwl 0x2(%esi),%eax + 14220 0.5075 : 333f2: and $0x1ff,%dx + : 333f7: shl $0x2,%edx + : 333fa: and $0xf803,%ax + 33 0.0012 : 333fe: or %edx,%eax + 65 0.0023 : 33400: mov %ax,0x2(%esi) + : + : dest_surf_state->ss0.writedisable_alpha = 0; + : dest_surf_state->ss0.writedisable_red = 0; + : dest_surf_state->ss0.writedisable_green = 0; + : dest_surf_state->ss0.writedisable_blue = 0; + : dest_surf_state->ss0.color_blend = 1; + 23 8.2e-04 : 33404: movzbl 0x1(%esi),%eax + 361 0.0129 : 33408: andb $0xfc,0x2(%esi) + 14053 0.5015 : 3340c: and $0x21,%eax + : dest_surf_state->ss0.vert_line_stride = 0; + : dest_surf_state->ss0.vert_line_stride_ofs = 0; + : dest_surf_state->ss0.mipmap_layout_mode = 0; + : dest_surf_state->ss0.render_cache_read_mode = 0; + : 3340f: or $0x20,%eax + : 33412: mov %al,0x1(%esi) + : + : dest_surf_state->ss1.base_addr = dst_offset; + : 33415: mov 0xffffffb0(%ebp),%ecx + 36 0.0013 : 33418: mov %ecx,0x4(%esi) + : dest_surf_state->ss2.height = pDst->drawable.height - 1; + : 3341b: mov 0x20(%ebp),%eax + : 3341e: movzwl 0xe(%eax),%edx + : 33422: movzwl 0xa(%esi),%eax + 387 0.0138 : 33426: sub $0x1,%edx + : 33429: shl $0x3,%edx + : 3342c: and $0x7,%eax + 1 3.6e-05 : 3342f: or %edx,%eax + 32 0.0011 : 33431: mov %ax,0xa(%esi) + : dest_surf_state->ss2.width = pDst->drawable.width - 1; + 1 3.6e-05 : 33435: mov 0x20(%ebp),%ecx + : 33438: mov 0x8(%esi),%eax + 14501 0.5175 : 3343b: movzwl 
0xc(%ecx),%edx + : 3343f: sub $0x1,%edx + 2 7.1e-05 : 33442: and $0x1fff,%edx + : 33448: and $0xfff8003f,%eax + 29 0.0010 : 3344d: shl $0x6,%edx + 1 3.6e-05 : 33450: or %edx,%eax + 33 0.0012 : 33452: mov %eax,0x8(%esi) + : dest_surf_state->ss2.mip_count = 0; + : dest_surf_state->ss2.render_target_rotation = 0; + : dest_surf_state->ss3.pitch = dst_pitch - 1; + 45 0.0016 : 33455: mov 0xc(%esi),%eax + 398 0.0142 : 33458: andb $0xc0,0x8(%esi) + 11618 0.4146 : 3345c: mov 0xffffffb4(%ebp),%edx + : 3345f: and $0xffe00007,%eax + : 33464: sub $0x1,%edx + : 33467: and $0x3ffff,%edx + 28 1.0e-03 : 3346d: shl $0x3,%edx + : 33470: or %edx,%eax + : 33472: mov %eax,0xc(%esi) + : + : /* Set up the source surface state buffer */ + : memset(src_surf_state, 0, sizeof(*src_surf_state)); + 1 3.6e-05 : 33475: mov 0x29cc(%ebx),%eax + 37 0.0013 : 3347b: movl $0x0,(%eax) + 3 1.1e-04 : 33481: movl $0x0,0x4(%eax) + : 33488: movl $0x0,0x8(%eax) + : 3348f: movl $0x0,0xc(%eax) + 25 8.9e-04 : 33496: movl $0x0,0x10(%eax) + : src_surf_state->ss0.surface_type = BRW_SURFACE_2D; + 1 3.6e-05 : 3349d: mov 0x29cc(%ebx),%esi + : 334a3: movzbl 0x3(%esi),%eax + 13825 0.4934 : 334a7: and $0x1f,%eax + 37 0.0013 : 334aa: or $0x20,%eax + 34 0.0012 : 334ad: mov %al,0x3(%esi) + : src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture); + 28 1.0e-03 : 334b0: mov 0xc(%ebp),%eax + : 334b3: call 329b0 + 24 8.6e-04 : 334b8: movzwl 0x2(%esi),%edx + 14209 0.5071 : 334bc: and $0xf803,%dx + 66 0.0024 : 334c1: and $0x1ff,%ax + : 334c5: shl $0x2,%eax + 3 1.1e-04 : 334c8: or %eax,%edx + : + : src_surf_state->ss0.writedisable_alpha = 0; + : src_surf_state->ss0.writedisable_red = 0; + : src_surf_state->ss0.writedisable_green = 0; + : src_surf_state->ss0.writedisable_blue = 0; + : src_surf_state->ss0.color_blend = 1; + 53 0.0019 : 334ca: movzbl 0x1(%esi),%eax + 327 0.0117 : 334ce: mov %dx,0x2(%esi) + 25 8.9e-04 : 334d2: andb $0xfc,0x2(%esi) + 14928 0.5328 : 334d6: and $0x21,%eax + : 
src_surf_state->ss0.vert_line_stride = 0; + : src_surf_state->ss0.vert_line_stride_ofs = 0; + : src_surf_state->ss0.mipmap_layout_mode = 0; + : src_surf_state->ss0.render_cache_read_mode = 0; + 1 3.6e-05 : 334d9: or $0x20,%eax + : 334dc: mov %al,0x1(%esi) + : + : src_surf_state->ss1.base_addr = src_offset; + : 334df: mov 0xffffffa0(%ebp),%eax + 26 9.3e-04 : 334e2: mov %eax,0x4(%esi) + : src_surf_state->ss2.width = pSrc->drawable.width - 1; + : 334e5: mov 0x18(%ebp),%ecx + : 334e8: mov 0x8(%esi),%eax + : 334eb: movzwl 0xc(%ecx),%edx + 40 0.0014 : 334ef: and $0xfff8003f,%eax + : 334f4: sub $0x1,%edx + : 334f7: and $0x1fff,%edx + : 334fd: shl $0x6,%edx + 33 0.0012 : 33500: or %edx,%eax + 54 0.0019 : 33502: mov %eax,0x8(%esi) + : src_surf_state->ss2.height = pSrc->drawable.height - 1; + 9 3.2e-04 : 33505: movzwl 0xe(%ecx),%edx + : 33509: movzwl 0xa(%esi),%eax + : src_surf_state->ss2.mip_count = 0; + : src_surf_state->ss2.render_target_rotation = 0; + 15025 0.5362 : 3350d: andb $0xc0,0x8(%esi) + 6407 0.2287 : 33511: sub $0x1,%edx + : 33514: shl $0x3,%edx + : 33517: and $0x7,%eax + : 3351a: or %edx,%eax + 38 0.0014 : 3351c: mov %ax,0xa(%esi) + : src_surf_state->ss3.pitch = src_pitch - 1; + : 33520: mov 0xffffffa4(%ebp),%edx + : 33523: mov 0xc(%esi),%eax + 178 0.0064 : 33526: sub $0x1,%edx + 27 9.6e-04 : 33529: and $0x3ffff,%edx + : 3352f: shl $0x3,%edx + : 33532: and $0xffe00007,%eax + : 33537: or %edx,%eax + 38 0.0014 : 33539: mov %eax,0xc(%esi) + : + : /* setup mask surface */ + : if (pMask) { + : 3353c: mov 0x1c(%ebp),%ecx + : 3353f: test %ecx,%ecx + : 33541: je 338a5 + : memset(mask_surf_state, 0, sizeof(*mask_surf_state)); + 12 4.3e-04 : 33547: mov 0x29d0(%ebx),%eax + 19 6.8e-04 : 3354d: movl $0x0,(%eax) + 1 3.6e-05 : 33553: movl $0x0,0x4(%eax) + 2 7.1e-05 : 3355a: movl $0x0,0x8(%eax) + : 33561: movl $0x0,0xc(%eax) + 14 5.0e-04 : 33568: movl $0x0,0x10(%eax) + : mask_surf_state->ss0.surface_type = BRW_SURFACE_2D; + 2 7.1e-05 : 3356f: mov 0x29d0(%ebx),%esi + 1 3.6e-05 
: 33575: movzbl 0x3(%esi),%eax + 7000 0.2498 : 33579: and $0x1f,%eax + 18 6.4e-04 : 3357c: or $0x20,%eax + 16 5.7e-04 : 3357f: mov %al,0x3(%esi) + : mask_surf_state->ss0.surface_format = + 20 7.1e-04 : 33582: mov 0x10(%ebp),%eax + : 33585: call 329b0 + 28 1.0e-03 : 3358a: movzwl 0x2(%esi),%edx + 7436 0.2654 : 3358e: and $0xf803,%dx + 18 6.4e-04 : 33593: and $0x1ff,%ax + : 33597: shl $0x2,%eax + : 3359a: or %eax,%edx + : i965_get_card_format(pMaskPicture); + : + : mask_surf_state->ss0.writedisable_alpha = 0; + : mask_surf_state->ss0.writedisable_red = 0; + : mask_surf_state->ss0.writedisable_green = 0; + : mask_surf_state->ss0.writedisable_blue = 0; + : mask_surf_state->ss0.color_blend = 1; + 36 0.0013 : 3359c: movzbl 0x1(%esi),%eax + 117 0.0042 : 335a0: mov %dx,0x2(%esi) + 25 8.9e-04 : 335a4: andb $0xfc,0x2(%esi) + 7753 0.2767 : 335a8: and $0x21,%eax + : mask_surf_state->ss0.vert_line_stride = 0; + : mask_surf_state->ss0.vert_line_stride_ofs = 0; + : mask_surf_state->ss0.mipmap_layout_mode = 0; + : mask_surf_state->ss0.render_cache_read_mode = 0; + : 335ab: or $0x20,%eax + : 335ae: mov %al,0x1(%esi) + : + : mask_surf_state->ss1.base_addr = mask_offset; + : 335b1: mov 0xffffffa8(%ebp),%eax + 15 5.4e-04 : 335b4: mov %eax,0x4(%esi) + : mask_surf_state->ss2.width = pMask->drawable.width - 1; + : 335b7: mov 0x1c(%ebp),%ecx + : 335ba: mov 0x8(%esi),%eax + : 335bd: movzwl 0xc(%ecx),%edx + 23 8.2e-04 : 335c1: and $0xfff8003f,%eax + : 335c6: sub $0x1,%edx + : 335c9: and $0x1fff,%edx + 2 7.1e-05 : 335cf: shl $0x6,%edx + 12 4.3e-04 : 335d2: or %edx,%eax + : 335d4: mov %eax,0x8(%esi) + : mask_surf_state->ss2.height = pMask->drawable.height - 1; + : 335d7: movzwl 0xe(%ecx),%edx + 1 3.6e-05 : 335db: movzwl 0xa(%esi),%eax + : mask_surf_state->ss2.mip_count = 0; + : mask_surf_state->ss2.render_target_rotation = 0; + 7624 0.2721 : 335df: andb $0xc0,0x8(%esi) + 3264 0.1165 : 335e3: sub $0x1,%edx + : 335e6: shl $0x3,%edx + : 335e9: and $0x7,%eax + : 335ec: or %edx,%eax + 16 5.7e-04 : 
335ee: mov %ax,0xa(%esi) + : mask_surf_state->ss3.pitch = mask_pitch - 1; + : 335f2: mov 0xffffffac(%ebp),%edx + : 335f5: mov 0xc(%esi),%eax + 21 7.5e-04 : 335f8: sub $0x1,%edx + 20 7.1e-04 : 335fb: and $0x3ffff,%edx + 16 5.7e-04 : 33601: and $0xffe00007,%eax + : 33606: shl $0x3,%edx + : 33609: or %edx,%eax + 14 5.0e-04 : 3360b: mov %eax,0xc(%esi) + : } + : + : /* Set up a binding table for our surfaces. Only the PS will use it */ + : binding_table[0] = state_base_offset + dest_surf_offset; + 2 7.1e-05 : 3360e: mov 0x2a58(%ebx),%eax + : 33614: mov 0x2a00(%ebx),%ecx + : 3361a: mov %eax,%edx + 12 4.3e-04 : 3361c: add 0x2a08(%ebx),%edx + 1 3.6e-05 : 33622: mov %edx,(%ecx) + : binding_table[1] = state_base_offset + src_surf_offset; + 2 7.1e-05 : 33624: mov %eax,%edx + : 33626: add 0x2a0c(%ebx),%edx + 19 6.8e-04 : 3362c: mov %edx,0x4(%ecx) + : if (pMask) + : binding_table[2] = state_base_offset + mask_surf_offset; + 2 7.1e-05 : 3362f: add 0x2a10(%ebx),%eax + : 33635: mov %eax,0x8(%ecx) + : + : /* PS kernel use this sampler */ + : memset(src_sampler_state, 0, sizeof(*src_sampler_state)); + 15 5.4e-04 : 33638: mov 0x29d4(%ebx),%eax + 36 0.0013 : 3363e: movl $0x0,(%eax) + 45 0.0016 : 33644: movl $0x0,0x4(%eax) + 2 7.1e-05 : 3364b: movl $0x0,0x8(%eax) + 13 4.6e-04 : 33652: movl $0x0,0xc(%eax) + : src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + 20 7.1e-04 : 33659: mov 0x29d4(%ebx),%edx + 4 1.4e-04 : 3365f: orb $0x10,0x3(%edx) + : switch(pSrcPicture->filter) { + 12445 0.4441 : 33663: mov 0xc(%ebp),%esi + : 33666: mov 0x44(%esi),%eax + : 33669: test %eax,%eax + : 3366b: je 33683 + : 3366d: sub $0x1,%eax + : 33670: je 34183 + : case PictFilterNearest: + : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + : break; + : case PictFilterBilinear: + : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + : break; + : default: + : 
I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter); + : } + : + : memset(default_color_state, 0, sizeof(*default_color_state)); + : default_color_state->color[0] = 0.0; /* R */ + : default_color_state->color[1] = 0.0; /* G */ + : default_color_state->color[2] = 0.0; /* B */ + : default_color_state->color[3] = 0.0; /* A */ + : + : src_sampler_state->ss0.default_color_mode = 0; /* GL mode */ + : + : if (!pSrcPicture->repeat) { + : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + : src_sampler_state->ss2.default_color_pointer = + : (state_base_offset + default_color_offset) >> 5; + : } else { + : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : } + : src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ + : + : if (pMask) { + : memset(mask_sampler_state, 0, sizeof(*mask_sampler_state)); + : mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + : switch(pMaskPicture->filter) { + : case PictFilterNearest: + : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + : break; + : case PictFilterBilinear: + : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + : break; + : default: + : I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter); + : } + : + : if (!pMaskPicture->repeat) { + : mask_sampler_state->ss1.r_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + : mask_sampler_state->ss1.s_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + : mask_sampler_state->ss1.t_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + : mask_sampler_state->ss2.default_color_pointer = + : (state_base_offset + 
default_color_offset)>>5; + : } else { + : mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : } + : mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ + : } + : + : /* Set up the vertex shader to be disabled (passthrough) */ + : memset(vs_state, 0, sizeof(*vs_state)); + : vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; + : vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + : vs_state->vs6.vs_enable = 0; + : vs_state->vs6.vert_cache_disable = 1; + : + : /* Set up the SF kernel to do coord interp: for each attribute, + : * calculate dA/dx and dA/dy. Hand these interpolation coefficients + : * back to SF which then hands pixels off to WM. + : */ + : if (pMask) + : memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static)); + : else if (rotation_program) + : memcpy(sf_kernel, sf_kernel_static_rotation, + : sizeof (sf_kernel_static_rotation)); + : else + : memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static)); + : + : memset(sf_state, 0, sizeof(*sf_state)); + : sf_state->thread0.kernel_start_pointer = + : (state_base_offset + sf_kernel_offset) >> 6; + : sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + : sf_state->sf1.single_program_flow = 1; + : sf_state->sf1.binding_table_entry_count = 0; + : sf_state->sf1.thread_priority = 0; + : sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ + : sf_state->sf1.illegal_op_exception_enable = 1; + : sf_state->sf1.mask_stack_exception_enable = 1; + : sf_state->sf1.sw_exception_enable = 1; + : sf_state->thread2.per_thread_scratch_space = 0; + : /* scratch space is not used in our kernel */ + : sf_state->thread2.scratch_space_base_pointer = 0; + : sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + : sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + : 
sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + : /* don't smash vertex header, read start from dw8 */ + : sf_state->thread3.urb_entry_read_offset = 1; + : sf_state->thread3.dispatch_grf_start_reg = 3; + : sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + : sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + : sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + : sf_state->thread4.stats_enable = 1; + : sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ + : sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; + : sf_state->sf6.scissor = 0; + : sf_state->sf7.trifan_pv = 2; + : sf_state->sf6.dest_org_vbias = 0x8; + : sf_state->sf6.dest_org_hbias = 0x8; + : + : /* Set up the PS kernel (dispatched by WM) */ + : if (pMask) { + : if (pMaskPicture->componentAlpha && + : PICT_FORMAT_RGB(pMaskPicture->format)) { + : if (i965_blend_op[op].src_alpha) + : memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha, + : sizeof (ps_kernel_static_maskca_srcalpha)); + : else + : memcpy(ps_kernel, ps_kernel_static_maskca, + : sizeof (ps_kernel_static_maskca)); + : } else + : memcpy(ps_kernel, ps_kernel_static_masknoca, + : sizeof (ps_kernel_static_masknoca)); + : } else if (rotation_program) { + : memcpy(ps_kernel, ps_kernel_static_rotation, + : sizeof (ps_kernel_static_rotation)); + : } else { + : memcpy(ps_kernel, ps_kernel_static_nomask, + : sizeof (ps_kernel_static_nomask)); + : } + : + : memset(wm_state, 0, sizeof (*wm_state)); + : wm_state->thread0.kernel_start_pointer = + : (state_base_offset + ps_kernel_offset) >> 6; + : wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + : wm_state->thread1.single_program_flow = 1; + : if (!pMask) + : wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + : else + : wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + : + : wm_state->thread2.scratch_space_base_pointer = (state_base_offset + + : wm_scratch_offset)>>10; + : 
wm_state->thread2.per_thread_scratch_space = 0; + : wm_state->thread3.const_urb_entry_read_length = 0; + : wm_state->thread3.const_urb_entry_read_offset = 0; + : /* Each pair of attributes (src/mask coords) is one URB entry */ + : if (pMask) + : wm_state->thread3.urb_entry_read_length = 2; + : else + : wm_state->thread3.urb_entry_read_length = 1; + : wm_state->thread3.urb_entry_read_offset = 0; + : /* wm kernel use urb from 3, see wm_program in compiler module */ + : wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + : + : wm_state->wm4.stats_enable = 1; /* statistic */ + : wm_state->wm4.sampler_state_pointer = (state_base_offset + + : src_sampler_offset) >> 5; + : wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ + : wm_state->wm5.max_threads = PS_MAX_THREADS - 1; + : wm_state->wm5.thread_dispatch_enable = 1; + : /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + : * start point + : */ + : wm_state->wm5.enable_16_pix = 1; + : wm_state->wm5.enable_8_pix = 0; + : wm_state->wm5.early_depth_test = 1; + : + : /* Begin the long sequence of commands needed to set up the 3D + : * rendering pipe + : */ + : { + : BEGIN_LP_RING(2); + : OUT_RING(MI_FLUSH | + : MI_STATE_INSTRUCTION_CACHE_FLUSH | + : BRW_MI_GLOBAL_SNAPSHOT_RESET); + : OUT_RING(MI_NOOP); + : ADVANCE_LP_RING(); + : } + : { + : BEGIN_LP_RING(12); + : + : /* Match Mesa driver setup */ + : OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + : + : OUT_RING(BRW_CS_URB_STATE | 0); + : OUT_RING((0 << 4) | /* URB Entry Allocation Size */ + : (0 << 0)); /* Number of URB Entries */ + : + : /* Zero out the two base address registers so all offsets are + : * absolute. 
+ : */ + : OUT_RING(BRW_STATE_BASE_ADDRESS | 4); + : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ + : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + : /* general state max addr, disabled */ + : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); + : /* media object state max addr, disabled */ + : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); + : + : /* Set system instruction pointer */ + : OUT_RING(BRW_STATE_SIP | 0); + : OUT_RING(state_base_offset + sip_kernel_offset); + : OUT_RING(MI_NOOP); + : ADVANCE_LP_RING(); + : } + : { + : BEGIN_LP_RING(26); + : /* Pipe control */ + : OUT_RING(BRW_PIPE_CONTROL | + : BRW_PIPE_CONTROL_NOWRITE | + : BRW_PIPE_CONTROL_IS_FLUSH | + : 2); + : OUT_RING(0); /* Destination address */ + : OUT_RING(0); /* Immediate data low DW */ + : OUT_RING(0); /* Immediate data high DW */ + : + : /* Binding table pointers */ + : OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + : OUT_RING(0); /* vs */ + : OUT_RING(0); /* gs */ + : OUT_RING(0); /* clip */ + : OUT_RING(0); /* sf */ + : /* Only the PS uses the binding table */ + : OUT_RING(state_base_offset + binding_table_offset); /* ps */ + : + : /* The drawing rectangle clipping is always on. Set it to values that + : * shouldn't do any clipping. 
+ : */ + : OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ + : OUT_RING(0x00000000); /* ymin, xmin */ + : OUT_RING(DRAW_YMAX(pDst->drawable.height - 1) | + : DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */ + : OUT_RING(0x00000000); /* yorigin, xorigin */ + : + : /* skip the depth buffer */ + : /* skip the polygon stipple */ + : /* skip the polygon stipple offset */ + : /* skip the line stipple */ + : + : /* Set the pointers to the 3d pipeline state */ + : OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5); + : OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */ + : OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ + : OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ + : OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */ + : OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */ + : OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */ + : + : /* URB fence */ + : OUT_RING(BRW_URB_FENCE | + : UF0_CS_REALLOC | + : UF0_SF_REALLOC | + : UF0_CLIP_REALLOC | + : UF0_GS_REALLOC | + : UF0_VS_REALLOC | + : 1); + : OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + : ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + : ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + : OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + : ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + : + : /* Constant buffer state */ + : OUT_RING(BRW_CS_URB_STATE | 0); + : OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | + : (URB_CS_ENTRIES << 0)); + : ADVANCE_LP_RING(); + : } + : { + : int nelem = pMask ? 
3: 2; + : BEGIN_LP_RING(pMask?12:10); + : /* Set up the pointer to our vertex buffer */ + : OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); + : OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) | + : VB0_VERTEXDATA | + : ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT)); + : OUT_RING(state_base_offset + vb_offset); + : OUT_RING(3); + : OUT_RING(0); // ignore for VERTEXDATA, but still there + : + : /* Set up our vertex elements, sourced from the single vertex buffer. + : */ + : OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); + : /* vertex coordinates */ + : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (0 << VE0_OFFSET_SHIFT)); + : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + : (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + : /* u0, v0 */ + : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (8 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ + : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + : (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ + : /* u1, v1 */ + : if (pMask) { + : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (16 << VE0_OFFSET_SHIFT)); + : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + : (10 << 
VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + : } + : + : ADVANCE_LP_RING(); + : } + : + :#ifdef I830DEBUG + : ErrorF("try to sync to show any errors..."); + : I830Sync(pScrn); + :#endif + : return TRUE; + :} + : 33676: add $0x8c,%esp + : 3367c: xor %eax,%eax + : 3367e: pop %ebx + : 3367f: pop %esi + : 33680: pop %edi + : 33681: pop %ebp + : 33682: ret + 30 0.0011 : 33683: andl $0xfffe3fff,(%edx) + 14939 0.5332 : 33689: andb $0xf1,0x2(%edx) + 15110 0.5393 : 3368d: mov 0x29dc(%ebx),%eax + : 33693: movl $0x0,(%eax) + 1 3.6e-05 : 33699: movl $0x0,0x4(%eax) + : 336a0: movl $0x0,0x8(%eax) + 34 0.0012 : 336a7: movl $0x0,0xc(%eax) + : 336ae: mov 0x29dc(%ebx),%edx + : 336b4: xor %eax,%eax + : 336b6: mov %eax,(%edx) + 41 0.0015 : 336b8: mov %eax,0x4(%edx) + : 336bb: mov %eax,0x8(%edx) + : 336be: mov %eax,0xc(%edx) + 36 0.0013 : 336c1: mov 0x29d4(%ebx),%ecx + 33 0.0012 : 336c7: andb $0xdf,0x3(%ecx) + 6323 0.2257 : 336cb: mov 0xc(%ebp),%eax + : 336ce: testb $0x1,0x18(%eax) + : 336d2: jne 337d8 + : 336d8: movzbl 0x4(%ecx),%eax + 88 0.0031 : 336dc: and $0xfffffff8,%eax + : 336df: or $0x4,%eax + : 336e2: mov %al,0x4(%ecx) + : 336e5: movzwl 0x4(%ecx),%eax + 5627 0.2008 : 336e9: and $0xfe3f,%ax + 5 1.8e-04 : 336ed: or $0x1,%ah + 18 6.4e-04 : 336f0: mov %ax,0x4(%ecx) + 17 6.1e-04 : 336f4: movzbl 0x4(%ecx),%eax + 4612 0.1646 : 336f8: and $0xffffffc7,%eax + 16 5.7e-04 : 336fb: or $0x20,%eax + 15 5.4e-04 : 336fe: mov %al,0x4(%ecx) + 43 0.0015 : 33701: mov 0x8(%ecx),%eax + 168 0.0060 : 33704: mov 0x2a48(%ebx),%edx + : 3370a: add 0x2a58(%ebx),%edx + 2 7.1e-05 : 33710: andb $0xfd,0xf(%ecx) + 312 0.0111 : 33714: and $0x1f,%eax + : 33717: and $0xffffffe0,%edx + 9 3.2e-04 : 3371a: or %edx,%eax + : 3371c: mov %eax,0x8(%ecx) + : 3371f: mov 0x1c(%ebp),%edx + : 33722: test %edx,%edx + : 33724: je 337f5 + 13 4.6e-04 : 3372a: mov 0x29d8(%ebx),%eax + 19 6.8e-04 : 33730: movl $0x0,(%eax) + 2 7.1e-05 : 33736: movl $0x0,0x4(%eax) + : 3373d: movl $0x0,0x8(%eax) + 70 0.0025 : 33744: movl $0x0,0xc(%eax) + 2 
7.1e-05 : 3374b: mov 0x29d8(%ebx),%ecx + 3 1.1e-04 : 33751: orb $0x10,0x3(%ecx) + 8123 0.2899 : 33755: mov 0x10(%ebp),%edx + : 33758: mov 0x44(%edx),%eax + : 3375b: test %eax,%eax + : 3375d: je 339b7 + : 33763: sub $0x1,%eax + : 33766: jne 33676 + : 3376c: mov (%ecx),%eax + : 3376e: and $0xfffe3fff,%eax + : 33773: or $0x40,%ah + : 33776: mov %eax,(%ecx) + : 33778: movzbl 0x2(%ecx),%eax + : 3377c: and $0xfffffff1,%eax + : 3377f: or $0x2,%eax + : 33782: mov %al,0x2(%ecx) + : 33785: jmp 339c1 + : 3378a: lea 0x0(%esi),%esi + 13 4.6e-04 : 33790: testw $0xfff,0x8(%edx) + 13 4.6e-04 : 33796: je 33316 + 2 7.1e-05 : 3379c: mov 0x8(%ebp),%eax + : 3379f: shl $0x4,%eax + 18 6.4e-04 : 337a2: mov 0xffffb0f8(%ebx,%eax,1),%esi + : 337a9: test %esi,%esi + 1 3.6e-05 : 337ab: je 33316 + : 337b1: mov 0x2a64(%ebx),%eax + 45 0.0016 : 337b7: cmp $0x3,%eax + : 337ba: je 343eb + 3 1.1e-04 : 337c0: cmp $0x13,%eax + : 337c3: jne 33316 + 80 0.0029 : 337c9: movl $0x12,0x2a64(%ebx) + 23 8.2e-04 : 337d3: jmp 33316 + 33 0.0012 : 337d8: andb $0xf8,0x4(%ecx) + 343 0.0122 : 337dc: andw $0xfe3f,0x4(%ecx) + 9699 0.3461 : 337e2: andb $0xc7,0x4(%ecx) + 8396 0.2996 : 337e6: andb $0xfd,0xf(%ecx) + 3 1.1e-04 : 337ea: mov 0x1c(%ebp),%edx + : 337ed: test %edx,%edx + : 337ef: jne 3372a + 20 7.1e-04 : 337f5: mov 0x29e0(%ebx),%eax + 71 0.0025 : 337fb: movl $0x0,(%eax) + 119 0.0042 : 33801: movl $0x0,0x4(%eax) + 27 9.6e-04 : 33808: movl $0x0,0x8(%eax) + 28 1.0e-03 : 3380f: movl $0x0,0xc(%eax) + 29 0.0010 : 33816: movl $0x0,0x10(%eax) + 7 2.5e-04 : 3381d: movl $0x0,0x14(%eax) + 44 0.0016 : 33824: movl $0x0,0x18(%eax) + 131 0.0047 : 3382b: mov 0x29e0(%ebx),%edx + 13 4.6e-04 : 33831: mov 0x10(%edx),%eax + 7635 0.2725 : 33834: and $0xfffc07ff,%eax + 18 6.4e-04 : 33839: or $0x40,%ah + 29 0.0010 : 3383c: mov %eax,0x10(%edx) + 40 0.0014 : 3383f: movzbl 0x18(%edx),%eax + 3770 0.1345 : 33843: andb $0x7,0x12(%edx) + 4124 0.1472 : 33847: and $0xfffffffe,%eax + : 3384a: or $0x2,%eax + : 3384d: mov %al,0x18(%edx) + 1 3.6e-05 
: 33850: mov 0xffffffb8(%ebp),%eax + 12 4.3e-04 : 33853: test %eax,%eax + : 33855: je 343d5 + : 3385b: lea 0xffffb414(%ebx),%eax + : 33861: mov %eax,0x4(%esp) + : 33865: mov 0x29f4(%ebx),%eax + : 3386b: movl $0x140,0x8(%esp) + : 33873: mov %eax,(%esp) + : 33876: call 7d88 + : 3387b: jmp 33a90 + : 33880: cmp $0x4,%edi + : 33883: je 341a1 + : 33889: cmp $0x14,%edi + : 3388c: lea 0x0(%esi),%esi + : 33890: jne 33302 + : 33896: movl $0x11,0x2a60(%ebx) + : 338a0: jmp 33302 + 30 0.0011 : 338a5: mov 0x2a58(%ebx),%edx + 28 1.0e-03 : 338ab: mov 0x2a00(%ebx),%eax + 1 3.6e-05 : 338b1: mov %edx,%ecx + : 338b3: add 0x2a08(%ebx),%ecx + 20 7.1e-04 : 338b9: mov %ecx,(%eax) + 2 7.1e-05 : 338bb: add 0x2a0c(%ebx),%edx + 5 1.8e-04 : 338c1: mov %edx,0x4(%eax) + 1 3.6e-05 : 338c4: jmp 33638 + : 338c9: lea 0x0(%esi),%esi + 21 7.5e-04 : 338d0: mov 0x18(%ebp),%ecx + 55 0.0020 : 338d3: movzwl 0xc(%ecx),%eax + 11 3.9e-04 : 338d7: push %eax + 4 1.4e-04 : 338d8: mov 0xffffff9c(%ebp),%esi + 26 9.3e-04 : 338db: fildl (%esp) + 42 0.0015 : 338de: fstps 0x264(%esi) + 6 2.1e-04 : 338e4: movzwl 0xe(%ecx),%eax + 10 3.6e-04 : 338e8: mov %eax,(%esp) + : 338eb: fildl (%esp) + 3 1.1e-04 : 338ee: add $0x4,%esp + 9 3.2e-04 : 338f1: fstps 0x268(%esi) + 6 2.1e-04 : 338f7: mov 0xc(%ebp),%eax + : 338fa: mov 0x40(%eax),%edx + 45 0.0016 : 338fd: mov $0xbf800000,%eax + 6 2.1e-04 : 33902: movl $0x0,0x278(%esi) + 11 3.9e-04 : 3390c: mov %eax,0x26c(%esi) + 2 7.1e-05 : 33912: mov %eax,0x270(%esi) + 8 2.9e-04 : 33918: test %edx,%edx + 3 1.1e-04 : 3391a: mov %edx,0x274(%esi) + 8 2.9e-04 : 33920: je 3393c + : 33922: movswl 0x6(%edx),%eax + : 33926: movswl 0xe(%edx),%edx + : 3392a: cmp $0xffffffff,%eax + : 3392d: je 343b9 + : 33933: sub $0x1,%eax + : 33936: je 341b0 + 18 6.4e-04 : 3393c: movl $0x2,0x2a04(%ebx) + 27 9.6e-04 : 33946: movl $0x0,0x2a1c(%ebx) + 2 7.1e-05 : 33950: movl $0x20,0x2a20(%ebx) + 3 1.1e-04 : 3395a: movl $0x40,0x2a24(%ebx) + 19 6.8e-04 : 33964: movl $0x400,0x2a40(%ebx) + 1 3.6e-05 : 3396e: movl 
$0x8400,0x2a28(%ebx) + 5 1.8e-04 : 33978: movl $0x8440,0x2a34(%ebx) + 28 1.0e-03 : 33982: movl $0x8580,0x2a38(%ebx) + 32 0.0011 : 3398c: movl $0x89e0,0x2a4c(%ebx) + 14 5.0e-04 : 33996: movl $0x8580,0xffffffc0(%ebp) + 17 6.1e-04 : 3399d: movl $0x0,0xffffffb8(%ebp) + 21 7.5e-04 : 339a4: movl $0x0,0xffffffa8(%ebp) + 5 1.8e-04 : 339ab: movl $0x0,0xffffffac(%ebp) + 10 3.6e-04 : 339b2: jmp 3301a + 22 7.9e-04 : 339b7: andl $0xfffe3fff,(%ecx) + 7801 0.2784 : 339bd: andb $0xf1,0x2(%ecx) + 7750 0.2766 : 339c1: mov 0x10(%ebp),%esi + : 339c4: testb $0x1,0x18(%esi) + : 339c8: jne 34170 + : 339ce: movzbl 0x4(%ecx),%eax + 85 0.0030 : 339d2: and $0xfffffff8,%eax + : 339d5: or $0x4,%eax + : 339d8: mov %al,0x4(%ecx) + : 339db: movzwl 0x4(%ecx),%eax + 7606 0.2714 : 339df: and $0xfe3f,%ax + 17 6.1e-04 : 339e3: or $0x1,%ah + 45 0.0016 : 339e6: mov %ax,0x4(%ecx) + 35 0.0012 : 339ea: movzbl 0x4(%ecx),%eax + 6070 0.2166 : 339ee: and $0xffffffc7,%eax + 16 5.7e-04 : 339f1: or $0x20,%eax + 21 7.5e-04 : 339f4: mov %al,0x4(%ecx) + 10 3.6e-04 : 339f7: mov 0x8(%ecx),%eax + : 339fa: mov 0x2a48(%ebx),%edx + : 33a00: add 0x2a58(%ebx),%edx + 8 2.9e-04 : 33a06: and $0x1f,%eax + 10 3.6e-04 : 33a09: and $0xffffffe0,%edx + : 33a0c: or %edx,%eax + 3 1.1e-04 : 33a0e: mov %eax,0x8(%ecx) + : 33a11: andb $0xfd,0xf(%ecx) + 161 0.0057 : 33a15: mov 0x29e0(%ebx),%eax + 2 7.1e-05 : 33a1b: movl $0x0,(%eax) + : 33a21: movl $0x0,0x4(%eax) + 30 0.0011 : 33a28: movl $0x0,0x8(%eax) + 11 3.9e-04 : 33a2f: movl $0x0,0xc(%eax) + 8 2.9e-04 : 33a36: movl $0x0,0x10(%eax) + 9 3.2e-04 : 33a3d: movl $0x0,0x14(%eax) + 53 0.0019 : 33a44: movl $0x0,0x18(%eax) + 65 0.0023 : 33a4b: mov 0x29e0(%ebx),%edx + 7 2.5e-04 : 33a51: mov 0x10(%edx),%eax + 7749 0.2766 : 33a54: and $0xfffc07ff,%eax + 20 7.1e-04 : 33a59: or $0x40,%ah + 30 0.0011 : 33a5c: mov %eax,0x10(%edx) + 24 8.6e-04 : 33a5f: movzbl 0x18(%edx),%eax + 2954 0.1054 : 33a63: andb $0x7,0x12(%edx) + 4359 0.1556 : 33a67: and $0xfffffffe,%eax + : 33a6a: or $0x2,%eax + : 33a6d: mov 
%al,0x18(%edx) + 1 3.6e-05 : 33a70: lea 0xffffb274(%ebx),%eax + 47 0.0017 : 33a76: mov %eax,0x4(%esp) + 22 7.9e-04 : 33a7a: mov 0x29f4(%ebx),%eax + : 33a80: movl $0x110,0x8(%esp) + 229 0.0082 : 33a88: mov %eax,(%esp) + 4 1.4e-04 : 33a8b: call 7d88 + 49 0.0017 : 33a90: mov 0x29e4(%ebx),%edi + : 33a96: xor %eax,%eax + : 33a98: mov $0x8,%ecx + : 33a9d: cld + 509 0.0182 : 33a9e: rep stos %eax,%es:(%edi) + 235 0.0084 : 33aa0: mov 0x29e4(%ebx),%eax + 31 0.0011 : 33aa6: mov 0x2a34(%ebx),%ecx + 21 7.5e-04 : 33aac: add 0x2a58(%ebx),%ecx + 60 0.0021 : 33ab2: mov (%eax),%edx + 19787 0.7062 : 33ab4: and $0xffffffc0,%ecx + : 33ab7: andb $0x81,0xf(%eax) + 13800 0.4925 : 33abb: andb $0x3,0xe(%eax) + 7250 0.2587 : 33abf: and $0x3f,%edx + 27 9.6e-04 : 33ac2: or %ecx,%edx + 7 2.5e-04 : 33ac4: mov %edx,(%eax) + 1 3.6e-05 : 33ac6: mov 0xc(%eax),%edx + 16615 0.5930 : 33ac9: orb $0x80,0x7(%eax) + 6135 0.2189 : 33acd: andw $0xfc03,0x6(%eax) + 31133 1.1111 : 33ad3: andb $0xf0,0x8(%eax) + 958 0.0342 : 33ad7: and $0xfffe07ff,%edx + : 33add: or $0x8,%dh + 29 0.0010 : 33ae0: mov %edx,0xc(%eax) + 2 7.1e-05 : 33ae3: movzwl 0xc(%eax),%edx + 14701 0.5247 : 33ae7: andb $0xf1,(%eax) + 1449 0.0517 : 33aea: andb $0xfc,0x6(%eax) + 12439 0.4439 : 33aee: orb $0x28,0x5(%eax) + 2268 0.0809 : 33af2: and $0xfc0f,%dx + : 33af7: or $0x10,%edx + 22 7.9e-04 : 33afa: mov %dx,0xc(%eax) + 10 3.6e-04 : 33afe: movzbl 0xc(%eax),%edx + 14092 0.5029 : 33b02: orb $0x80,0x4(%eax) + 40 0.0014 : 33b06: andl $0x3ff,0x8(%eax) + 2795 0.0997 : 33b0d: and $0xfffffff0,%edx + : 33b10: or $0x3,%edx + 14 5.0e-04 : 33b13: mov %dl,0xc(%eax) + 20 7.1e-04 : 33b16: andb $0x81,0x13(%eax) + 898 0.0320 : 33b1a: movzbl 0x12(%eax),%edx + 1120 0.0400 : 33b1e: andb $0xfd,0x1a(%eax) + 978 0.0349 : 33b22: andb $0xfd,0x14(%eax) + 347 0.0124 : 33b26: and $0x7,%edx + : 33b29: or $0x8,%edx + 20 7.1e-04 : 33b2c: mov %dl,0x12(%eax) + 7 2.5e-04 : 33b2f: mov 0x10(%eax),%edx + 16298 0.5817 : 33b32: and $0xfffc07ff,%edx + 25 8.9e-04 : 33b38: or $0x8,%dh + 
65 0.0023 : 33b3b: mov %edx,0x10(%eax) + 54 0.0019 : 33b3e: movzbl 0x1b(%eax),%edx + 324 0.0116 : 33b42: orb $0x4,0x11(%eax) + 13376 0.4774 : 33b46: and $0xffffff9f,%edx + : 33b49: or $0x20,%edx + : 33b4c: mov %dl,0x1b(%eax) + : 33b4f: movzbl 0x1f(%eax),%edx + 38 0.0014 : 33b53: and $0xfffffff9,%edx + : 33b56: or $0x4,%edx + : 33b59: mov %dl,0x1f(%eax) + 1 3.6e-05 : 33b5c: movzbl 0x19(%eax),%edx + 139 0.0050 : 33b60: and $0xffffffe1,%edx + : 33b63: or $0x10,%edx + : 33b66: mov %dl,0x19(%eax) + 2 7.1e-05 : 33b69: mov 0x18(%eax),%edx + 11186 0.3992 : 33b6c: and $0xfffe1fff,%edx + 36 0.0013 : 33b72: or $0x10000,%edx + 42 0.0015 : 33b78: mov %edx,0x18(%eax) + 22 7.9e-04 : 33b7b: mov 0x1c(%ebp),%eax + : 33b7e: test %eax,%eax + : 33b80: je 342e5 + 11 3.9e-04 : 33b86: mov 0x10(%ebp),%eax + 1 3.6e-05 : 33b89: testb $0x1,0x19(%eax) + 8 2.9e-04 : 33b8d: je 34234 + 4 1.4e-04 : 33b93: testw $0xfff,0x8(%eax) + 2 7.1e-05 : 33b99: je 34234 + : 33b9f: shll $0x4,0x8(%ebp) + 22 7.9e-04 : 33ba3: mov 0x8(%ebp),%edx + 1 3.6e-05 : 33ba6: mov 0xffffb0f8(%ebx,%edx,1),%eax + 15 5.4e-04 : 33bad: test %eax,%eax + : 33baf: je 343c7 + 7 2.5e-04 : 33bb5: lea 0xffffb674(%ebx),%eax + 18 6.4e-04 : 33bbb: nop + : 33bbc: lea 0x0(%esi),%esi + 19 6.8e-04 : 33bc0: mov %eax,0x4(%esp) + 75 0.0027 : 33bc4: mov 0x29f8(%ebx),%eax + : 33bca: movl $0x5f0,0x8(%esp) + 9 3.2e-04 : 33bd2: mov %eax,(%esp) + 20 7.1e-04 : 33bd5: call 7d88 + 24 8.6e-04 : 33bda: mov 0x29e8(%ebx),%edi + : 33be0: xor %eax,%eax + : 33be2: mov $0x8,%ecx + : 33be7: cld + 284 0.0101 : 33be8: rep stos %eax,%es:(%edi) + 149 0.0053 : 33bea: mov 0x29e8(%ebx),%ecx + 19 6.8e-04 : 33bf0: mov 0x2a58(%ebx),%esi + 10 3.6e-04 : 33bf6: mov (%ecx),%eax + 10010 0.3572 : 33bf8: mov %esi,%edx + : 33bfa: add 0x2a38(%ebx),%edx + : 33c00: orb $0x80,0x7(%ecx) + 5995 0.2140 : 33c04: and $0x3f,%eax + : 33c07: and $0xffffffc0,%edx + : 33c0a: or %edx,%eax + : 33c0c: mov %esi,%edx + 21 7.5e-04 : 33c0e: mov %eax,(%ecx) + : 33c10: movzbl (%ecx),%eax + 7306 0.2607 : 
33c13: and $0xfffffff1,%eax + 14 5.0e-04 : 33c16: or $0x2,%eax + 17 6.1e-04 : 33c19: mov %al,(%ecx) + 20 7.1e-04 : 33c1b: movzwl 0x6(%ecx),%eax + 4771 0.1703 : 33c1f: and $0xfc03,%ax + 25 8.9e-04 : 33c23: or $0xc,%eax + 27 9.6e-04 : 33c26: mov %ax,0x6(%ecx) + 22 7.9e-04 : 33c2a: mov 0x8(%ecx),%eax + 737 0.0263 : 33c2d: add 0x2a40(%ebx),%edx + : 33c33: andb $0x81,0xf(%ecx) + 3387 0.1209 : 33c37: andb $0x3,0xe(%ecx) + 4057 0.1448 : 33c3b: and $0xfffffc00,%edx + 5 1.8e-04 : 33c41: and $0x3ff,%eax + 5 1.8e-04 : 33c46: or %edx,%eax + : 33c48: mov %eax,0x8(%ecx) + 13 4.6e-04 : 33c4b: mov 0xc(%ecx),%eax + 7489 0.2673 : 33c4e: andb $0xf0,0x8(%ecx) + 2944 0.1051 : 33c52: and $0xfffe07ff,%eax + : 33c57: or $0x10,%ah + 1 3.6e-05 : 33c5a: mov %eax,0xc(%ecx) + 30 0.0011 : 33c5d: andw $0xfc0f,0xc(%ecx) + 13013 0.4644 : 33c63: movzbl 0xc(%ecx),%eax + 12997 0.4638 : 33c67: orb $0x1,0x10(%ecx) + 2 7.1e-05 : 33c6b: movzbl 0x16(%ecx),%edx + 3 1.1e-04 : 33c6f: and $0xfffffff0,%eax + 36 0.0013 : 33c72: or $0x3,%eax + 28 1.0e-03 : 33c75: mov %al,0xc(%ecx) + 39 0.0014 : 33c78: mov 0x10(%ecx),%eax + 10942 0.3905 : 33c7b: or $0xc,%edx + : 33c7e: add 0x2a14(%ebx),%esi + 27 9.6e-04 : 33c84: mov %dl,0x16(%ecx) + 1 3.6e-05 : 33c87: and $0xffffffe0,%esi + 12 4.3e-04 : 33c8a: and $0x1f,%eax + 14 5.0e-04 : 33c8d: or %esi,%eax + 13 4.6e-04 : 33c8f: mov %eax,0x10(%ecx) + 47 0.0017 : 33c92: movzbl 0x10(%ecx),%eax + 10716 0.3824 : 33c96: and $0xffffffe3,%eax + 27 9.6e-04 : 33c99: or $0x4,%eax + 39 0.0014 : 33c9c: mov %al,0x10(%ecx) + 39 0.0014 : 33c9f: movzbl 0x17(%ecx),%eax + 1 3.6e-05 : 33ca3: and $0xffffff81,%eax + : 33ca6: or $0x3e,%eax + : 33ca9: mov %al,0x17(%ecx) + 34 0.0012 : 33cac: movzbl 0x14(%ecx),%eax + 59 0.0021 : 33cb0: or $0x2,%eax + : 33cb3: and $0xfffffffe,%eax + : 33cb6: mov %al,0x14(%ecx) + 33 0.0012 : 33cb9: mov 0xffffff9c(%ebp),%ecx + : 33cbc: mov 0x6c(%ecx),%edx + : 33cbf: cmpl $0x7,0x14(%edx) + 50 0.0018 : 33cc3: jle 34251 + 25 8.9e-04 : 33cc9: mov 0x10(%edx),%eax + 1 3.6e-05 : 
33ccc: mov 0x8(%edx),%ecx + 2 7.1e-05 : 33ccf: mov (%edx),%esi + : 33cd1: lea (%ecx,%eax,1),%edx + 21 7.5e-04 : 33cd4: add $0x4,%eax + : 33cd7: and %esi,%eax + 2 7.1e-05 : 33cd9: add %eax,%ecx + : 33cdb: movl $0x200000a,(%edx) + 35 0.0012 : 33ce1: lea 0x4(%eax),%edx + : 33ce4: movl $0x0,(%ecx) + 20 7.1e-04 : 33cea: mov 0xffffff9c(%ebp),%ecx + 2 7.1e-05 : 33ced: and %esi,%edx + 33 0.0012 : 33cef: mov 0x6c(%ecx),%eax + 2 7.1e-05 : 33cf2: mov %edx,0x10(%eax) + 7 2.5e-04 : 33cf5: mov 0x6c(%ecx),%eax + 6 2.1e-04 : 33cf8: subl $0x8,0x14(%eax) + 38 0.0014 : 33cfc: test $0x7,%dl + 2 7.1e-05 : 33cff: jne 343fa + 78 0.0028 : 33d05: mov 0xffffff9c(%ebp),%esi + 12 4.3e-04 : 33d08: mov (%esi),%eax + 41 0.0015 : 33d0a: mov %edx,0x2030(%eax) + 4758 0.1698 : 33d10: mov 0x6c(%esi),%edx + 106 0.0038 : 33d13: cmpl $0x2f,0x14(%edx) + 117 0.0042 : 33d17: jle 34277 + 32 0.0011 : 33d1d: mov 0x8(%edx),%ecx + 4 1.4e-04 : 33d20: mov 0x10(%edx),%eax + : 33d23: mov (%edx),%esi + 20 7.1e-04 : 33d25: lea (%ecx,%eax,1),%edx + 3 1.1e-04 : 33d28: add $0x4,%eax + 11 3.9e-04 : 33d2b: and %esi,%eax + 23 8.2e-04 : 33d2d: movl $0x61040000,(%edx) + 126 0.0045 : 33d33: lea (%ecx,%eax,1),%edx + : 33d36: add $0x4,%eax + 4 1.4e-04 : 33d39: and %esi,%eax + : 33d3b: movl $0x60010000,(%edx) + 122 0.0044 : 33d41: lea (%ecx,%eax,1),%edx + : 33d44: add $0x4,%eax + 4 1.4e-04 : 33d47: and %esi,%eax + 10 3.6e-04 : 33d49: movl $0x0,(%edx) + 65 0.0023 : 33d4f: lea (%ecx,%eax,1),%edx + : 33d52: add $0x4,%eax + 1 3.6e-05 : 33d55: and %esi,%eax + 5 1.8e-04 : 33d57: movl $0x61010004,(%edx) + 77 0.0027 : 33d5d: lea (%ecx,%eax,1),%edx + : 33d60: add $0x4,%eax + : 33d63: and %esi,%eax + 2 7.1e-05 : 33d65: movl $0x1,(%edx) + 60 0.0021 : 33d6b: lea (%ecx,%eax,1),%edx + : 33d6e: add $0x4,%eax + 3 1.1e-04 : 33d71: and %esi,%eax + 5 1.8e-04 : 33d73: movl $0x1,(%edx) + 85 0.0030 : 33d79: lea (%ecx,%eax,1),%edx + : 33d7c: add $0x4,%eax + 1 3.6e-05 : 33d7f: and %esi,%eax + 81 0.0029 : 33d81: movl $0x1,(%edx) + 56 0.0020 : 33d87: lea 
(%ecx,%eax,1),%edx + : 33d8a: add $0x4,%eax + 5 1.8e-04 : 33d8d: and %esi,%eax + 1 3.6e-05 : 33d8f: movl $0x10000001,(%edx) + 58 0.0021 : 33d95: lea (%ecx,%eax,1),%edx + : 33d98: add $0x4,%eax + 1 3.6e-05 : 33d9b: and %esi,%eax + 1 3.6e-05 : 33d9d: movl $0x10000001,(%edx) + 72 0.0026 : 33da3: lea (%ecx,%eax,1),%edx + : 33da6: add $0x4,%eax + 2 7.1e-05 : 33da9: movl $0x61020000,(%edx) + 85 0.0030 : 33daf: and %esi,%eax + : 33db1: mov 0x2a3c(%ebx),%edx + 91 0.0032 : 33db7: add 0x2a58(%ebx),%edx + 95 0.0034 : 33dbd: lea (%ecx,%eax,1),%edi + 1 3.6e-05 : 33dc0: add $0x4,%eax + 9 3.2e-04 : 33dc3: and %esi,%eax + : 33dc5: add %eax,%ecx + 27 9.6e-04 : 33dc7: mov %edx,(%edi) + 12 4.3e-04 : 33dc9: lea 0x4(%eax),%edx + : 33dcc: movl $0x0,(%ecx) + 56 0.0020 : 33dd2: mov 0xffffff9c(%ebp),%ecx + 125 0.0045 : 33dd5: and %esi,%edx + : 33dd7: mov 0x6c(%ecx),%eax + 147 0.0052 : 33dda: mov %edx,0x10(%eax) + 130 0.0046 : 33ddd: mov 0x6c(%ecx),%eax + 122 0.0044 : 33de0: subl $0x30,0x14(%eax) + 583 0.0208 : 33de4: test $0x7,%dl + : 33de7: jne 343fa + 4 1.4e-04 : 33ded: mov 0xffffff9c(%ebp),%esi + 202 0.0072 : 33df0: mov (%esi),%eax + 194 0.0069 : 33df2: mov %edx,0x2030(%eax) + 16218 0.5788 : 33df8: mov 0x6c(%esi),%edx + 172 0.0061 : 33dfb: cmpl $0x67,0x14(%edx) + 133 0.0047 : 33dff: jle 342a0 + 122 0.0044 : 33e05: mov (%edx),%ecx + 9 3.2e-04 : 33e07: mov 0x10(%edx),%eax + 4 1.4e-04 : 33e0a: mov %ecx,0xffffffd8(%ebp) + 18 6.4e-04 : 33e0d: mov 0x8(%edx),%edi + 14 5.0e-04 : 33e10: lea (%edi,%eax,1),%edx + 32 0.0011 : 33e13: add $0x4,%eax + : 33e16: and %ecx,%eax + 7 2.5e-04 : 33e18: movl $0x7a000802,(%edx) + 179 0.0064 : 33e1e: lea (%edi,%eax,1),%edx + 2 7.1e-05 : 33e21: add $0x4,%eax + : 33e24: and %ecx,%eax + : 33e26: movl $0x0,(%edx) + 54 0.0019 : 33e2c: lea (%edi,%eax,1),%edx + 5 1.8e-04 : 33e2f: add $0x4,%eax + : 33e32: and %ecx,%eax + 3 1.1e-04 : 33e34: movl $0x0,(%edx) + 68 0.0024 : 33e3a: lea (%edi,%eax,1),%edx + 1 3.6e-05 : 33e3d: add $0x4,%eax + : 33e40: and %ecx,%eax + 58 0.0021 
: 33e42: movl $0x0,(%edx) + 76 0.0027 : 33e48: lea (%edi,%eax,1),%edx + : 33e4b: add $0x4,%eax + : 33e4e: and %ecx,%eax + 1 3.6e-05 : 33e50: movl $0x78010004,(%edx) + 54 0.0019 : 33e56: lea (%edi,%eax,1),%edx + 2 7.1e-05 : 33e59: add $0x4,%eax + : 33e5c: and %ecx,%eax + 4 1.4e-04 : 33e5e: movl $0x0,(%edx) + 88 0.0031 : 33e64: lea (%edi,%eax,1),%edx + 1 3.6e-05 : 33e67: add $0x4,%eax + : 33e6a: and %ecx,%eax + 1 3.6e-05 : 33e6c: movl $0x0,(%edx) + 56 0.0020 : 33e72: lea (%edi,%eax,1),%edx + 1 3.6e-05 : 33e75: add $0x4,%eax + : 33e78: and %ecx,%eax + : 33e7a: movl $0x0,(%edx) + 73 0.0026 : 33e80: lea (%edi,%eax,1),%edx + 9 3.2e-04 : 33e83: add $0x4,%eax + : 33e86: movl $0x0,(%edx) + 67 0.0024 : 33e8c: mov 0x2a58(%ebx),%esi + 78 0.0028 : 33e92: and %ecx,%eax + 2 7.1e-05 : 33e94: lea (%edi,%eax,1),%ecx + : 33e97: add $0x4,%eax + 28 1.0e-03 : 33e9a: mov %esi,%edx + 6 2.1e-04 : 33e9c: add 0x2a44(%ebx),%edx + 58 0.0021 : 33ea2: mov %edx,(%ecx) + 39 0.0014 : 33ea4: and 0xffffffd8(%ebp),%eax + 92 0.0033 : 33ea7: lea (%edi,%eax,1),%edx + 24 8.6e-04 : 33eaa: add $0x4,%eax + 6 2.1e-04 : 33ead: movl $0x79000002,(%edx) + 160 0.0057 : 33eb3: and 0xffffffd8(%ebp),%eax + 13 4.6e-04 : 33eb6: lea (%edi,%eax,1),%edx + 25 8.9e-04 : 33eb9: add $0x4,%eax + 6 2.1e-04 : 33ebc: movl $0x0,(%edx) + 206 0.0074 : 33ec2: mov 0x20(%ebp),%ecx + 112 0.0040 : 33ec5: and 0xffffffd8(%ebp),%eax + 39 0.0014 : 33ec8: lea (%edi,%eax,1),%edx + 25 8.9e-04 : 33ecb: add $0x4,%eax + 5 1.8e-04 : 33ece: mov %edx,0xffffffdc(%ebp) + 21 7.5e-04 : 33ed1: movzwl 0xe(%ecx),%edx + 80 0.0029 : 33ed5: movzwl 0xc(%ecx),%ecx + 14 5.0e-04 : 33ed9: sub $0x1,%edx + 3 1.1e-04 : 33edc: mov %ecx,0xffffff84(%ebp) + 35 0.0012 : 33edf: shl $0x10,%edx + 4 1.4e-04 : 33ee2: sub $0x1,%ecx + 12 4.3e-04 : 33ee5: or %ecx,%edx + 27 9.6e-04 : 33ee7: mov 0xffffffdc(%ebp),%ecx + 24 8.6e-04 : 33eea: mov %edx,(%ecx) + 145 0.0052 : 33eec: and 0xffffffd8(%ebp),%eax + 230 0.0082 : 33eef: lea (%edi,%eax,1),%edx + 39 0.0014 : 33ef2: add $0x4,%eax + 
12 4.3e-04 : 33ef5: movl $0x0,(%edx) + 182 0.0065 : 33efb: and 0xffffffd8(%ebp),%eax + 4 1.4e-04 : 33efe: lea (%edi,%eax,1),%edx + 118 0.0042 : 33f01: add $0x4,%eax + 3 1.1e-04 : 33f04: movl $0x78000005,(%edx) + 175 0.0062 : 33f0a: mov %esi,%edx + 7 2.5e-04 : 33f0c: and 0xffffffd8(%ebp),%eax + 11 3.9e-04 : 33f0f: add 0x2a1c(%ebx),%edx + 154 0.0055 : 33f15: lea (%edi,%eax,1),%ecx + 3 1.1e-04 : 33f18: add $0x4,%eax + 17 6.1e-04 : 33f1b: mov %edx,(%ecx) + 169 0.0060 : 33f1d: and 0xffffffd8(%ebp),%eax + : 33f20: lea (%edi,%eax,1),%edx + 24 8.6e-04 : 33f23: add $0x4,%eax + 3 1.1e-04 : 33f26: movl $0x0,(%edx) + 154 0.0055 : 33f2c: and 0xffffffd8(%ebp),%eax + 563 0.0201 : 33f2f: lea (%edi,%eax,1),%edx + 728 0.0260 : 33f32: add $0x4,%eax + 1721 0.0614 : 33f35: movl $0x0,(%edx) + 4024 0.1436 : 33f3b: mov %esi,%edx + : 33f3d: and 0xffffffd8(%ebp),%eax + 173 0.0062 : 33f40: add 0x2a20(%ebx),%edx + 36 0.0013 : 33f46: lea (%edi,%eax,1),%ecx + 11 3.9e-04 : 33f49: add $0x4,%eax + 13 4.6e-04 : 33f4c: mov %edx,(%ecx) + 168 0.0060 : 33f4e: mov %esi,%edx + : 33f50: and 0xffffffd8(%ebp),%eax + 7 2.5e-04 : 33f53: add 0x2a24(%ebx),%edx + 25 8.9e-04 : 33f59: lea (%edi,%eax,1),%ecx + 28 1.0e-03 : 33f5c: add $0x4,%eax + 19 6.8e-04 : 33f5f: mov %edx,(%ecx) + 176 0.0063 : 33f61: and 0xffffffd8(%ebp),%eax + : 33f64: add 0x2a28(%ebx),%esi + 25 8.9e-04 : 33f6a: lea (%edi,%eax,1),%edx + 1 3.6e-05 : 33f6d: add $0x4,%eax + 18 6.4e-04 : 33f70: mov %esi,(%edx) + 180 0.0064 : 33f72: and 0xffffffd8(%ebp),%eax + 1 3.6e-05 : 33f75: lea (%edi,%eax,1),%edx + 28 1.0e-03 : 33f78: add $0x4,%eax + 2 7.1e-05 : 33f7b: movl $0x60002f01,(%edx) + 214 0.0076 : 33f81: mov 0x29b4(%ebx),%edx + : 33f87: and 0xffffffd8(%ebp),%eax + 30 0.0011 : 33f8a: add 0x29b0(%ebx),%edx + 37 0.0013 : 33f90: mov 0x29a4(%ebx),%ecx + : 33f96: lea (%edi,%eax,1),%esi + 2 7.1e-05 : 33f99: add $0x4,%eax + 10 3.6e-04 : 33f9c: mov %esi,0xffffffe0(%ebp) + 20 7.1e-04 : 33f9f: mov 0x29ac(%ebx),%esi + : 33fa5: shl $0x14,%edx + 2 7.1e-05 : 33fa8: 
add 0x29a0(%ebx),%ecx + 57 0.0020 : 33fae: add 0x29a8(%ebx),%esi + 36 0.0013 : 33fb4: or %ecx,%edx + 21 7.5e-04 : 33fb6: shl $0xa,%esi + 12 4.3e-04 : 33fb9: or %edx,%esi + 32 0.0011 : 33fbb: mov 0xffffffe0(%ebp),%edx + 3 1.1e-04 : 33fbe: mov %esi,(%edx) + 133 0.0047 : 33fc0: mov 0x29c4(%ebx),%edx + 118 0.0042 : 33fc6: add 0x29c0(%ebx),%edx + 15 5.4e-04 : 33fcc: mov 0x29bc(%ebx),%ecx + 5 1.8e-04 : 33fd2: add 0x29b8(%ebx),%ecx + 46 0.0016 : 33fd8: and 0xffffffd8(%ebp),%eax + 18 6.4e-04 : 33fdb: shl $0x14,%edx + 19 6.8e-04 : 33fde: or %ecx,%edx + 15 5.4e-04 : 33fe0: lea (%edi,%eax,1),%esi + 9 3.2e-04 : 33fe3: add $0x4,%eax + 2 7.1e-05 : 33fe6: mov %edx,(%esi) + 132 0.0047 : 33fe8: and 0xffffffd8(%ebp),%eax + 1 3.6e-05 : 33feb: lea (%edi,%eax,1),%edx + 14 5.0e-04 : 33fee: add $0x4,%eax + 3 1.1e-04 : 33ff1: movl $0x60010000,(%edx) + 162 0.0058 : 33ff7: and 0xffffffd8(%ebp),%eax + 1 3.6e-05 : 33ffa: add %eax,%edi + 27 9.6e-04 : 33ffc: movl $0xfffffff0,(%edi) + 271 0.0097 : 34002: mov 0xffffff9c(%ebp),%ecx + 3 1.1e-04 : 34005: lea 0x4(%eax),%edx + : 34008: and 0xffffffd8(%ebp),%edx + 21 7.5e-04 : 3400b: mov 0x6c(%ecx),%eax + 44 0.0016 : 3400e: mov %edx,0x10(%eax) + 167 0.0060 : 34011: mov 0x6c(%ecx),%eax + 154 0.0055 : 34014: subl $0x68,0x14(%eax) + 213 0.0076 : 34018: test $0x7,%dl + : 3401b: jne 343fa + : 34021: mov 0xffffff9c(%ebp),%esi + 202 0.0072 : 34024: mov (%esi),%eax + 62 0.0022 : 34026: mov %edx,0x2030(%eax) + 10060 0.3590 : 3402c: mov 0x1c(%ebp),%eax + 4 1.4e-04 : 3402f: test %eax,%eax + : 34031: je 34398 + : 34037: movl $0x30,0xffffffbc(%ebp) + 2 7.1e-05 : 3403e: movl $0x18,0xffffffc4(%ebp) + 17 6.1e-04 : 34045: movl $0x78090005,0xffffffc8(%ebp) + 1 3.6e-05 : 3404c: movl $0x30,0xffffffcc(%ebp) + 17 6.1e-04 : 34053: mov 0xffffff9c(%ebp),%eax + 10 3.6e-04 : 34056: mov 0xffffffbc(%ebp),%ecx + 13 4.6e-04 : 34059: mov 0x6c(%eax),%edx + 6 2.1e-04 : 3405c: cmp 0x14(%edx),%ecx + 504 0.0180 : 3405f: jg 342c3 + 6 2.1e-04 : 34065: mov 0x8(%edx),%esi + 19 6.8e-04 : 
34068: mov 0x10(%edx),%eax + 447 0.0160 : 3406b: mov (%edx),%edi + 15 5.4e-04 : 3406d: lea (%esi,%eax,1),%edx + 1 3.6e-05 : 34070: add $0x4,%eax + 14 5.0e-04 : 34073: movl $0x78080003,(%edx) + 12 4.3e-04 : 34079: mov 0xffffffc4(%ebp),%ecx + 34 0.0012 : 3407c: and %edi,%eax + : 3407e: lea (%esi,%eax,1),%edx + 33 0.0012 : 34081: add $0x4,%eax + : 34084: and %edi,%eax + 16 5.7e-04 : 34086: mov %ecx,(%edx) + 37 0.0013 : 34088: mov 0x2a2c(%ebx),%edx + 16 5.7e-04 : 3408e: lea (%esi,%eax,1),%ecx + : 34091: add 0x2a58(%ebx),%edx + 42 0.0015 : 34097: add $0x4,%eax + 9 3.2e-04 : 3409a: and %edi,%eax + 15 5.4e-04 : 3409c: mov %edx,(%ecx) + 6 2.1e-04 : 3409e: lea (%esi,%eax,1),%edx + 7 2.5e-04 : 340a1: add $0x4,%eax + 12 4.3e-04 : 340a4: and %edi,%eax + 10 3.6e-04 : 340a6: movl $0x3,(%edx) + 19 6.8e-04 : 340ac: lea (%esi,%eax,1),%edx + 10 3.6e-04 : 340af: add $0x4,%eax + 10 3.6e-04 : 340b2: movl $0x0,(%edx) + 16 5.7e-04 : 340b8: mov 0xffffffc8(%ebp),%ecx + 55 0.0020 : 340bb: and %edi,%eax + 15 5.4e-04 : 340bd: lea (%esi,%eax,1),%edx + 15 5.4e-04 : 340c0: add $0x4,%eax + 16 5.7e-04 : 340c3: and %edi,%eax + 2 7.1e-05 : 340c5: mov %ecx,(%edx) + 33 0.0012 : 340c7: lea (%esi,%eax,1),%edx + 12 4.3e-04 : 340ca: add $0x4,%eax + 8 2.9e-04 : 340cd: and %edi,%eax + 4 1.4e-04 : 340cf: mov $0x28,%ecx + 12 4.3e-04 : 340d4: movl $0x4850000,(%edx) + 14 5.0e-04 : 340da: lea (%esi,%eax,1),%edx + 7 2.5e-04 : 340dd: add $0x4,%eax + 1 3.6e-05 : 340e0: and %edi,%eax + 11 3.9e-04 : 340e2: movl $0x11330004,(%edx) + 34 0.0012 : 340e8: lea (%esi,%eax,1),%edx + 2 7.1e-05 : 340eb: add $0x4,%eax + : 340ee: and %edi,%eax + 9 3.2e-04 : 340f0: movl $0x4850008,(%edx) + 22 7.9e-04 : 340f6: lea (%esi,%eax,1),%edx + 8 2.9e-04 : 340f9: movl $0x11000008,(%edx) + 27 9.6e-04 : 340ff: lea 0x4(%eax),%edx + 32 0.0011 : 34102: mov 0x1c(%ebp),%eax + 94 0.0034 : 34105: and %edi,%edx + 6 2.1e-04 : 34107: test %eax,%eax + : 34109: je 34129 + 14 5.0e-04 : 3410b: lea (%esi,%edx,1),%eax + : 3410e: mov $0x30,%cl + : 34110: movl 
$0x4850010,(%eax) + 12 4.3e-04 : 34116: lea 0x4(%edx),%eax + 9 3.2e-04 : 34119: and %edi,%eax + : 3411b: lea (%esi,%eax,1),%edx + 2 7.1e-05 : 3411e: movl $0x1100000a,(%edx) + 20 7.1e-04 : 34124: lea 0x4(%eax),%edx + 9 3.2e-04 : 34127: and %edi,%edx + 20 7.1e-04 : 34129: cmp 0xffffffcc(%ebp),%ecx + 10 3.6e-04 : 3412c: ja 34416 + 58 0.0021 : 34132: jb 34439 + 9 3.2e-04 : 34138: mov 0xffffff9c(%ebp),%esi + 92 0.0033 : 3413b: mov 0x6c(%esi),%eax + 118 0.0042 : 3413e: mov %edx,0x10(%eax) + 55 0.0020 : 34141: mov 0x6c(%esi),%eax + 22 7.9e-04 : 34144: sub %ecx,0x14(%eax) + 249 0.0089 : 34147: test $0x7,%dl + : 3414a: jne 343fa + 8 2.9e-04 : 34150: mov 0xffffff9c(%ebp),%ecx + 224 0.0080 : 34153: mov (%ecx),%eax + 199 0.0071 : 34155: mov %edx,0x2030(%eax) + 16824 0.6004 : 3415b: add $0x8c,%esp + : 34161: mov $0x1,%eax + : 34166: pop %ebx + 9 3.2e-04 : 34167: pop %esi + 34 0.0012 : 34168: pop %edi + 1 3.6e-05 : 34169: pop %ebp + : 3416a: ret + : 3416b: nop + : 3416c: lea 0x0(%esi),%esi + : 34170: andb $0xf8,0x4(%ecx) + 1 3.6e-05 : 34174: andw $0xfe3f,0x4(%ecx) + 2 7.1e-05 : 3417a: andb $0xc7,0x4(%ecx) + 2 7.1e-05 : 3417e: jmp 33a11 + : 34183: mov (%edx),%eax + 5 1.8e-04 : 34185: and $0xfffe3fff,%eax + : 3418a: or $0x40,%ah + : 3418d: mov %eax,(%edx) + : 3418f: movzbl 0x2(%edx),%eax + 5 1.8e-04 : 34193: and $0xfffffff1,%eax + : 34196: or $0x2,%eax + : 34199: mov %al,0x2(%edx) + : 3419c: jmp 3368d + : 341a1: movl $0x1,0x2a60(%ebx) + : 341ab: jmp 33302 + : 341b0: add $0x1,%edx + : 341b3: jne 3393c + : 341b9: movl $0x2,0x2a04(%ebx) + : 341c3: movl $0x0,0x2a1c(%ebx) + : 341cd: movl $0x20,0x2a20(%ebx) + : 341d7: movl $0x40,0x2a24(%ebx) + : 341e1: movl $0x400,0x2a40(%ebx) + : 341eb: movl $0x8400,0x2a28(%ebx) + : 341f5: movl $0x8440,0x2a34(%ebx) + : 341ff: movl $0x8580,0x2a38(%ebx) + : 34209: movl $0x89e0,0x2a4c(%ebx) + : 34213: movl $0x8580,0xffffffc0(%ebp) + : 3421a: movl $0x1,0xffffffb8(%ebp) + : 34221: movl $0x0,0xffffffa8(%ebp) + : 34228: movl $0x0,0xffffffac(%ebp) + : 3422f: 
jmp 3301a + 1 3.6e-05 : 34234: lea 0xffffc274(%ebx),%eax + 22 7.9e-04 : 3423a: jmp 33bc0 + : 3423f: nop + 20 7.1e-04 : 34240: mov 0xffffffd0(%ebp),%ecx + 62 0.0022 : 34243: lea (%edx,%ecx,1),%eax + 2 7.1e-05 : 34246: mov %eax,0x29d4(%ebx) + 11 3.9e-04 : 3424c: jmp 331bc + : 34251: movl $0x0,0x8(%esp) + : 34259: movl $0x8,0x4(%esp) + : 34261: mov 0xffffff98(%ebp),%esi + : 34264: mov %esi,(%esp) + : 34267: call 8358 + : 3426c: mov 0xffffff9c(%ebp),%eax + : 3426f: mov 0x6c(%eax),%edx + : 34272: jmp 33cc9 + : 34277: movl $0x0,0x8(%esp) + : 3427f: movl $0x30,0x4(%esp) + : 34287: mov 0xffffff98(%ebp),%eax + : 3428a: mov %eax,(%esp) + : 3428d: call 8358 + : 34292: mov 0x6c(%esi),%edx + : 34295: jmp 33d1d + : 3429a: lea 0x0(%esi),%esi + : 342a0: movl $0x0,0x8(%esp) + : 342a8: movl $0x68,0x4(%esp) + : 342b0: mov 0xffffff98(%ebp),%eax + : 342b3: mov %eax,(%esp) + : 342b6: call 8358 + : 342bb: mov 0x6c(%esi),%edx + : 342be: jmp 33e05 + : 342c3: movl $0x0,0x8(%esp) + : 342cb: mov %ecx,0x4(%esp) + : 342cf: mov 0xffffff98(%ebp),%esi + : 342d2: mov %esi,(%esp) + : 342d5: call 8358 + : 342da: mov 0xffffff9c(%ebp),%eax + : 342dd: mov 0x6c(%eax),%edx + : 342e0: jmp 34065 + 27 9.6e-04 : 342e5: mov 0xffffffb8(%ebp),%eax + 47 0.0017 : 342e8: test %eax,%eax + : 342ea: je 343e0 + : 342f0: lea 0xffffc874(%ebx),%eax + 17 6.1e-04 : 342f6: mov %eax,0x4(%esp) + 4 1.4e-04 : 342fa: mov 0x29f8(%ebx),%eax + 1 3.6e-05 : 34300: movl $0x460,0x8(%esp) + 27 9.6e-04 : 34308: mov %eax,(%esp) + 14 5.0e-04 : 3430b: call 7d88 + 20 7.1e-04 : 34310: mov 0x29e8(%ebx),%edi + 1 3.6e-05 : 34316: xor %eax,%eax + : 34318: mov $0x8,%ecx + : 3431d: cld + 220 0.0079 : 3431e: rep stos %eax,%es:(%edi) + 236 0.0084 : 34320: mov 0x29e8(%ebx),%ecx + 4 1.4e-04 : 34326: mov 0x2a58(%ebx),%esi + 3 1.1e-04 : 3432c: mov (%ecx),%eax + 7936 0.2832 : 3432e: mov %esi,%edx + : 34330: add 0x2a38(%ebx),%edx + : 34336: orb $0x80,0x7(%ecx) + 4613 0.1646 : 3433a: and $0x3f,%eax + : 3433d: and $0xffffffc0,%edx + : 34340: or %edx,%eax + 40 
0.0014 : 34342: mov %esi,%edx + 5 1.8e-04 : 34344: mov %eax,(%ecx) + : 34346: movzbl (%ecx),%eax + 5624 0.2007 : 34349: and $0xfffffff1,%eax + 9 3.2e-04 : 3434c: or $0x2,%eax + 12 4.3e-04 : 3434f: mov %al,(%ecx) + 11 3.9e-04 : 34351: movzwl 0x6(%ecx),%eax + 3823 0.1364 : 34355: and $0xfc03,%ax + 14 5.0e-04 : 34359: or $0x8,%eax + 23 8.2e-04 : 3435c: mov %ax,0x6(%ecx) + 16 5.7e-04 : 34360: mov 0x8(%ecx),%eax + 752 0.0268 : 34363: add 0x2a40(%ebx),%edx + : 34369: andb $0x81,0xf(%ecx) + 2259 0.0806 : 3436d: andb $0x3,0xe(%ecx) + 2940 0.1049 : 34371: and $0xfffffc00,%edx + : 34377: and $0x3ff,%eax + 4 1.4e-04 : 3437c: or %edx,%eax + : 3437e: mov %eax,0x8(%ecx) + 14 5.0e-04 : 34381: mov 0xc(%ecx),%eax + 5772 0.2060 : 34384: andb $0xf0,0x8(%ecx) + 2356 0.0841 : 34388: and $0xfffe07ff,%eax + : 3438d: or $0x8,%ah + 1 3.6e-05 : 34390: mov %eax,0xc(%ecx) + 15 5.4e-04 : 34393: jmp 33c5d + 15 5.4e-04 : 34398: movl $0x28,0xffffffbc(%ebp) + 3 1.1e-04 : 3439f: movl $0x10,0xffffffc4(%ebp) + : 343a6: movl $0x78090003,0xffffffc8(%ebp) + : 343ad: movl $0x28,0xffffffcc(%ebp) + 17 6.1e-04 : 343b4: jmp 34053 + : 343b9: sub $0x1,%edx + : 343bc: jne 3393c + : 343c2: jmp 341b9 + 7 2.5e-04 : 343c7: lea 0xffffbc74(%ebx),%eax + 33 0.0012 : 343cd: lea 0x0(%esi),%esi + 8 2.9e-04 : 343d0: jmp 33bc0 + 17 6.1e-04 : 343d5: lea 0xffffb554(%ebx),%eax + 43 0.0015 : 343db: jmp 33a76 + 10 3.6e-04 : 343e0: lea 0xffffccd4(%ebx),%eax + : 343e6: jmp 342f6 + : 343eb: movl $0x2,0x2a64(%ebx) + : 343f5: jmp 33316 + : 343fa: lea 0xffffd134(%ebx),%eax + : 34400: mov %eax,0x4(%esp) + : 34404: lea 0xffff553c(%ebx),%eax + : 3440a: mov %edx,0x8(%esp) + : 3440e: mov %eax,(%esp) + : 34411: call 8a18 + : 34416: mov 0xffffffbc(%ebp),%esi + : 34419: lea 0xffffd134(%ebx),%eax + : 3441f: mov %eax,0x4(%esp) + : 34423: lea 0xffffabdc(%ebx),%eax + : 34429: mov %ecx,0x8(%esp) + : 3442d: mov %eax,(%esp) + : 34430: mov %esi,0xc(%esp) + : 34434: call 8a18 + : 34439: mov 0xffffffbc(%ebp),%eax + : 3443c: mov %ecx,0x8(%esp) + : 
34440: mov %eax,0xc(%esp) + : 34444: lea 0xffffd134(%ebx),%eax + : 3444a: mov %eax,0x4(%esp) + : 3444e: lea 0xffff5508(%ebx),%eax + : 34454: mov %eax,(%esp) + : 34457: call 8a18 + : 3445c: nop + : 3445d: nop + : 3445e: nop + : 3445f: nop + :Disassembly of section .fini: + : + :/home/cworth/opt/xorg/lib/xorg/modules/drivers/intel_drv.so: file format elf32-i386 + : + :Disassembly of section .init: + :Disassembly of section .plt: + :Disassembly of section .text: + : +00011330 : /* I830WaitLpRing total: 94669 3.3786 */ + :} diff --git a/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.source_annotate b/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.source_annotate new file mode 100644 index 0000000..f900716 --- /dev/null +++ b/src/exa/opannotate_i965_prepare_composite/i965_prepare_composite.source_annotate @@ -0,0 +1,670 @@ +/* + * Command line: opannotate --source + * + * Interpretation of command line: + * Output annotated source file with samples + * Output all files + * + * CPU: Core 2, speed 2133.49 MHz (estimated) + * Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 + * (Unhalted core cycles) count 100000 + */ +/* + * Total samples for file : "/home/cworth/src/xorg/driver/xf86-video-intel/src/i965_render.c" + * + * 881083 31.4445 + */ +... 
+ :Bool + :i965_prepare_composite(int op, PicturePtr pSrcPicture, + : PicturePtr pMaskPicture, PicturePtr pDstPicture, + : PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) + 304 0.0108 :{ /* i965_prepare_composite total: 830728 29.6474 */ + 1017 0.0363 : ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; + 184 0.0066 : I830Ptr pI830 = I830PTR(pScrn); + : CARD32 src_offset, src_pitch; + : CARD32 mask_offset = 0, mask_pitch = 0; + : CARD32 dst_format, dst_offset, dst_pitch; + : Bool rotation_program = FALSE; + : + 22 7.9e-04 : IntelEmitInvarientState(pScrn); + 103 0.0037 : *pI830->last_3d = LAST_3D_RENDER; + : + 153 0.0055 : src_offset = intel_get_pixmap_offset(pSrc); + 65 0.0023 : src_pitch = intel_get_pixmap_pitch(pSrc); + 160 0.0057 : dst_offset = intel_get_pixmap_offset(pDst); + 72 0.0026 : dst_pitch = intel_get_pixmap_pitch(pDst); + 25 8.9e-04 : if (pMask) { + 27 9.6e-04 : mask_offset = intel_get_pixmap_offset(pMask); + 28 1.0e-03 : mask_pitch = intel_get_pixmap_pitch(pMask); + : } + 245 0.0087 : pI830->scale_units[0][0] = pSrc->drawable.width; + 54 0.0019 : pI830->scale_units[0][1] = pSrc->drawable.height; + : + 64 0.0023 : pI830->transform[0] = pSrcPicture->transform; + : + : if (!pMask) { + 6 2.1e-04 : pI830->transform[1] = NULL; + 56 0.0020 : pI830->scale_units[1][0] = -1; + 2 7.1e-05 : pI830->scale_units[1][1] = -1; + 16 5.7e-04 : if (pI830->transform[0] && + : i965_check_rotation_transform(pI830->transform[0])) + : rotation_program = TRUE; + : } else { + 44 0.0016 : pI830->transform[1] = pMaskPicture->transform; + 125 0.0045 : pI830->scale_units[1][0] = pMask->drawable.width; + 99 0.0035 : pI830->scale_units[1][1] = pMask->drawable.height; + : } + : + : /* setup 3d pipeline state */ + : + 30 0.0011 : binding_table_entries = 2; /* default no mask */ + : + : /* Wait for sync before we start setting up our new state */ + :#if 0 + : i830WaitSync(pScrn); + :#endif + : + : /* Set up our layout of state in framebuffer. 
First the general state: */ + : next_offset = 0; + 31 0.0011 : vs_offset = ALIGN(next_offset, 64); + : next_offset = vs_offset + sizeof(*vs_state); + : + 5 1.8e-04 : sf_offset = ALIGN(next_offset, 32); + : next_offset = sf_offset + sizeof(*sf_state); + : + 14 5.0e-04 : wm_offset = ALIGN(next_offset, 32); + : next_offset = wm_offset + sizeof(*wm_state); + : + 25 8.9e-04 : wm_scratch_offset = ALIGN(next_offset, 1024); + : next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS; + : + 31 0.0011 : cc_offset = ALIGN(next_offset, 32); + : next_offset = cc_offset + sizeof(*cc_state); + : + : /* keep current sf_kernel, which will send one setup urb entry to + : * PS kernel + : */ + 6 2.1e-04 : sf_kernel_offset = ALIGN(next_offset, 64); + : if (pMask) + 14 5.0e-04 : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask); + : else if (rotation_program) + : next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation); + : else + : next_offset = sf_kernel_offset + sizeof (sf_kernel_static); + : + 34 0.0012 : ps_kernel_offset = ALIGN(next_offset, 64); + : if (pMask) { + : if (pMaskPicture->componentAlpha && + : PICT_FORMAT_RGB(pMaskPicture->format)) { + : if (i965_blend_op[op].src_alpha) { + : next_offset = ps_kernel_offset + + : sizeof(ps_kernel_static_maskca_srcalpha); + : } else { + : next_offset = ps_kernel_offset + + : sizeof(ps_kernel_static_maskca); + : } + : } else + 114 0.0041 : next_offset = ps_kernel_offset + + : sizeof(ps_kernel_static_masknoca); + : } else if (rotation_program) { + : next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation); + : } else { + 99 0.0035 : next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask); + : } + : + 198 0.0071 : sip_kernel_offset = ALIGN(next_offset, 64); + : next_offset = sip_kernel_offset + sizeof (sip_kernel_static); + : + : /* needed? 
*/ + 97 0.0035 : cc_viewport_offset = ALIGN(next_offset, 32); + : next_offset = cc_viewport_offset + sizeof(*cc_viewport); + : + : /* for texture sampler */ + 72 0.0026 : src_sampler_offset = ALIGN(next_offset, 32); + 30 0.0011 : next_offset = src_sampler_offset + sizeof(*src_sampler_state); + : + 112 0.0040 : if (pMask) { + 14 5.0e-04 : mask_sampler_offset = ALIGN(next_offset, 32); + 31 0.0011 : next_offset = mask_sampler_offset + sizeof(*mask_sampler_state); + : } + : /* Align VB to native size of elements, for safety */ + 337 0.0120 : vb_offset = ALIGN(next_offset, 8); + : next_offset = vb_offset + vb_size; + : + : /* And then the general state: */ + 184 0.0066 : dest_surf_offset = ALIGN(next_offset, 32); + : next_offset = dest_surf_offset + sizeof(*dest_surf_state); + : + 136 0.0049 : src_surf_offset = ALIGN(next_offset, 32); + 390 0.0139 : next_offset = src_surf_offset + sizeof(*src_surf_state); + : + 55 0.0020 : if (pMask) { + 28 1.0e-03 : mask_surf_offset = ALIGN(next_offset, 32); + 28 1.0e-03 : next_offset = mask_surf_offset + sizeof(*mask_surf_state); + 9 3.2e-04 : binding_table_entries = 3; + : } + : + 84 0.0030 : binding_table_offset = ALIGN(next_offset, 32); + : next_offset = binding_table_offset + (binding_table_entries * 4); + : + 155 0.0055 : default_color_offset = ALIGN(next_offset, 32); + 43 0.0015 : next_offset = default_color_offset + sizeof(*default_color_state); + : + 36 0.0013 : total_state_size = next_offset; + : assert(total_state_size < pI830->exa_965_state->size); + : + 103 0.0037 : state_base_offset = pI830->exa_965_state->offset; + 84 0.0030 : state_base_offset = ALIGN(state_base_offset, 64); + 173 0.0062 : state_base = (char *)(pI830->FbBase + state_base_offset); + : + 12 4.3e-04 : vs_state = (void *)(state_base + vs_offset); + 36 0.0013 : sf_state = (void *)(state_base + sf_offset); + 43 0.0015 : wm_state = (void *)(state_base + wm_offset); + 38 0.0014 : cc_state = (void *)(state_base + cc_offset); + 29 0.0010 : sf_kernel = (void 
*)(state_base + sf_kernel_offset); + 79 0.0028 : ps_kernel = (void *)(state_base + ps_kernel_offset); + 31 0.0011 : sip_kernel = (void *)(state_base + sip_kernel_offset); + : + 63 0.0022 : cc_viewport = (void *)(state_base + cc_viewport_offset); + : + 25 8.9e-04 : dest_surf_state = (void *)(state_base + dest_surf_offset); + 64 0.0023 : src_surf_state = (void *)(state_base + src_surf_offset); + 37 0.0013 : if (pMask) + 17 6.1e-04 : mask_surf_state = (void *)(state_base + mask_surf_offset); + : + 104 0.0037 : src_sampler_state = (void *)(state_base + src_sampler_offset); + : if (pMask) + 20 7.1e-04 : mask_sampler_state = (void *)(state_base + mask_sampler_offset); + : + 55 0.0020 : binding_table = (void *)(state_base + binding_table_offset); + : + 42 0.0015 : vb = (void *)(state_base + vb_offset); + : + 65 0.0023 : default_color_state = (void*)(state_base + default_color_offset); + : + : /* Set up a default static partitioning of the URB, which is supposed to + : * allow anything we would want to do, at potentially lower performance. 
+ : */ + :#define URB_CS_ENTRY_SIZE 0 + :#define URB_CS_ENTRIES 0 + : + :#define URB_VS_ENTRY_SIZE 1 // each 512-bit row + :#define URB_VS_ENTRIES 8 // we needs at least 8 entries + : + :#define URB_GS_ENTRY_SIZE 0 + :#define URB_GS_ENTRIES 0 + : + :#define URB_CLIP_ENTRY_SIZE 0 + :#define URB_CLIP_ENTRIES 0 + : + :#define URB_SF_ENTRY_SIZE 2 + :#define URB_SF_ENTRIES 1 + : + 25 8.9e-04 : urb_vs_start = 0; + 21 7.5e-04 : urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + 40 0.0014 : urb_gs_start = urb_vs_start + urb_vs_size; + 58 0.0021 : urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + 58 0.0021 : urb_clip_start = urb_gs_start + urb_gs_size; + 50 0.0018 : urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + 46 0.0016 : urb_sf_start = urb_clip_start + urb_clip_size; + 52 0.0019 : urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + 42 0.0015 : urb_cs_start = urb_sf_start + urb_sf_size; + 43 0.0015 : urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + : + : /* Because we only have a single static buffer for our state currently, + : * we have to sync before updating it every time. 
+ : */ + :#if 0 + : i830WaitSync(pScrn); + :#endif + : + 274 0.0098 : memset (cc_viewport, 0, sizeof (*cc_viewport)); + 124 0.0044 : cc_viewport->min_depth = -1.e35; + 122 0.0044 : cc_viewport->max_depth = 1.e35; + : + : /* Color calculator state */ + 861 0.0307 : memset(cc_state, 0, sizeof(*cc_state)); + 18559 0.6623 : cc_state->cc0.stencil_enable = 0; /* disable stencil */ + 17836 0.6365 : cc_state->cc2.depth_test = 0; /* disable depth test */ + 12306 0.4392 : cc_state->cc2.logicop_enable = 0; /* disable logic op */ + : cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */ + 7308 0.2608 : cc_state->cc3.blend_enable = 1; /* enable color blend */ + 10 3.6e-04 : cc_state->cc3.alpha_test = 0; /* disable alpha test */ + 9645 0.3442 : cc_state->cc4.cc_viewport_state_offset = (state_base_offset + + : cc_viewport_offset) >> 5; + 7354 0.2625 : cc_state->cc5.dither_enable = 0; /* disable dither */ + 926 0.0330 : cc_state->cc5.logicop_func = 0xc; /* COPY */ + 2780 0.0992 : cc_state->cc5.statistics_enable = 1; + 63 0.0022 : cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + 32 0.0011 : i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, + : &src_blend, &dst_blend); + : /* XXX: alpha blend factor should be same as color, but check + : * for CA case in future + : */ + 14089 0.5028 : cc_state->cc5.ia_src_blend_factor = src_blend; + 301 0.0107 : cc_state->cc5.ia_dest_blend_factor = dst_blend; + 13845 0.4941 : cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; + 31 0.0011 : cc_state->cc6.src_blend_factor = src_blend; + 2361 0.0843 : cc_state->cc6.dest_blend_factor = dst_blend; + : cc_state->cc6.clamp_post_alpha_blend = 1; + 4466 0.1594 : cc_state->cc6.clamp_pre_alpha_blend = 1; + 359 0.0128 : cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ + : + : /* Upload system kernel */ + 86 0.0031 : memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static)); + : + : /* Set up the state buffer for the destination surface */ + 332 0.0118 : 
memset(dest_surf_state, 0, sizeof(*dest_surf_state)); + 15291 0.5457 : dest_surf_state->ss0.surface_type = BRW_SURFACE_2D; + 67 0.0024 : dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + 39 0.0014 : i965_get_dest_format(pDstPicture, &dst_format); + 14430 0.5150 : dest_surf_state->ss0.surface_format = dst_format; + : + : dest_surf_state->ss0.writedisable_alpha = 0; + 361 0.0129 : dest_surf_state->ss0.writedisable_red = 0; + : dest_surf_state->ss0.writedisable_green = 0; + : dest_surf_state->ss0.writedisable_blue = 0; + 14076 0.5024 : dest_surf_state->ss0.color_blend = 1; + : dest_surf_state->ss0.vert_line_stride = 0; + : dest_surf_state->ss0.vert_line_stride_ofs = 0; + : dest_surf_state->ss0.mipmap_layout_mode = 0; + : dest_surf_state->ss0.render_cache_read_mode = 0; + : + 36 0.0013 : dest_surf_state->ss1.base_addr = dst_offset; + 420 0.0150 : dest_surf_state->ss2.height = pDst->drawable.height - 1; + 14567 0.5199 : dest_surf_state->ss2.width = pDst->drawable.width - 1; + : dest_surf_state->ss2.mip_count = 0; + 398 0.0142 : dest_surf_state->ss2.render_target_rotation = 0; + 11691 0.4172 : dest_surf_state->ss3.pitch = dst_pitch - 1; + : + : /* Set up the source surface state buffer */ + 66 0.0024 : memset(src_surf_state, 0, sizeof(*src_surf_state)); + 13897 0.4960 : src_surf_state->ss0.surface_type = BRW_SURFACE_2D; + 14657 0.5231 : src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture); + : + : src_surf_state->ss0.writedisable_alpha = 0; + 25 8.9e-04 : src_surf_state->ss0.writedisable_red = 0; + : src_surf_state->ss0.writedisable_green = 0; + : src_surf_state->ss0.writedisable_blue = 0; + 14981 0.5346 : src_surf_state->ss0.color_blend = 1; + : src_surf_state->ss0.vert_line_stride = 0; + : src_surf_state->ss0.vert_line_stride_ofs = 0; + : src_surf_state->ss0.mipmap_layout_mode = 0; + 1 3.6e-05 : src_surf_state->ss0.render_cache_read_mode = 0; + : + 26 9.3e-04 : src_surf_state->ss1.base_addr = src_offset; + 127 0.0045 : 
src_surf_state->ss2.width = pSrc->drawable.width - 1; + 6454 0.2303 : src_surf_state->ss2.height = pSrc->drawable.height - 1; + : src_surf_state->ss2.mip_count = 0; + 15025 0.5362 : src_surf_state->ss2.render_target_rotation = 0; + 243 0.0087 : src_surf_state->ss3.pitch = src_pitch - 1; + : + : /* setup mask surface */ + : if (pMask) { + 48 0.0017 : memset(mask_surf_state, 0, sizeof(*mask_surf_state)); + 7037 0.2511 : mask_surf_state->ss0.surface_type = BRW_SURFACE_2D; + 7619 0.2719 : mask_surf_state->ss0.surface_format = + : i965_get_card_format(pMaskPicture); + : + : mask_surf_state->ss0.writedisable_alpha = 0; + 25 8.9e-04 : mask_surf_state->ss0.writedisable_red = 0; + : mask_surf_state->ss0.writedisable_green = 0; + : mask_surf_state->ss0.writedisable_blue = 0; + 7789 0.2780 : mask_surf_state->ss0.color_blend = 1; + : mask_surf_state->ss0.vert_line_stride = 0; + : mask_surf_state->ss0.vert_line_stride_ofs = 0; + : mask_surf_state->ss0.mipmap_layout_mode = 0; + : mask_surf_state->ss0.render_cache_read_mode = 0; + : + 15 5.4e-04 : mask_surf_state->ss1.base_addr = mask_offset; + 37 0.0013 : mask_surf_state->ss2.width = pMask->drawable.width - 1; + 3281 0.1171 : mask_surf_state->ss2.height = pMask->drawable.height - 1; + : mask_surf_state->ss2.mip_count = 0; + 7624 0.2721 : mask_surf_state->ss2.render_target_rotation = 0; + 71 0.0025 : mask_surf_state->ss3.pitch = mask_pitch - 1; + : } + : + : /* Set up a binding table for our surfaces. 
Only the PS will use it */ + 94 0.0034 : binding_table[0] = state_base_offset + dest_surf_offset; + 29 0.0010 : binding_table[1] = state_base_offset + src_surf_offset; + : if (pMask) + 2 7.1e-05 : binding_table[2] = state_base_offset + mask_surf_offset; + : + : /* PS kernel use this sampler */ + 111 0.0040 : memset(src_sampler_state, 0, sizeof(*src_sampler_state)); + 24 8.6e-04 : src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + 12445 0.4441 : switch(pSrcPicture->filter) { + : case PictFilterNearest: + 30 0.0011 : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + 14939 0.5332 : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + : break; + : case PictFilterBilinear: + 5 1.8e-04 : src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + 5 1.8e-04 : src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + : break; + : default: + : I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter); + : } + : + 15145 0.5405 : memset(default_color_state, 0, sizeof(*default_color_state)); + : default_color_state->color[0] = 0.0; /* R */ + 41 0.0015 : default_color_state->color[1] = 0.0; /* G */ + : default_color_state->color[2] = 0.0; /* B */ + : default_color_state->color[3] = 0.0; /* A */ + : + 69 0.0025 : src_sampler_state->ss0.default_color_mode = 0; /* GL mode */ + : + 6323 0.2257 : if (!pSrcPicture->repeat) { + 88 0.0031 : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + 5650 0.2016 : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + 4660 0.1663 : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + 532 0.0190 : src_sampler_state->ss2.default_color_pointer = + : (state_base_offset + default_color_offset) >> 5; + : } else { + 33 0.0012 : src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + 343 0.0122 : src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + 9699 0.3461 : src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : } + 8398 0.2997 : 
src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ + : + 3 1.1e-04 : if (pMask) { + 104 0.0037 : memset(mask_sampler_state, 0, sizeof(*mask_sampler_state)); + 5 1.8e-04 : mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + 8123 0.2899 : switch(pMaskPicture->filter) { + : case PictFilterNearest: + 22 7.9e-04 : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + 7801 0.2784 : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + : break; + : case PictFilterBilinear: + : mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + : mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + : break; + : default: + : I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter); + : } + : + 7750 0.2766 : if (!pMaskPicture->repeat) { + 85 0.0030 : mask_sampler_state->ss1.r_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + 7668 0.2737 : mask_sampler_state->ss1.s_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + 6142 0.2192 : mask_sampler_state->ss1.t_wrap_mode = + : BRW_TEXCOORDMODE_CLAMP_BORDER; + 31 0.0011 : mask_sampler_state->ss2.default_color_pointer = + : (state_base_offset + default_color_offset)>>5; + : } else { + : mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + 1 3.6e-05 : mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + 4 1.4e-04 : mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + : } + : mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ + : } + : + : /* Set up the vertex shader to be disabled (passthrough) */ + 619 0.0221 : memset(vs_state, 0, sizeof(*vs_state)); + 15697 0.5602 : vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; + 6724 0.2400 : vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + 8547 0.3050 : vs_state->vs6.vs_enable = 0; + : vs_state->vs6.vert_cache_disable = 1; + : + : /* Set up the SF kernel to do coord interp: for each attribute, + : * calculate dA/dx and dA/dy. 
Hand these interpolation coefficients + : * back to SF which then hands pixels off to WM. + : */ + : if (pMask) + 303 0.0108 : memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static)); + 13 4.6e-04 : else if (rotation_program) + : memcpy(sf_kernel, sf_kernel_static_rotation, + : sizeof (sf_kernel_static_rotation)); + : else + 60 0.0021 : memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static)); + : + 558 0.0199 : memset(sf_state, 0, sizeof(*sf_state)); + 27418 0.9785 : sf_state->thread0.kernel_start_pointer = + : (state_base_offset + sf_kernel_offset) >> 6; + 14701 0.5247 : sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + 16615 0.5930 : sf_state->sf1.single_program_flow = 1; + 6135 0.2189 : sf_state->sf1.binding_table_entry_count = 0; + : sf_state->sf1.thread_priority = 0; + 1449 0.0517 : sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ + : sf_state->sf1.illegal_op_exception_enable = 1; + 12439 0.4439 : sf_state->sf1.mask_stack_exception_enable = 1; + 14092 0.5029 : sf_state->sf1.sw_exception_enable = 1; + 31133 1.1111 : sf_state->thread2.per_thread_scratch_space = 0; + : /* scratch space is not used in our kernel */ + 40 0.0014 : sf_state->thread2.scratch_space_base_pointer = 0; + : sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + 13800 0.4925 : sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + 988 0.0353 : sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + : /* don't smash vertex header, read start from dw8 */ + 2292 0.0818 : sf_state->thread3.urb_entry_read_offset = 1; + 2819 0.1006 : sf_state->thread3.dispatch_grf_start_reg = 3; + 20 7.1e-04 : sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + 1265 0.0451 : sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + 16395 0.5851 : sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + 324 0.0116 : sf_state->thread4.stats_enable = 1; + 978 0.0349 : sf_state->sf5.viewport_transform = 
FALSE; /* skip viewport */ + 13430 0.4793 : sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; + 1120 0.0400 : sf_state->sf6.scissor = 0; + 38 0.0014 : sf_state->sf7.trifan_pv = 2; + 140 0.0050 : sf_state->sf6.dest_org_vbias = 0x8; + 11266 0.4021 : sf_state->sf6.dest_org_hbias = 0x8; + : + : /* Set up the PS kernel (dispatched by WM) */ + 22 7.9e-04 : if (pMask) { + 26 9.3e-04 : if (pMaskPicture->componentAlpha && + : PICT_FORMAT_RGB(pMaskPicture->format)) { + 38 0.0014 : if (i965_blend_op[op].src_alpha) + 148 0.0053 : memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha, + : sizeof (ps_kernel_static_maskca_srcalpha)); + : else + 48 0.0017 : memcpy(ps_kernel, ps_kernel_static_maskca, + : sizeof (ps_kernel_static_maskca)); + : } else + 23 8.2e-04 : memcpy(ps_kernel, ps_kernel_static_masknoca, + : sizeof (ps_kernel_static_masknoca)); + 74 0.0026 : } else if (rotation_program) { + 63 0.0022 : memcpy(ps_kernel, ps_kernel_static_rotation, + : sizeof (ps_kernel_static_rotation)); + : } else { + 10 3.6e-04 : memcpy(ps_kernel, ps_kernel_static_nomask, + : sizeof (ps_kernel_static_nomask)); + : } + : + 549 0.0196 : memset(wm_state, 0, sizeof (*wm_state)); + 29001 1.0350 : wm_state->thread0.kernel_start_pointer = + : (state_base_offset + ps_kernel_offset) >> 6; + 12982 0.4633 : wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + : wm_state->thread1.single_program_flow = 1; + : if (!pMask) + 3871 0.1382 : wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + : else + 4843 0.1728 : wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + : + 8578 0.3061 : wm_state->thread2.scratch_space_base_pointer = (state_base_offset + + : wm_scratch_offset)>>10; + 13261 0.4733 : wm_state->thread2.per_thread_scratch_space = 0; + : wm_state->thread3.const_urb_entry_read_length = 0; + 5646 0.2015 : wm_state->thread3.const_urb_entry_read_offset = 0; + : /* Each pair of attributes (src/mask coords) is one URB entry */ + : if (pMask) + 2958 0.1056 : 
wm_state->thread3.urb_entry_read_length = 2; + : else + 2386 0.0852 : wm_state->thread3.urb_entry_read_length = 1; + 30 0.0011 : wm_state->thread3.urb_entry_read_offset = 0; + : /* wm kernel use urb from 3, see wm_program in compiler module */ + 13080 0.4668 : wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + : + 12997 0.4638 : wm_state->wm4.stats_enable = 1; /* statistic */ + 79 0.0028 : wm_state->wm4.sampler_state_pointer = (state_base_offset + + : src_sampler_offset) >> 5; + 10829 0.3865 : wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ + 40 0.0014 : wm_state->wm5.max_threads = PS_MAX_THREADS - 1; + 2 7.1e-05 : wm_state->wm5.thread_dispatch_enable = 1; + : /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + : * start point + : */ + 93 0.0033 : wm_state->wm5.enable_16_pix = 1; + : wm_state->wm5.enable_8_pix = 0; + 10969 0.3915 : wm_state->wm5.early_depth_test = 1; + : + : /* Begin the long sequence of commands needed to set up the 3D + : * rendering pipe + : */ + : { + 111 0.0040 : BEGIN_LP_RING(2); + 21 7.5e-04 : OUT_RING(MI_FLUSH | + : MI_STATE_INSTRUCTION_CACHE_FLUSH | + : BRW_MI_GLOBAL_SNAPSHOT_RESET); + 39 0.0014 : OUT_RING(MI_NOOP); + 239 0.0085 : ADVANCE_LP_RING(); + : } + : { + 5017 0.1790 : BEGIN_LP_RING(12); + : + : /* Match Mesa driver setup */ + 57 0.0020 : OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + : + 130 0.0046 : OUT_RING(BRW_CS_URB_STATE | 0); + 136 0.0049 : OUT_RING((0 << 4) | /* URB Entry Allocation Size */ + : (0 << 0)); /* Number of URB Entries */ + : + : /* Zero out the two base address registers so all offsets are + : * absolute. 
+ : */ + 71 0.0025 : OUT_RING(BRW_STATE_BASE_ADDRESS | 4); + 79 0.0028 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + 68 0.0024 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ + 167 0.0060 : OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + : /* general state max addr, disabled */ + 62 0.0022 : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); + : /* media object state max addr, disabled */ + 60 0.0021 : OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); + : + : /* Set system instruction pointer */ + 159 0.0057 : OUT_RING(BRW_STATE_SIP | 0); + 223 0.0080 : OUT_RING(state_base_offset + sip_kernel_offset); + 137 0.0049 : OUT_RING(MI_NOOP); + 1438 0.0513 : ADVANCE_LP_RING(); + : } + : { + 16676 0.5951 : BEGIN_LP_RING(26); + : /* Pipe control */ + 53 0.0019 : OUT_RING(BRW_PIPE_CONTROL | + : BRW_PIPE_CONTROL_NOWRITE | + : BRW_PIPE_CONTROL_IS_FLUSH | + : 2); + 181 0.0065 : OUT_RING(0); /* Destination address */ + 62 0.0022 : OUT_RING(0); /* Immediate data low DW */ + 127 0.0045 : OUT_RING(0); /* Immediate data high DW */ + : + : /* Binding table pointers */ + 77 0.0027 : OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + 60 0.0021 : OUT_RING(0); /* vs */ + 90 0.0032 : OUT_RING(0); /* gs */ + 57 0.0020 : OUT_RING(0); /* clip */ + 160 0.0057 : OUT_RING(0); /* sf */ + : /* Only the PS uses the binding table */ + 200 0.0071 : OUT_RING(state_base_offset + binding_table_offset); /* ps */ + : + : /* The drawing rectangle clipping is always on. Set it to values that + : * shouldn't do any clipping. 
+ : */ + 282 0.0101 : OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ + 156 0.0056 : OUT_RING(0x00000000); /* ymin, xmin */ + 640 0.0228 : OUT_RING(DRAW_YMAX(pDst->drawable.height - 1) | + : DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */ + 463 0.0165 : OUT_RING(0x00000000); /* yorigin, xorigin */ + : + : /* skip the depth buffer */ + : /* skip the polygon stipple */ + : /* skip the polygon stipple offset */ + : /* skip the line stipple */ + : + : /* Set the pointers to the 3d pipeline state */ + 132 0.0047 : OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5); + 529 0.0189 : OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */ + 181 0.0065 : OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ + 3012 0.1075 : OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ + 4257 0.1519 : OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */ + 423 0.0151 : OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */ + 224 0.0080 : OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */ + : + : /* URB fence */ + 31 0.0011 : OUT_RING(BRW_URB_FENCE | + : UF0_CS_REALLOC | + : UF0_SF_REALLOC | + : UF0_CLIP_REALLOC | + : UF0_GS_REALLOC | + : UF0_VS_REALLOC | + : 1); + 522 0.0186 : OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + : ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + : ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + 466 0.0166 : OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + : ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + : + : /* Constant buffer state */ + 180 0.0064 : OUT_RING(BRW_CS_URB_STATE | 0); + 31 0.0011 : OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | + : (URB_CS_ENTRIES << 0)); + 1134 0.0405 : ADVANCE_LP_RING(); + : } + : { + 10119 0.3611 : int nelem = pMask ? 
3: 2; + 1022 0.0365 : BEGIN_LP_RING(pMask?12:10); + : /* Set up the pointer to our vertex buffer */ + 64 0.0023 : OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); + 61 0.0022 : OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) | + : VB0_VERTEXDATA | + : ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT)); + 119 0.0042 : OUT_RING(state_base_offset + vb_offset); + 35 0.0012 : OUT_RING(3); + 94 0.0034 : OUT_RING(0); // ignore for VERTEXDATA, but still there + : + : /* Set up our vertex elements, sourced from the single vertex buffer. + : */ + 64 0.0023 : OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); + : /* vertex coordinates */ + 65 0.0023 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (0 << VE0_OFFSET_SHIFT)); + 33 0.0012 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + : (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + : /* u0, v0 */ + 45 0.0016 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (8 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ + 151 0.0054 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + : (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ + : /* u1, v1 */ + 42 0.0015 : if (pMask) { + 35 0.0012 : OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + : VE0_VALID | + : (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + : (16 << VE0_OFFSET_SHIFT)); + 31 0.0011 : OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + : (BRW_VFCOMPONENT_STORE_SRC << 
VE1_VFCOMPONENT_1_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + : (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + : (10 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + : } + : + 1064 0.0380 : ADVANCE_LP_RING(); + : } + : + :#ifdef I830DEBUG + : ErrorF("try to sync to show any errors..."); + : I830Sync(pScrn); + :#endif + : return TRUE; + 16868 0.6020 :} -- 2.43.0