本文整理汇总了C++中OUT_BATCH函数的典型用法代码示例。如果您正苦于以下问题：C++ OUT_BATCH函数的具体用法？C++ OUT_BATCH怎么用？C++ OUT_BATCH使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了OUT_BATCH函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: gen6_blorp_emit_gs_disable
/**
 * 3DSTATE_GS
 *
 * Disable the geometry shader by emitting 3DSTATE_CONSTANT_GS and 3DSTATE_GS
 * with every payload dword set to zero.
 *
 * \param brw     context whose batch receives the two packets
 * \param params  not read by this function
 */
void
gen6_blorp_emit_gs_disable(struct brw_context *brw,
                           const brw_blorp_params *params)
{
   /* 3DSTATE_CONSTANT_GS: header plus four zero dwords, which leaves all
    * the constant buffers disabled.
    */
   BEGIN_BATCH(5);
   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
   for (int dword = 1; dword < 5; dword++) {
      OUT_BATCH(0);
   }
   ADVANCE_BATCH();

   /* 3DSTATE_GS: header plus six zero dwords. */
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
   for (int dword = 1; dword < 7; dword++) {
      OUT_BATCH(0);
   }
   ADVANCE_BATCH();
}
示例2: gen7_upload_hs_state
static void
gen7_upload_hs_state(struct brw_context *brw)
{
const struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
/* BRW_NEW_TCS_PROG_DATA */
const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;
if (active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
GEN7_HS_SAMPLER_COUNT) |
SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
(brw->max_hs_threads - 1));
OUT_BATCH(GEN7_HS_ENABLE |
GEN7_HS_STATISTICS_ENABLE |
SET_FIELD(brw->tcs.prog_data->instances - 1,
GEN7_HS_INSTANCE_COUNT));
OUT_BATCH(stage_state->prog_offset);
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
SET_FIELD(prog_data->base.dispatch_grf_start_reg,
GEN7_HS_DISPATCH_START_GRF));
/* Ignore URB semaphores */
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
brw->tcs.enabled = active;
}
示例3: upload_gs_state
/**
 * Upload 3DSTATE_GS for the current geometry shader stage.
 *
 * Issues a CS stall first when the GS enable state changes on a non-Haswell
 * GT2 part, then builds the packet dwords from the stage state and the
 * VUE/GS program data.
 *
 * NOTE(review): only the beginning of this function is visible in this
 * listing; everything after the start of the Haswell-specific dw6 setup is
 * elided, so the description above covers the visible portion only.
 */
static void
upload_gs_state(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->gs.base;
/* The max-threads field is shifted by a different amount on Haswell. */
const int max_threads_shift = brw->is_haswell ?
HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT;
/* BRW_NEW_GEOMETRY_PROGRAM */
bool active = brw->geometry_program;
/* BRW_NEW_GS_PROG_DATA */
/* Three views of the same stage_state->prog_data pointer. */
const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
const struct brw_vue_prog_data *vue_prog_data =
brw_vue_prog_data(stage_state->prog_data);
const struct brw_gs_prog_data *gs_prog_data =
brw_gs_prog_data(stage_state->prog_data);
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
gen7_emit_cs_stall_flush(brw);
if (active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(stage_state->prog_offset);
/* Sampler count in groups of four, plus the binding table entry count. */
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->binding_table.size_bytes / 4) <<
GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
/* Scratch dword: a relocation into the scratch BO when the program uses
* scratch space, zero otherwise. ffs(size) - 11 is 0 for a size of
* 1 << 10 and grows by one per doubling.
* NOTE(review): confirm this matches the hardware's per-thread scratch
* space encoding.
*/
if (prog_data->total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
/* dw4: output vertex size and topology, URB read length (read offset is
* fixed at zero) and the dispatch start GRF.
*/
uint32_t dw4 =
((gs_prog_data->output_vertex_size_hwords * 2 - 1) <<
GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
(gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
(vue_prog_data->urb_read_length <<
GEN6_GS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
(prog_data->dispatch_grf_start_reg <<
GEN6_GS_DISPATCH_START_GRF_SHIFT);
/* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
* Ivy Bridge and Haswell.
*
* On Ivy Bridge, setting this bit causes the vertices of a triangle
* strip to be delivered to the geometry shader in an order that does
* not strictly follow the OpenGL spec, but preserves triangle
* orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
* the geometry shader sees triangles:
*
* (1, 2, 3), (2, 4, 3), (3, 4, 5)
*
* (Clearing the bit is even worse, because it fails to preserve
* orientation).
*
* Triangle strips with adjacency always ordered in a way that preserves
* triangle orientation but does not strictly follow the OpenGL spec,
* regardless of the setting of this bit.
*
* On Haswell, both triangle strips and triangle strips with adjacency
* are always ordered in a way that preserves triangle orientation.
* Setting this bit causes the ordering to strictly follow the OpenGL
* spec.
*
* So in either case we want to set the bit. Unfortunately on Ivy
* Bridge this will get the order close to correct but not perfect.
*/
uint32_t dw5 =
((devinfo->max_gs_threads - 1) << max_threads_shift) |
(gs_prog_data->control_data_header_size_hwords <<
GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
((gs_prog_data->invocations - 1) <<
GEN7_GS_INSTANCE_CONTROL_SHIFT) |
SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
GEN6_GS_STATISTICS_ENABLE |
(gs_prog_data->include_primitive_id ?
GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
GEN7_GS_REORDER_TRAILING |
GEN7_GS_ENABLE;
uint32_t dw6 = 0;
if (brw->is_haswell) {
dw6 |= gs_prog_data->control_data_format <<
//.........这里部分代码省略.........
示例4: i915_composite
/**
 * Queue one composite rectangle.
 *
 * Inside an atomic batch section this emits any pending render state and
 * vertex-buffer state, records a RECTLIST primitive header if
 * intel->prim_offset is still zero, bumps the vertex count by three and
 * passes the source/mask/destination coordinates to intel->prim_emit.
 *
 * dest supplies the screen; srcX/srcY, maskX/maskY, dstX/dstY, w and h are
 * forwarded unchanged to intel->prim_emit.
 */
void
i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int w, int h)
{
ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
/* 28 + 16 + 10 + 20 + 32 + 16 */
intel_batch_start_atomic(scrn, 150);
if (intel->needs_render_state_emit)
i915_emit_composite_setup(scrn);
/* Vertex state is (re)emitted when flagged, or when the remaining vertex
* space is below 3*4*floats_per_vertex.
* NOTE(review): presumably three vertices at four bytes per float -- confirm
* the unit returned by intel_vertex_space().
*/
if (intel->needs_render_vertex_emit ||
intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
i915_vertex_flush(intel);
if (intel_vertex_space(intel) < 256) {
/* Little space left: move to the next vertex buffer and load both S0
* (relocated against intel->vertex_bo) and S1 (vertex width/pitch),
* restarting the vertex index at zero.
*/
intel_next_vertex(intel);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
(intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
intel->vertex_index = 0;
} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
/* Same buffer but the vertex size changed: reload S1 only, then round
* vertex_used up to a whole number of vertices of the new size.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(1) | 0);
OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
(intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
intel->vertex_index =
(intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
}
intel->last_floats_per_vertex = intel->floats_per_vertex;
intel->needs_render_vertex_emit = FALSE;
}
/* prim_offset is zero until a primitive header has been recorded here. */
if (intel->prim_offset == 0) {
if (intel->needs_render_ca_pass) {
/* Load S6 with the PictOpOutReverse blend control and emit the matching
* shader before the primitive.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse,
intel->render_mask_picture,
intel->render_dest_picture->format));
i915_composite_emit_shader(intel, PictOpOutReverse);
}
/* Remember where the primitive header sits in the batch. */
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
OUT_BATCH(intel->vertex_index);
}
/* Each rectangle adds three vertices to the open primitive. */
intel->vertex_count += 3;
intel->prim_emit(intel,
srcX, srcY,
maskX, maskY,
dstX, dstY,
w, h);
intel_batch_end_atomic(scrn);
}
示例5: upload_state_base_address
/**
* Define the base addresses which some state is referenced from.
*
* This allows us to avoid having to emit relocations for the objects,
* and is actually required for binding table pointers on gen6.
*
* Surface state base address covers binding table pointers and
* surface state objects, but not the surfaces that the surface state
* objects point to.
*
* The packet length depends on the generation: 10 dwords on gen6+, 8 on
* gen5 and 6 on anything older.
*
* NOTE(review): the end of this function is elided in this listing.
*/
static void upload_state_base_address( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
* programmed prior to STATE_BASE_ADDRESS.
*
* However, given that the instruction SBA (general state base
* address) on this chipset is always set to 0 across X and GL,
* maybe this isn't required for us in particular.
*/
/* NOTE(review): the literal 1 written for each address/bound below, and
* passed as the relocation delta, presumably sets the packet's per-field
* modify-enable bit (bit 0) -- confirm against the PRM.
*/
if (intel->gen >= 6) {
/* Gen6 alone gets a post-sync non-zero flush ahead of the packet. */
if (intel->gen == 6)
intel_emit_post_sync_nonzero_flush(intel);
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
/* General state base address: stateless DP read/write requests */
OUT_BATCH(1);
/* Surface state base address:
* BINDING_TABLE_STATE
* SURFACE_STATE
*/
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* Dynamic state base address:
* SAMPLER_STATE
* SAMPLER_BORDER_COLOR_STATE
* CLIP, SF, WM/CC viewport state
* COLOR_CALC_STATE
* DEPTH_STENCIL_STATE
* BLEND_STATE
* Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
* Disable is clear, which we rely on)
*/
OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1); /* Instruction base address: shader kernels (incl. SIP) */
OUT_BATCH(1); /* General state upper bound */
/* Dynamic state upper bound. Although the documentation says that
* programming it to zero will cause it to be ignored, that is a lie.
* If this isn't programmed to a real bound, the sampler border color
* pointer is rejected, causing border color to mysteriously fail.
*/
OUT_BATCH(0xfffff001);
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else if (intel->gen == 5) {
/* Gen5: 8-dword packet, no dynamic state base or bound. */
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1); /* Instruction base address */
OUT_BATCH(0xfffff001); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else {
/* Older generations: 6-dword packet without instruction base fields. */
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
ADVANCE_BATCH();
}
/* According to section 3.6.1 of VOL1 of the 965 PRM,
* STATE_BASE_ADDRESS updates require a reissue of:
*
* 3DSTATE_PIPELINE_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
* MEDIA_STATE_POINTERS
*
* and this continues through Ironlake. The Sandy Bridge PRM, vol
* 1 part 1 says that the following packets must be reissued:
*
* 3DSTATE_CC_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
//.........这里部分代码省略.........
示例6: emit_hw_vertex
/**
 * Write one vertex to the batch as i915 hardware dwords.
 *
 * The attributes listed in i915->current.vertex_info are pulled out of the
 * vertex_header in order.  The 'draw' module builds these vertices with a
 * few leading slots (1-dword header, 4-dword clip pos) that are not emitted
 * here.
 *
 * \param i915    context supplying the vertex layout and the batch
 * \param vertex  source vertex; only its data[] slots are read
 */
static INLINE void
emit_hw_vertex( struct i915_context *i915,
                const struct vertex_header *vertex)
{
   const struct vertex_info *layout = &i915->current.vertex_info;
   uint dwords_out = 0; /* for debug/sanity */
   uint a;

   assert(!i915->dirty);

   for (a = 0; a < layout->num_attribs; a++) {
      const uint slot = layout->attrib[a].src_index;
      const float *src = vertex->data[slot];
      uint nfloats = 0; /* raw float dwords to copy for this attribute */
      uint k;

      switch (layout->attrib[a].emit) {
      case EMIT_1F:
         nfloats = 1;
         break;
      case EMIT_2F:
         nfloats = 2;
         break;
      case EMIT_3F:
         nfloats = 3;
         break;
      case EMIT_4F:
         nfloats = 4;
         break;
      case EMIT_4UB:
         /* Four floats packed as unsigned bytes into one dword. */
         OUT_BATCH( pack_ub4(float_to_ubyte( src[0] ),
                             float_to_ubyte( src[1] ),
                             float_to_ubyte( src[2] ),
                             float_to_ubyte( src[3] )) );
         dwords_out += 1;
         break;
      case EMIT_4UB_BGRA:
         /* Same packing with components 0 and 2 swapped. */
         OUT_BATCH( pack_ub4(float_to_ubyte( src[2] ),
                             float_to_ubyte( src[1] ),
                             float_to_ubyte( src[0] ),
                             float_to_ubyte( src[3] )) );
         dwords_out += 1;
         break;
      default:
         assert(0);
      }

      /* The float formats share one emission path; nfloats stays zero for
       * the packed and unknown cases.
       */
      for (k = 0; k < nfloats; k++) {
         OUT_BATCH( fui(src[k]) );
      }
      dwords_out += nfloats;
   }

   assert(dwords_out == layout->size);
}
示例7: I915DisplayVideoTextured
/**
 * Textured video display path for i915.
 *
 * Picks a render target (the given pixmap, or a scratch pixmap sized to the
 * region extents when the pixmap is larger than 2048 in either dimension or
 * fails intel_uxa_check_pitch_3d), then walks the clip boxes of dstRegion in
 * groups small enough to fit in one batch, emitting 3D state for each group.
 *
 * NOTE(review): the listing is cut off partway through the state emission,
 * so the use of most parameters (id, width, height, video_pitch,
 * video_pitch2, src_w, src_h, drw_w, drw_h) is not visible here.
 */
void
I915DisplayVideoTextured(ScrnInfoPtr scrn,
intel_adaptor_private *adaptor_priv, int id,
RegionPtr dstRegion,
short width, short height, int video_pitch,
int video_pitch2,
short src_w, short src_h, short drw_w, short drw_h,
PixmapPtr pixmap)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
uint32_t format, ms3, s5, tiling;
BoxPtr pbox = REGION_RECTS(dstRegion);
int nbox_total = REGION_NUM_RECTS(dstRegion);
int nbox_this_time;
int dxo, dyo, pix_xoff, pix_yoff;
PixmapPtr target;
#if 0
ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
video_pitch);
#endif
/* Top-left corner of the destination region's bounding box. */
dxo = dstRegion->extents.x1;
dyo = dstRegion->extents.y1;
if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 ||
!intel_uxa_check_pitch_3d(pixmap)) {
/* Render into a scratch pixmap covering just the region extents. */
ScreenPtr screen = pixmap->drawable.pScreen;
target = screen->CreatePixmap(screen,
dstRegion->extents.x2 - dxo,
dstRegion->extents.y2 - dyo,
pixmap->drawable.depth,
CREATE_PIXMAP_USAGE_SCRATCH);
if (target == NULL)
return;
/* A scratch pixmap without a backing BO is destroyed and the call
* returns without drawing.
*/
if (intel_uxa_get_pixmap_bo(target) == NULL) {
screen->DestroyPixmap(target);
return;
}
/* The scratch pixmap's origin corresponds to the extents' corner. */
pix_xoff = -dxo;
pix_yoff = -dyo;
} else {
target = pixmap;
/* Set up the offset for translating from the given region
* (in screen coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -target->screen_x + target->drawable.x;
pix_yoff = -target->screen_y + target->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
}
/* Batch budget: 200 units of fixed state plus 20 per box, matching the
* count passed to intel_batch_start_atomic() below; the macros convert
* between that count (times 4) and bytes. BATCH_BYTES is the batch BO size
* less 16 bytes.
*/
#define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4)
#define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20)
#define BATCH_BYTES(p) ((p)->batch_bo->size - 16)
while (nbox_total) {
/* Take as many of the remaining boxes as fit in one batch. */
nbox_this_time = nbox_total;
if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel))
nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
nbox_total -= nbox_this_time;
intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);
IntelEmitInvarientState(scrn);
intel->last_3d = LAST_3D_VIDEO;
/* draw rect -- just clipping */
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
OUT_BATCH(0x00000000); /* ymin, xmin */
/* ymax, xmax */
OUT_BATCH((target->drawable.width - 1) |
(target->drawable.height - 1) << 16);
OUT_BATCH(0x00000000); /* yorigin, xorigin */
/* Load S2, S5 and S6: only texture coordinate set 0 is present (2D). */
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
I1_LOAD_S(5) | I1_LOAD_S(6) | 2);
OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
/* Dithering is switched on only when intel->cpp is 2. */
s5 = 0x0;
if (intel->cpp == 2)
s5 |= S5_COLOR_DITHER_ENABLE;
OUT_BATCH(s5); /* S5 - enable bits */
OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
(2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
//.........这里部分代码省略.........
示例8: upload_sbe_state
/**
 * Build and emit the 14-dword 3DSTATE_SBE packet.
 *
 * Walks the fragment program's read inputs to fill in the point-sprite
 * enable mask (dw10), the flat-shading mask (dw11) and the per-input
 * attribute overrides, then derives the URB read length from the highest
 * source attribute that was referenced.
 *
 * NOTE(review): the listing is cut off after the first two dwords are
 * emitted; the rest of the packet is not visible here.
 */
static void
upload_sbe_state(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
/* _NEW_LIGHT */
bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
uint32_t dw1, dw10, dw11;
int i;
int attr = 0, input_index = 0;
int urb_entry_read_offset = 1;
uint16_t attr_overrides[FRAG_ATTRIB_MAX];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
uint32_t point_sprite_origin;
/* FINISHME: Attribute Swizzle Control Mode? */
dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT;
/* _NEW_POINT
*
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted.
*/
if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
} else {
point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
}
dw1 |= point_sprite_origin;
/* dw10 collects one bit per input that takes point-sprite coordinates,
* dw11 one bit per input that is flat shaded.
*/
dw10 = 0;
dw11 = 0;
/* Create the mapping from the FS inputs we produce to the VS outputs
* they source from.
*/
uint32_t max_source_attr = 0;
for (; attr < FRAG_ATTRIB_MAX; attr++) {
enum glsl_interp_qualifier interp_qualifier =
brw->fragment_program->InterpQualifier[attr];
bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1;
/* Inputs the fragment program never reads take no slot. */
if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
continue;
/* Texture coordinates with coord-replace enabled under point sprites,
* and the point coordinate itself, get their dw10 bit set.
*/
if (ctx->Point.PointSprite &&
attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 &&
ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) {
dw10 |= (1 << input_index);
}
if (attr == FRAG_ATTRIB_PNTC)
dw10 |= (1 << input_index);
/* flat shading */
if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
(shade_model_flat && is_gl_Color &&
interp_qualifier == INTERP_QUALIFIER_NONE))
dw11 |= (1 << input_index);
/* The hardware can only do the overrides on 16 overrides at a
* time, and the other up to 16 have to be lined up so that the
* input index = the output index. We'll need to do some
* tweaking to make sure that's the case.
*/
assert(input_index < 16 || attr == input_index);
/* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
attr_overrides[input_index++] =
get_attr_override(&brw->vs.prog_data->vue_map,
urb_entry_read_offset, attr,
ctx->VertexProgram._TwoSideEnabled,
&max_source_attr);
}
/* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for
* 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length":
*
* "This field should be set to the minimum length required to read the
* maximum source attribute. The maximum source attribute is indicated
* by the maximum value of the enabled Attribute # Source Attribute if
* Attribute Swizzle Enable is set, Number of Output Attributes-1 if
* enable is not set.
*
* read_length = ceiling((max_source_attr + 1) / 2)"
*/
uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
/* Zero the override slots that no read input claimed. */
for (; input_index < FRAG_ATTRIB_MAX; input_index++)
attr_overrides[input_index] = 0;
BEGIN_BATCH(14);
OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
OUT_BATCH(dw1);
//.........这里部分代码省略.........
示例9: gen7_blorp_emit_depth_stencil_config
/**
 * Emit the gen7 depth, hierarchical-depth and stencil buffer packets for a
 * blorp operation.
 *
 * 3DSTATE_DEPTH_BUFFER and 3DSTATE_HIER_DEPTH_BUFFER are pointed at
 * params->depth.mt and its HiZ miptree; 3DSTATE_STENCIL_BUFFER is emitted
 * with a zeroed payload.
 */
static void
gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   struct intel_context *intel = &brw->intel;
   const uint32_t origin_x = params->depth.x_offset;
   const uint32_t origin_y = params->depth.y_offset;
   uint32_t mask_x, mask_y;

   gen6_blorp_compute_tile_masks(params, &mask_x, &mask_y);

   /* 3DSTATE_DEPTH_BUFFER */
   {
      /* The tile-aligned part of the draw offset is folded into the buffer
       * address.
       */
      const uint32_t depth_offset =
         intel_region_get_aligned_offset(params->depth.mt->region,
                                         origin_x & ~mask_x,
                                         origin_y & ~mask_y, false);

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * The intra-tile offsets come from the mipmap layout, which is only
       * aligned to multiples of 4, so nothing guarantees that alignment.
       * To avoid hanging the GPU the low three bits are simply cleared.
       * This is a temporary workaround until we come up with a better
       * solution.
       */
      const uint32_t tile_x = (origin_x & mask_x) & ~7;
      const uint32_t tile_y = (origin_y & mask_y) & ~7;

      intel_emit_depth_stall_flushes(intel);

      BEGIN_BATCH(7);

      const uint32_t pitch_bytes =
         params->depth.mt->region->pitch * params->depth.mt->region->cpp;
      const uint32_t dw1 = (pitch_bytes - 1) |
                           params->depth_format << 18 |
                           1 << 22 | /* hiz enable */
                           1 << 28 | /* depth write */
                           BRW_SURFACE_2D << 29;
      const uint32_t dw3 = (params->depth.width + tile_x - 1) << 4 |
                           (params->depth.height + tile_y - 1) << 18;
      const uint32_t dw5 = tile_x |
                           tile_y << 16;

      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(dw1);
      OUT_RELOC(params->depth.mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
      OUT_BATCH(dw3);
      OUT_BATCH(0);
      OUT_BATCH(dw5);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz = params->depth.mt->hiz_mt->region;
      /* Same tile-aligned origin as the depth buffer, with Y halved. */
      const uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz,
                                         origin_x & ~mask_x,
                                         (origin_y & ~mask_y) / 2, false);

      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz->pitch * hiz->cpp - 1);
      OUT_RELOC(hiz->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER: header plus two zero dwords. */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      for (int dword = 1; dword < 3; dword++) {
         OUT_BATCH(0);
      }
      ADVANCE_BATCH();
   }
}
示例10: brw_upload_constant_buffer
//.........这里部分代码省略.........
for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
buf[offset + i] = *brw->wm.prog_data->base.param[i];
}
}
/* clipper constants */
if (brw->curbe.clip_size) {
GLuint offset = brw->curbe.clip_start * 16;
GLuint j;
/* If any planes are going this way, send them all this way:
*/
for (i = 0; i < 6; i++) {
buf[offset + i * 4 + 0].f = fixed_plane[i][0];
buf[offset + i * 4 + 1].f = fixed_plane[i][1];
buf[offset + i * 4 + 2].f = fixed_plane[i][2];
buf[offset + i * 4 + 3].f = fixed_plane[i][3];
}
/* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
* clip-space:
*/
clip_planes = brw_select_clip_planes(ctx);
for (j = 0; j < MAX_CLIP_PLANES; j++) {
if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
buf[offset + i * 4 + 0].f = clip_planes[j][0];
buf[offset + i * 4 + 1].f = clip_planes[j][1];
buf[offset + i * 4 + 2].f = clip_planes[j][2];
buf[offset + i * 4 + 3].f = clip_planes[j][3];
i++;
}
}
}
/* vertex shader constants */
if (brw->curbe.vs_size) {
_mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters);
GLuint offset = brw->curbe.vs_start * 16;
/* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
}
}
if (0) {
for (i = 0; i < sz*16; i+=4)
fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
}
/* Because this provokes an action (ie copy the constants into the
* URB), it shouldn't be shortcircuited if identical to the
* previous time - because eg. the urb destination may have
* changed, or the urb contents different to last time.
*
* Note that the data referred to is actually copied internally,
* not just used in place according to passed pointer.
*
* It appears that the CS unit takes care of using each available
* URB entry (Const URB Entry == CURBE) in turn, and issuing
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
emit:
/* Work around mysterious 965 hangs that appear to happen if you do
* two 3DPRIMITIVEs with only a CONSTANT_BUFFER inbetween. If we
* haven't already flushed for some other reason, explicitly do so.
*
* We've found no documented reason why this should be necessary.
*/
if (brw->gen == 4 && !brw->is_g4x &&
(brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
BEGIN_BATCH(1);
OUT_BATCH(MI_FLUSH);
ADVANCE_BATCH();
}
/* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
* (CONSTANT_BUFFER (CURBE Load)):
*
* "Modifying the CS URB allocation via URB_FENCE invalidates any
* previous CURBE entries. Therefore software must subsequently
* [re]issue a CONSTANT_BUFFER command before CURBE data can be used
* in the pipeline."
*/
BEGIN_BATCH(2);
if (brw->curbe.total_size == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
OUT_BATCH(0);
} else {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
OUT_RELOC(brw->curbe.curbe_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
}
示例11: upload_sf_state
//.........这里部分代码省略.........
break;
}
switch (ctx->Polygon.BackMode) {
case GL_FILL:
dw1 |= GEN6_SF_BACK_SOLID;
break;
case GL_LINE:
dw1 |= GEN6_SF_BACK_WIREFRAME;
break;
case GL_POINT:
dw1 |= GEN6_SF_BACK_POINT;
break;
default:
assert(0);
break;
}
dw2 = 0;
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
case GL_FRONT:
dw2 |= GEN6_SF_CULL_FRONT;
break;
case GL_BACK:
dw2 |= GEN6_SF_CULL_BACK;
break;
case GL_FRONT_AND_BACK:
dw2 |= GEN6_SF_CULL_BOTH;
break;
default:
assert(0);
break;
}
} else {
dw2 |= GEN6_SF_CULL_NONE;
}
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw2 |= GEN6_SF_SCISSOR_ENABLE;
/* _NEW_LINE */
{
uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
/* TODO: line width of 0 is not allowed when MSAA enabled */
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
dw2 |= GEN6_SF_LINE_AA_ENABLE;
dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
if (ctx->Line.StippleFlag && intel->is_haswell) {
dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
}
/* _NEW_MULTISAMPLE */
if (multisampled_fbo && ctx->Multisample.Enabled)
dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
/* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select?
*/
dw3 = GEN6_SF_LINE_AA_MODE_TRUE;
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
/* Clamp to the hardware limits and convert to fixed point */
dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
dw3 |=
(2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
(2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
(1 << GEN6_SF_LINE_PROVOKE_SHIFT);
} else {
dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
}
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
OUT_BATCH(dw3);
OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
ADVANCE_BATCH();
}
示例12: brw_emit_vertices
/**
 * Emit the vertex buffer and vertex element state for the current draw.
 *
 * With no enabled vertex inputs a single padding VERTEX_ELEMENT is emitted
 * and the function returns. Otherwise 3DSTATE_VERTEX_BUFFERS is emitted
 * (when there are buffers) followed by 3DSTATE_VERTEX_ELEMENTS.
 *
 * NOTE(review): the listing is cut off right after the VERTEX_ELEMENTS
 * header dword; the per-element dwords are not visible here.
 */
static void brw_emit_vertices(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
GLuint i, nr_elements;
brw_prepare_vertices(brw);
brw_emit_query_begin(brw);
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), emit a single pad
* VERTEX_ELEMENT struct and bail.
*
* The stale VB state stays in place, but they don't do anything unless
* a VE loads from them.
*/
if (brw->vb.nr_enabled == 0) {
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
/* Gen6+ uses its own index shift and valid bit for VE dword 0. */
if (intel->gen >= 6) {
OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
} else {
OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
}
/* Components 0-2 store 0 and component 3 stores 1.0. */
OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
CACHED_BATCH();
return;
}
/* Now emit VB and VEP state packets.
*/
if (brw->vb.nr_buffers) {
if (intel->gen >= 6) {
assert(brw->vb.nr_buffers <= 33);
} else {
assert(brw->vb.nr_buffers <= 17);
}
/* One header dword plus four dwords per vertex buffer. */
BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
for (i = 0; i < brw->vb.nr_buffers; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
uint32_t dw0;
/* A non-zero step rate selects instance data access; the access and
* index fields use generation-specific encodings.
*/
if (intel->gen >= 6) {
dw0 = buffer->step_rate
? GEN6_VB0_ACCESS_INSTANCEDATA
: GEN6_VB0_ACCESS_VERTEXDATA;
dw0 |= i << GEN6_VB0_INDEX_SHIFT;
} else {
dw0 = buffer->step_rate
? BRW_VB0_ACCESS_INSTANCEDATA
: BRW_VB0_ACCESS_VERTEXDATA;
dw0 |= i << BRW_VB0_INDEX_SHIFT;
}
if (intel->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
/* Gen5+ gets a second relocation at the last byte of the BO; older
* generations get zero in this dword.
* NOTE(review): presumably the buffer end address -- confirm.
*/
if (intel->gen >= 5) {
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
} else
OUT_BATCH(0);
OUT_BATCH(buffer->step_rate);
/* Record what was just emitted for this buffer slot. */
brw->vb.current_buffers[i].handle = buffer->bo->handle;
brw->vb.current_buffers[i].offset = buffer->offset;
brw->vb.current_buffers[i].stride = buffer->stride;
brw->vb.current_buffers[i].step_rate = buffer->step_rate;
}
brw->vb.nr_current_buffers = i;
ADVANCE_BATCH();
}
/* uses_vertexid adds one extra element on top of the enabled inputs. */
nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
/* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
* for VertexID/InstanceID.
*/
if (intel->gen >= 6) {
assert(nr_elements <= 34);
} else {
assert(nr_elements <= 18);
}
/* One header dword plus two dwords per vertex element. */
BEGIN_BATCH(1 + nr_elements * 2);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
//.........这里部分代码省略.........
示例13: gen6_blorp_emit_depth_stencil_config
/**
 * Emit the gen6 depth, hierarchical-depth and stencil buffer packets for a
 * blorp operation.
 *
 * 3DSTATE_DEPTH_BUFFER and 3DSTATE_HIER_DEPTH_BUFFER are pointed at
 * params->depth.mt and its HiZ miptree; 3DSTATE_STENCIL_BUFFER is emitted
 * with a zeroed payload.
 */
static void
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   /* Not referenced directly below; presumably consumed by name inside the
    * WARN_ONCE macro -- confirm before removing.
    */
   struct gl_context *ctx = &brw->ctx;
   const uint32_t origin_x = params->depth.x_offset;
   const uint32_t origin_y = params->depth.y_offset;
   uint32_t mask_x, mask_y;

   brw_get_depthstencil_tile_masks(params->depth.mt,
                                   params->depth.level,
                                   params->depth.layer,
                                   NULL,
                                   &mask_x, &mask_y);

   /* 3DSTATE_DEPTH_BUFFER */
   {
      /* Offsets inside the tile, before any alignment fix-up. */
      const uint32_t raw_tile_x = origin_x & mask_x;
      const uint32_t raw_tile_y = origin_y & mask_y;
      /* The tile-aligned part of the draw offset is folded into the buffer
       * address.
       */
      const uint32_t depth_offset =
         intel_region_get_aligned_offset(params->depth.mt->region,
                                         origin_x & ~mask_x,
                                         origin_y & ~mask_y, false);

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * The intra-tile offsets come from the mipmap layout, which is only
       * aligned to multiples of 4, so nothing guarantees that alignment.
       * To avoid hanging the GPU the low three bits are simply cleared
       * (after warning once).  This is a temporary workaround until we come
       * up with a better solution.
       */
      WARN_ONCE((raw_tile_x & 7) || (raw_tile_y & 7),
                "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
                "Truncating offset, bad rendering may occur.\n");
      const uint32_t tile_x = raw_tile_x & ~7;
      const uint32_t tile_y = raw_tile_y & ~7;

      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);

      BEGIN_BATCH(7);

      const uint32_t dw1 = (params->depth.mt->region->pitch - 1) |
                           params->depth_format << 18 |
                           1 << 21 | /* separate stencil enable */
                           1 << 22 | /* hiz enable */
                           BRW_TILEWALK_YMAJOR << 26 |
                           1 << 27 | /* y-tiled */
                           BRW_SURFACE_2D << 29;
      const uint32_t dw3 = BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
                           (params->depth.width + tile_x - 1) << 6 |
                           (params->depth.height + tile_y - 1) << 19;
      const uint32_t dw5 = tile_x |
                           tile_y << 16;

      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(dw1);
      OUT_RELOC(params->depth.mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
      OUT_BATCH(dw3);
      OUT_BATCH(0);
      OUT_BATCH(dw5);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz = params->depth.mt->hiz_mt->region;
      /* Same tile-aligned origin as the depth buffer, with Y halved. */
      const uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz,
                                         origin_x & ~mask_x,
                                         (origin_y & ~mask_y) / 2, false);

      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz->pitch - 1);
      OUT_RELOC(hiz->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER: header plus two zero dwords. */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      for (int dword = 1; dword < 3; dword++) {
         OUT_BATCH(0);
      }
      ADVANCE_BATCH();
   }
}
示例14: gen6_blorp_emit_wm_config
/**
 * Emit the 9-dword 3DSTATE_WM packet for a blorp operation.
 *
 * The HiZ operation is taken from params->hiz_op, and thread dispatch is
 * turned on only when params->use_wm_prog is set.
 *
 * \param brw          context the batch commands are emitted into
 * \param params       blorp parameters (hiz_op, use_wm_prog, num_samples)
 * \param prog_offset  kernel start pointer; emitted only if use_wm_prog is set
 * \param prog_data    program data; dereferenced unconditionally when
 *                     use_wm_prog is set, NULL-checked otherwise
 */
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
                          const brw_blorp_params *params,
                          uint32_t prog_offset,
                          brw_blorp_prog_data *prog_data)
{
   uint32_t kernel_start = 0;
   uint32_t sampler_dw = 0;   /* packet dword 2 */
   uint32_t hiz_dw = 0;       /* packet dword 4 */
   uint32_t thread_dw = 0;    /* packet dword 5 */
   uint32_t raster_dw = 0;    /* packet dword 6 */

   /* Max threads (dw5.25:31) has to be nonzero even with thread dispatch
    * disabled, otherwise the GPU hangs.  The documentation does not say so
    * outright, but it gives the valid range of the field as
    * [1,39] = [2,40] threads, which leaves zero out.
    *
    * For safety (and to keep extra code to a minimum) the WM state is fully
    * configured regardless of whether a WM program is present.
    */
   thread_dw |= GEN6_WM_LINE_AA_WIDTH_1_0;
   thread_dw |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
   thread_dw |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;

   /* Pick the HiZ operation bit, if any. */
   switch (params->hiz_op) {
   case GEN6_HIZ_OP_NONE:
      break;
   case GEN6_HIZ_OP_DEPTH_CLEAR:
      hiz_dw |= GEN6_WM_DEPTH_CLEAR;
      break;
   case GEN6_HIZ_OP_DEPTH_RESOLVE:
      hiz_dw |= GEN6_WM_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_HIZ_RESOLVE:
      hiz_dw |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
      break;
   default:
      assert(0);
      break;
   }

   /* No interpolation and no inputs from SF. */
   raster_dw |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
   raster_dw |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT;

   if (params->use_wm_prog) {
      /* We are rendering: point at the kernel and enable dispatch. */
      kernel_start = prog_offset;
      sampler_dw |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
      hiz_dw |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
      thread_dw |= GEN6_WM_16_DISPATCH_ENABLE;
      thread_dw |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
      thread_dw |= GEN6_WM_DISPATCH_ENABLE;
   }

   /* Multisample rasterization and dispatch mode. */
   if (params->num_samples <= 1) {
      raster_dw |= GEN6_WM_MSRAST_OFF_PIXEL;
      raster_dw |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   } else {
      raster_dw |= GEN6_WM_MSRAST_ON_PATTERN;
      if (!prog_data || !prog_data->persample_msaa_dispatch)
         raster_dw |= GEN6_WM_MSDISPMODE_PERPIXEL;
      else
         raster_dw |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(kernel_start);
   OUT_BATCH(sampler_dw);
   OUT_BATCH(0); /* No scratch needed */
   OUT_BATCH(hiz_dw);
   OUT_BATCH(thread_dw);
   OUT_BATCH(raster_dw);
   OUT_BATCH(0); /* No other programs */
   OUT_BATCH(0); /* No other programs */
   ADVANCE_BATCH();
}
示例15: gen7_upload_ps_state
static void
gen7_upload_ps_state(struct brw_context *brw,
const struct brw_stage_state *stage_state,
const struct brw_wm_prog_data *prog_data,
bool enable_dual_src_blend, unsigned sample_mask,
unsigned fast_clear_op)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint32_t dw2, dw4, dw5, ksp0, ksp2;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
dw2 = dw4 = dw5 = ksp2 = 0;
const unsigned sampler_count =
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
dw2 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT);
dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
if (prog_data->base.use_alt_mode)
dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
/* Haswell requires the sample mask to be set in this packet as well as
* in 3DSTATE_SAMPLE_MASK; the values should match. */
/* _NEW_BUFFERS, _NEW_MULTISAMPLE */
if (brw->is_haswell)
dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK);
dw4 |= (devinfo->max_wm_threads - 1) << max_threads_shift;
if (prog_data->base.nr_params > 0)
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
/* From the IVB PRM, volume 2 part 1, page 287:
* "This bit is inserted in the PS payload header and made available to
* the DataPort (either via the message header or via header bypass) to
* indicate that oMask data (one or two phases) is included in Render
* Target Write messages. If present, the oMask data is used to mask off
* samples."
*/
if (prog_data->uses_omask)
dw4 |= GEN7_PS_OMASK_TO_RENDER_TARGET;
/* From the IVB PRM, volume 2 part 1, page 287:
* "If the PS kernel does not need the Position XY Offsets to
* compute a Position Value, then this field should be programmed
* to POSOFFSET_NONE."
* "SW Recommendation: If the PS kernel needs the Position Offsets
* to compute a Position XY value, this field should match Position
* ZW Interpolation Mode to ensure a consistent position.xyzw
* computation."
* We only require XY sample offsets. So, this recommendation doesn't
* look useful at the moment. We might need this in future.
*/
if (prog_data->uses_pos_offset)
dw4 |= GEN7_PS_POSOFFSET_SAMPLE;
else
dw4 |= GEN7_PS_POSOFFSET_NONE;
/* The hardware wedges if you have this bit set but don't turn on any dual
* source blend factors.
*/
if (enable_dual_src_blend)
dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
/* BRW_NEW_FS_PROG_DATA */
if (prog_data->num_varying_inputs != 0)
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
dw4 |= fast_clear_op;
if (prog_data->dispatch_16)
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (prog_data->dispatch_8)
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
dw5 |= prog_data->base.dispatch_grf_start_reg <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
dw5 |= prog_data->dispatch_grf_start_reg_2 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
ksp0 = stage_state->prog_offset;
ksp2 = stage_state->prog_offset + prog_data->prog_offset_2;
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(ksp0);
OUT_BATCH(dw2);
if (prog_data->base.total_scratch) {
OUT_RELOC(brw->wm.base.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(dw4);
OUT_BATCH(dw5);
//.........这里部分代码省略.........