/rpython/jit/metainterp/optimizeopt/schedule.py
Python | 1100 lines | 856 code | 125 blank | 119 comment | 183 complexity | e3c5c7ca4b441bf71fbcd24d5558721f MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
- from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT,
- ConstInt, ConstFloat, TargetToken)
- from rpython.jit.metainterp.resoperation import (rop, ResOperation,
- GuardResOp, VecOperation, OpHelpers, VecOperationNew,
- VectorizationInfo)
- from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
- MemoryRef, Node, IndexVar)
- from rpython.jit.metainterp.optimizeopt.renamer import Renamer
- from rpython.jit.metainterp.resume import AccumInfo
- from rpython.rlib.objectmodel import we_are_translated
- from rpython.jit.metainterp.jitexc import NotAProfitableLoop
- from rpython.rlib.objectmodel import specialize, always_inline
- from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
- from rpython.rtyper.lltypesystem.lloperation import llop
- from rpython.rtyper.lltypesystem import lltype
- def forwarded_vecinfo(op):
- fwd = op.get_forwarded()
- if fwd is None or not isinstance(fwd, VectorizationInfo):
- # the optimizer clears getforwarded AFTER
- # vectorization, it happens that this is not clean
- fwd = VectorizationInfo(op)
- if not op.is_constant():
- op.set_forwarded(fwd)
- return fwd
- class SchedulerState(object):
- def __init__(self, graph):
- self.renamer = Renamer()
- self.graph = graph
- self.oplist = []
- self.worklist = []
- self.invariant_oplist = []
- self.invariant_vector_vars = []
- self.seen = {}
- def post_schedule(self):
- loop = self.graph.loop
- self.renamer.rename(loop.jump)
- self.ensure_args_unpacked(loop.jump)
- loop.operations = self.oplist
- loop.prefix = self.invariant_oplist
- if len(self.invariant_vector_vars) + len(self.invariant_oplist) > 0:
- # label
- args = loop.label.getarglist_copy() + self.invariant_vector_vars
- opnum = loop.label.getopnum()
- op = loop.label.copy_and_change(opnum, args)
- self.renamer.rename(op)
- loop.prefix_label = op
- # jump
- args = loop.jump.getarglist_copy() + self.invariant_vector_vars
- opnum = loop.jump.getopnum()
- op = loop.jump.copy_and_change(opnum, args)
- self.renamer.rename(op)
- loop.jump = op
- def profitable(self):
- return True
- def prepare(self):
- for node in self.graph.nodes:
- if node.depends_count() == 0:
- self.worklist.insert(0, node)
- def emit(self, node, scheduler):
- # implement me in subclass. e.g. as in VecScheduleState
- return False
- def delay(self, node):
- return False
- def has_more(self):
- return len(self.worklist) > 0
- def ensure_args_unpacked(self, op):
- pass
- def post_emit(self, node):
- pass
- def pre_emit(self, node):
- pass
- class Scheduler(object):
- """ Create an instance of this class to (re)schedule a vector trace. """
- def __init__(self):
- pass
- def next(self, state):
- """ select the next candidate node to be emitted, or None """
- worklist = state.worklist
- visited = 0
- while len(worklist) > 0:
- if visited == len(worklist):
- return None
- node = worklist.pop()
- if node.emitted:
- continue
- if not self.delay(node, state):
- return node
- worklist.insert(0, node)
- visited += 1
- return None
- def try_to_trash_pack(self, state):
- # one element a pack has several dependencies pointing to
- # it thus we MUST skip this pack!
- if len(state.worklist) > 0:
- # break the first!
- i = 0
- node = state.worklist[i]
- i += 1
- while i < len(state.worklist) and not node.pack:
- node = state.worklist[i]
- i += 1
- if not node.pack:
- return False
- pack = node.pack
- for n in node.pack.operations:
- if n.depends_count() > 0:
- pack.clear()
- return True
- else:
- return False
- return False
- def delay(self, node, state):
- """ Delay this operation?
- Only if any dependency has not been resolved """
- if state.delay(node):
- return True
- return node.depends_count() != 0
- def mark_emitted(self, node, state, unpack=True):
- """ An operation has been emitted, adds new operations to the worklist
- whenever their dependency count drops to zero.
- Keeps worklist sorted (see priority) """
- worklist = state.worklist
- provides = node.provides()[:]
- for dep in provides: # COPY
- target = dep.to
- node.remove_edge_to(target)
- if not target.emitted and target.depends_count() == 0:
- # sorts them by priority
- i = len(worklist)-1
- while i >= 0:
- cur = worklist[i]
- c = (cur.priority - target.priority)
- if c < 0: # meaning itnode.priority < target.priority:
- worklist.insert(i+1, target)
- break
- elif c == 0:
- # if they have the same priority, sort them
- # using the original position in the trace
- if target.getindex() < cur.getindex():
- worklist.insert(i+1, target)
- break
- i -= 1
- else:
- worklist.insert(0, target)
- node.clear_dependencies()
- node.emitted = True
- if not node.is_imaginary():
- op = node.getoperation()
- state.renamer.rename(op)
- if unpack:
- state.ensure_args_unpacked(op)
- state.post_emit(node)
- def walk_and_emit(self, state):
- """ Emit all the operations into the oplist parameter.
- Initiates the scheduling. """
- assert isinstance(state, SchedulerState)
- while state.has_more():
- node = self.next(state)
- if node:
- if not state.emit(node, self):
- if not node.emitted:
- state.pre_emit(node)
- self.mark_emitted(node, state)
- if not node.is_imaginary():
- op = node.getoperation()
- state.seen[op] = None
- state.oplist.append(op)
- continue
- # it happens that packs can emit many nodes that have been
- # added to the scheuldable_nodes list, in this case it could
- # be that no next exists even though the list contains elements
- if not state.has_more():
- break
- if self.try_to_trash_pack(state):
- continue
- raise AssertionError("schedule failed cannot continue. possible reason: cycle")
- if not we_are_translated():
- for node in state.graph.nodes:
- assert node.emitted
- def failnbail_transformation(msg):
- msg = '%s\n' % msg
- if we_are_translated():
- llop.debug_print(lltype.Void, msg)
- else:
- import pdb; pdb.set_trace()
- raise NotImplementedError(msg)
- class TypeRestrict(object):
- ANY_TYPE = '\x00'
- ANY_SIZE = -1
- ANY_SIGN = -1
- ANY_COUNT = -1
- SIGNED = 1
- UNSIGNED = 0
- def __init__(self,
- type=ANY_TYPE,
- bytesize=ANY_SIZE,
- count=ANY_SIGN,
- sign=ANY_COUNT):
- self.type = type
- self.bytesize = bytesize
- self.sign = sign
- self.count = count
- @always_inline
- def any_size(self):
- return self.bytesize == TypeRestrict.ANY_SIZE
- @always_inline
- def any_count(self):
- return self.count == TypeRestrict.ANY_COUNT
- def check(self, value):
- vecinfo = forwarded_vecinfo(value)
- assert vecinfo.datatype != '\x00'
- if self.type != TypeRestrict.ANY_TYPE:
- if self.type != vecinfo.datatype:
- msg = "type mismatch %s != %s" % \
- (self.type, vecinfo.datatype)
- failnbail_transformation(msg)
- assert vecinfo.bytesize > 0
- if not self.any_size():
- if self.bytesize != vecinfo.bytesize:
- msg = "bytesize mismatch %s != %s" % \
- (self.bytesize, vecinfo.bytesize)
- failnbail_transformation(msg)
- assert vecinfo.count > 0
- if self.count != TypeRestrict.ANY_COUNT:
- if vecinfo.count < self.count:
- msg = "count mismatch %s < %s" % \
- (self.count, vecinfo.count)
- failnbail_transformation(msg)
- if self.sign != TypeRestrict.ANY_SIGN:
- if bool(self.sign) == vecinfo.sign:
- msg = "sign mismatch %s < %s" % \
- (self.sign, vecinfo.sign)
- failnbail_transformation(msg)
- def max_input_count(self, count):
- """ How many """
- if self.count != TypeRestrict.ANY_COUNT:
- return self.count
- return count
- class OpRestrict(object):
- def __init__(self, argument_restris):
- self.argument_restrictions = argument_restris
- def check_operation(self, state, pack, op):
- pass
- def crop_vector(self, op, newsize, size):
- return newsize, size
- def must_crop_vector(self, op, index):
- restrict = self.argument_restrictions[index]
- vecinfo = forwarded_vecinfo(op.getarg(index))
- size = vecinfo.bytesize
- newsize = self.crop_to_size(op, index)
- return not restrict.any_size() and newsize != size
- @always_inline
- def crop_to_size(self, op, index):
- restrict = self.argument_restrictions[index]
- return restrict.bytesize
- def opcount_filling_vector_register(self, op, vec_reg_size):
- """ How many operations of that kind can one execute
- with a machine instruction of register size X?
- """
- if op.is_typecast():
- if op.casts_down():
- size = op.cast_input_bytesize(vec_reg_size)
- return size // op.cast_from_bytesize()
- else:
- return vec_reg_size // op.cast_to_bytesize()
- vecinfo = forwarded_vecinfo(op)
- return vec_reg_size // vecinfo.bytesize
- class GuardRestrict(OpRestrict):
- def opcount_filling_vector_register(self, op, vec_reg_size):
- arg = op.getarg(0)
- vecinfo = forwarded_vecinfo(arg)
- return vec_reg_size // vecinfo.bytesize
- class LoadRestrict(OpRestrict):
- def opcount_filling_vector_register(self, op, vec_reg_size):
- assert rop.is_primitive_load(op.opnum)
- descr = op.getdescr()
- return vec_reg_size // descr.get_item_size_in_bytes()
- class StoreRestrict(OpRestrict):
- def __init__(self, argument_restris):
- self.argument_restrictions = argument_restris
- def must_crop_vector(self, op, index):
- vecinfo = forwarded_vecinfo(op.getarg(index))
- bytesize = vecinfo.bytesize
- return self.crop_to_size(op, index) != bytesize
- @always_inline
- def crop_to_size(self, op, index):
- # there is only one parameter that needs to be transformed!
- descr = op.getdescr()
- return descr.get_item_size_in_bytes()
- def opcount_filling_vector_register(self, op, vec_reg_size):
- assert rop.is_primitive_store(op.opnum)
- descr = op.getdescr()
- return vec_reg_size // descr.get_item_size_in_bytes()
- class OpMatchSizeTypeFirst(OpRestrict):
- def check_operation(self, state, pack, op):
- i = 0
- infos = [forwarded_vecinfo(o) for o in op.getarglist()]
- arg0 = op.getarg(i)
- while arg0.is_constant() and i < op.numargs():
- i += 1
- arg0 = op.getarg(i)
- vecinfo = forwarded_vecinfo(arg0)
- bytesize = vecinfo.bytesize
- datatype = vecinfo.datatype
- for arg in op.getarglist():
- if arg.is_constant():
- continue
- curvecinfo = forwarded_vecinfo(arg)
- if curvecinfo.bytesize != bytesize:
- raise NotAVectorizeableLoop()
- if curvecinfo.datatype != datatype:
- raise NotAVectorizeableLoop()
- class trans(object):
- TR_ANY = TypeRestrict()
- TR_ANY_FLOAT = TypeRestrict(FLOAT)
- TR_ANY_INTEGER = TypeRestrict(INT)
- TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
- TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
- TR_INT32_2 = TypeRestrict(INT, 4, 2)
- OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
- OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
- STORE_RESTRICT = StoreRestrict([None, None, TR_ANY])
- LOAD_RESTRICT = LoadRestrict([])
- GUARD_RESTRICT = GuardRestrict([TR_ANY_INTEGER])
- # note that the following definition is x86 arch specific
- MAPPING = {
- rop.VEC_INT_ADD: OR_MSTF_I,
- rop.VEC_INT_SUB: OR_MSTF_I,
- rop.VEC_INT_MUL: OR_MSTF_I,
- rop.VEC_INT_AND: OR_MSTF_I,
- rop.VEC_INT_OR: OR_MSTF_I,
- rop.VEC_INT_XOR: OR_MSTF_I,
- rop.VEC_INT_EQ: OR_MSTF_I,
- rop.VEC_INT_NE: OR_MSTF_I,
- rop.VEC_FLOAT_ADD: OR_MSTF_F,
- rop.VEC_FLOAT_SUB: OR_MSTF_F,
- rop.VEC_FLOAT_MUL: OR_MSTF_F,
- rop.VEC_FLOAT_TRUEDIV: OR_MSTF_F,
- rop.VEC_FLOAT_ABS: OpRestrict([TR_ANY_FLOAT]),
- rop.VEC_FLOAT_NEG: OpRestrict([TR_ANY_FLOAT]),
- rop.VEC_RAW_STORE: STORE_RESTRICT,
- rop.VEC_SETARRAYITEM_RAW: STORE_RESTRICT,
- rop.VEC_SETARRAYITEM_GC: STORE_RESTRICT,
- rop.VEC_RAW_LOAD_I: LOAD_RESTRICT,
- rop.VEC_RAW_LOAD_F: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_RAW_I: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_RAW_F: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_GC_I: LOAD_RESTRICT,
- rop.VEC_GETARRAYITEM_GC_F: LOAD_RESTRICT,
- rop.VEC_GUARD_TRUE: GUARD_RESTRICT,
- rop.VEC_GUARD_FALSE: GUARD_RESTRICT,
- ## irregular
- rop.VEC_INT_SIGNEXT: OpRestrict([TR_ANY_INTEGER]),
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpRestrict([TR_DOUBLE_2]),
- # weird but the trace will store single floats in int boxes
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpRestrict([TR_INT32_2]),
- rop.VEC_CAST_FLOAT_TO_INT: OpRestrict([TR_DOUBLE_2]),
- rop.VEC_CAST_INT_TO_FLOAT: OpRestrict([TR_INT32_2]),
- rop.VEC_FLOAT_EQ: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
- rop.VEC_FLOAT_NE: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
- rop.VEC_INT_IS_TRUE: OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
- }
- @staticmethod
- def get(op):
- res = trans.MAPPING.get(op.vector, None)
- if not res:
- failnbail_transformation("could not get OpRestrict for " + str(op))
- return res
- def turn_into_vector(state, pack):
- """ Turn a pack into a vector instruction """
- check_if_pack_supported(state, pack)
- state.costmodel.record_pack_savings(pack, pack.numops())
- left = pack.leftmost()
- oprestrict = trans.get(left)
- if oprestrict is not None:
- oprestrict.check_operation(state, pack, left)
- args = left.getarglist_copy()
- prepare_arguments(state, pack, args)
- vecop = VecOperation(left.vector, args, left,
- pack.numops(), left.getdescr())
- for i,node in enumerate(pack.operations):
- op = node.getoperation()
- if op.returns_void():
- continue
- state.setvector_of_box(op,i,vecop)
- if pack.is_accumulating() and not op.is_guard():
- state.renamer.start_renaming(op, vecop)
- if left.is_guard():
- prepare_fail_arguments(state, pack, left, vecop)
- state.oplist.append(vecop)
- assert vecop.count >= 1
- def prepare_arguments(state, pack, args):
- # Transforming one argument to a vector box argument
- # The following cases can occur:
- # 1) argument is present in the box_to_vbox map.
- # a) vector can be reused immediatly (simple case)
- # b) the size of the input is mismatching (crop the vector)
- # c) values are scattered in differnt registers
- # d) the operand is not at the right position in the vector
- # 2) argument is not known to reside in a vector
- # a) expand vars/consts before the label and add as argument
- # b) expand vars created in the loop body
- #
- oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
- if not oprestrict:
- return
- restrictions = oprestrict.argument_restrictions
- for i,arg in enumerate(args):
- if i >= len(restrictions) or restrictions[i] is None:
- # ignore this argument
- continue
- restrict = restrictions[i]
- if arg.returns_vector():
- restrict.check(arg)
- continue
- pos, vecop = state.getvector_of_box(arg)
- if not vecop:
- # 2) constant/variable expand this box
- expand(state, pack, args, arg, i)
- restrict.check(args[i])
- continue
- # 1)
- args[i] = vecop # a)
- assemble_scattered_values(state, pack, args, i) # c)
- crop_vector(state, oprestrict, restrict, pack, args, i) # b)
- position_values(state, restrict, pack, args, i, pos) # d)
- restrict.check(args[i])
- def prepare_fail_arguments(state, pack, left, vecop):
- assert isinstance(left, GuardResOp)
- assert isinstance(vecop, GuardResOp)
- args = left.getfailargs()[:]
- for i, arg in enumerate(args):
- pos, newarg = state.getvector_of_box(arg)
- if newarg is None:
- newarg = arg
- if newarg.is_vector(): # can be moved to guard exit!
- newarg = unpack_from_vector(state, newarg, 0, 1)
- args[i] = newarg
- vecop.setfailargs(args)
- # TODO vecop.rd_snapshot = left.rd_snapshot
- @always_inline
- def crop_vector(state, oprestrict, restrict, pack, args, i):
- # convert size i64 -> i32, i32 -> i64, ...
- arg = args[i]
- vecinfo = forwarded_vecinfo(arg)
- size = vecinfo.bytesize
- left = pack.leftmost()
- if oprestrict.must_crop_vector(left, i):
- newsize = oprestrict.crop_to_size(left, i)
- assert arg.type == 'i'
- state._prevent_signext(newsize, size)
- count = vecinfo.count
- vecop = VecOperationNew(rop.VEC_INT_SIGNEXT, [arg, ConstInt(newsize)],
- 'i', newsize, vecinfo.signed, count)
- state.oplist.append(vecop)
- state.costmodel.record_cast_int(size, newsize, count)
- args[i] = vecop
- @always_inline
- def assemble_scattered_values(state, pack, args, index):
- args_at_index = [node.getoperation().getarg(index) for node in pack.operations]
- args_at_index[0] = args[index]
- vectors = pack.argument_vectors(state, pack, index, args_at_index)
- if len(vectors) > 1:
- # the argument is scattered along different vector boxes
- args[index] = gather(state, vectors, pack.numops())
- state.remember_args_in_vector(pack, index, args[index])
- @always_inline
- def gather(state, vectors, count): # packed < packable and packed < stride:
- (_, arg) = vectors[0]
- i = 1
- while i < len(vectors):
- (newarg_pos, newarg) = vectors[i]
- vecinfo = forwarded_vecinfo(arg)
- newvecinfo = forwarded_vecinfo(newarg)
- if vecinfo.count + newvecinfo.count <= count:
- arg = pack_into_vector(state, arg, vecinfo.count, newarg, newarg_pos, newvecinfo.count)
- i += 1
- return arg
- @always_inline
- def position_values(state, restrict, pack, args, index, position):
- arg = args[index]
- vecinfo = forwarded_vecinfo(arg)
- count = vecinfo.count
- newcount = restrict.count
- if not restrict.any_count() and newcount != count:
- if position == 0:
- pass
- pass
- if position != 0:
- # The vector box is at a position != 0 but it
- # is required to be at position 0. Unpack it!
- arg = args[index]
- vecinfo = forwarded_vecinfo(arg)
- count = restrict.max_input_count(vecinfo.count)
- args[index] = unpack_from_vector(state, arg, position, count)
- state.remember_args_in_vector(pack, index, args[index])
- def check_if_pack_supported(state, pack):
- left = pack.leftmost()
- vecinfo = forwarded_vecinfo(left)
- insize = vecinfo.bytesize
- if left.is_typecast():
- # prohibit the packing of signext calls that
- # cast to int16/int8.
- state._prevent_signext(left.cast_to_bytesize(),
- left.cast_from_bytesize())
- if left.getopnum() == rop.INT_MUL:
- if insize == 8 or insize == 1:
- # see assembler for comment why
- raise NotAProfitableLoop
- def unpack_from_vector(state, arg, index, count):
- """ Extract parts of the vector box into another vector box """
- assert count > 0
- vecinfo = forwarded_vecinfo(arg)
- assert index + count <= vecinfo.count
- args = [arg, ConstInt(index), ConstInt(count)]
- vecop = OpHelpers.create_vec_unpack(arg.type, args, vecinfo.bytesize,
- vecinfo.signed, count)
- state.costmodel.record_vector_unpack(arg, index, count)
- state.oplist.append(vecop)
- return vecop
- def pack_into_vector(state, tgt, tidx, src, sidx, scount):
- """ tgt = [1,2,3,4,_,_,_,_]
- src = [5,6,_,_]
- new_box = [1,2,3,4,5,6,_,_] after the operation, tidx=4, scount=2
- """
- assert sidx == 0 # restriction
- vecinfo = forwarded_vecinfo(tgt)
- newcount = vecinfo.count + scount
- args = [tgt, src, ConstInt(tidx), ConstInt(scount)]
- vecop = OpHelpers.create_vec_pack(tgt.type, args, vecinfo.bytesize, vecinfo.signed, newcount)
- state.oplist.append(vecop)
- state.costmodel.record_vector_pack(src, sidx, scount)
- if not we_are_translated():
- _check_vec_pack(vecop)
- return vecop
- def _check_vec_pack(op):
- arg0 = op.getarg(0)
- arg1 = op.getarg(1)
- index = op.getarg(2)
- count = op.getarg(3)
- assert op.is_vector()
- assert arg0.is_vector()
- assert index.is_constant()
- assert isinstance(count, ConstInt)
- vecinfo = forwarded_vecinfo(op)
- argvecinfo = forwarded_vecinfo(arg0)
- assert argvecinfo.bytesize == vecinfo.bytesize
- if arg1.is_vector():
- assert argvecinfo.bytesize == vecinfo.bytesize
- else:
- assert count.value == 1
- assert index.value < vecinfo.count
- assert index.value + count.value <= vecinfo.count
- assert vecinfo.count > argvecinfo.count
- def expand(state, pack, args, arg, index):
- """ Expand a value into a vector box. useful for arith metic
- of one vector with a scalar (either constant/varialbe)
- """
- left = pack.leftmost()
- box_type = arg.type
- expanded_map = state.expanded_map
- ops = state.invariant_oplist
- variables = state.invariant_vector_vars
- if not arg.is_constant() and arg not in state.inputargs:
- # cannot be created before the loop, expand inline
- ops = state.oplist
- variables = None
- for i, node in enumerate(pack.operations):
- op = node.getoperation()
- if not arg.same_box(op.getarg(index)):
- break
- i += 1
- else:
- # note that heterogenous nodes are not yet tracked
- vecop = state.find_expanded([arg])
- if vecop:
- args[index] = vecop
- return vecop
- left = pack.leftmost()
- vecinfo = forwarded_vecinfo(left)
- vecop = OpHelpers.create_vec_expand(arg, vecinfo.bytesize, vecinfo.signed, pack.numops())
- ops.append(vecop)
- if variables is not None:
- variables.append(vecop)
- state.expand([arg], vecop)
- args[index] = vecop
- return vecop
- # quick search if it has already been expanded
- expandargs = [op.getoperation().getarg(index) for op in pack.operations]
- vecop = state.find_expanded(expandargs)
- if vecop:
- args[index] = vecop
- return vecop
- arg_vecinfo = forwarded_vecinfo(arg)
- vecop = OpHelpers.create_vec(arg.type, arg_vecinfo.bytesize, arg_vecinfo.signed, pack.opnum())
- ops.append(vecop)
- for i,node in enumerate(pack.operations):
- op = node.getoperation()
- arg = op.getarg(index)
- arguments = [vecop, arg, ConstInt(i), ConstInt(1)]
- vecinfo = forwarded_vecinfo(vecop)
- vecop = OpHelpers.create_vec_pack(arg.type, arguments, vecinfo.bytesize,
- vecinfo.signed, vecinfo.count+1)
- ops.append(vecop)
- state.expand(expandargs, vecop)
- if variables is not None:
- variables.append(vecop)
- args[index] = vecop
- class VecScheduleState(SchedulerState):
- def __init__(self, graph, packset, cpu, costmodel):
- SchedulerState.__init__(self, graph)
- self.box_to_vbox = {}
- self.cpu = cpu
- self.vec_reg_size = cpu.vector_register_size
- self.expanded_map = {}
- self.costmodel = costmodel
- self.inputargs = {}
- self.packset = packset
- for arg in graph.loop.inputargs:
- self.inputargs[arg] = None
- self.accumulation = {}
- def expand(self, args, vecop):
- index = 0
- if len(args) == 1:
- # loop is executed once, thus sets -1 as index
- index = -1
- for arg in args:
- self.expanded_map.setdefault(arg, []).append((vecop, index))
- index += 1
- def find_expanded(self, args):
- if len(args) == 1:
- candidates = self.expanded_map.get(args[0], [])
- for (vecop, index) in candidates:
- if index == -1:
- # found an expanded variable/constant
- return vecop
- return None
- possible = {}
- for i, arg in enumerate(args):
- expansions = self.expanded_map.get(arg, [])
- candidates = [vecop for (vecop, index) in expansions \
- if i == index and possible.get(vecop,True)]
- for vecop in candidates:
- for key in possible.keys():
- if key not in candidates:
- # delete every not possible key,value
- possible[key] = False
- # found a candidate, append it if not yet present
- possible[vecop] = True
- if not possible:
- # no possibility left, this combination is not expanded
- return None
- for vecop,valid in possible.items():
- if valid:
- return vecop
- return None
- def post_emit(self, node):
- pass
- def pre_emit(self, node):
- op = node.getoperation()
- if op.is_guard():
- # add accumulation info to the descriptor
- failargs = op.getfailargs()[:]
- descr = op.getdescr()
- # note: stitching a guard must resemble the order of the label
- # otherwise a wrong mapping is handed to the register allocator
- for i,arg in enumerate(failargs):
- if arg is None:
- continue
- accum = self.accumulation.get(arg, None)
- if accum:
- from rpython.jit.metainterp.compile import AbstractResumeGuardDescr
- assert isinstance(accum, AccumPack)
- assert isinstance(descr, AbstractResumeGuardDescr)
- info = AccumInfo(i, arg, accum.operator)
- descr.attach_vector_info(info)
- seed = accum.getleftmostseed()
- failargs[i] = self.renamer.rename_map.get(seed, seed)
- op.setfailargs(failargs)
- def profitable(self):
- return self.costmodel.profitable()
- def prepare(self):
- SchedulerState.prepare(self)
- self.packset.accumulate_prepare(self)
- for arg in self.graph.loop.label.getarglist():
- self.seen[arg] = None
- def emit(self, node, scheduler):
- """ If you implement a scheduler this operations is called
- to emit the actual operation into the oplist of the scheduler.
- """
- if node.pack:
- assert node.pack.numops() > 1
- for node in node.pack.operations:
- self.pre_emit(node)
- scheduler.mark_emitted(node, self, unpack=False)
- turn_into_vector(self, node.pack)
- return True
- return False
- def delay(self, node):
- if node.pack:
- pack = node.pack
- if pack.is_accumulating():
- for node in pack.operations:
- for dep in node.depends():
- if dep.to.pack is not pack:
- return True
- else:
- for node in pack.operations:
- if node.depends_count() > 0:
- return True
- return False
- def ensure_args_unpacked(self, op):
- """ If a box is needed that is currently stored within a vector
- box, this utility creates a unpacking instruction.
- """
- # unpack for an immediate use
- for i, argument in enumerate(op.getarglist()):
- if not argument.is_constant():
- arg = self.ensure_unpacked(i, argument)
- if argument is not arg:
- op.setarg(i, arg)
- # unpack for a guard exit
- if op.is_guard():
- # could be moved to the guard exit
- fail_args = op.getfailargs()
- for i, argument in enumerate(fail_args):
- if argument and not argument.is_constant():
- arg = self.ensure_unpacked(i, argument)
- if argument is not arg:
- fail_args[i] = arg
- op.setfailargs(fail_args)
- def ensure_unpacked(self, index, arg):
- if arg in self.seen or arg.is_vector():
- return arg
- (pos, var) = self.getvector_of_box(arg)
- if var:
- if var in self.invariant_vector_vars:
- return arg
- if arg in self.accumulation:
- return arg
- args = [var, ConstInt(pos), ConstInt(1)]
- vecinfo = forwarded_vecinfo(var)
- vecop = OpHelpers.create_vec_unpack(var.type, args, vecinfo.bytesize,
- vecinfo.signed, 1)
- self.renamer.start_renaming(arg, vecop)
- self.seen[vecop] = None
- self.costmodel.record_vector_unpack(var, pos, 1)
- self.oplist.append(vecop)
- return vecop
- return arg
- def _prevent_signext(self, outsize, insize):
- if insize != outsize:
- if outsize < 4 or insize < 4:
- raise NotAProfitableLoop
- def getvector_of_box(self, arg):
- return self.box_to_vbox.get(arg, (-1, None))
- def setvector_of_box(self, var, off, vector):
- if var.returns_void():
- assert 0, "not allowed to rename void resop"
- vecinfo = forwarded_vecinfo(vector)
- assert off < vecinfo.count
- assert not var.is_vector()
- self.box_to_vbox[var] = (off, vector)
- def remember_args_in_vector(self, pack, index, box):
- arguments = [op.getoperation().getarg(index) for op in pack.operations]
- for i,arg in enumerate(arguments):
- vecinfo = forwarded_vecinfo(arg)
- if i >= vecinfo.count:
- break
- self.setvector_of_box(arg, i, box)
- class Pack(object):
- """ A pack is a set of n statements that are:
- * isomorphic
- * independent
- """
- FULL = 0
- _attrs_ = ('operations', 'accumulator', 'operator', 'position')
- operator = '\x00'
- position = -1
- accumulator = None
- def __init__(self, ops):
- self.operations = ops
- self.update_pack_of_nodes()
- def numops(self):
- return len(self.operations)
- @specialize.arg(1)
- def leftmost(self, node=False):
- if node:
- return self.operations[0]
- return self.operations[0].getoperation()
- @specialize.arg(1)
- def rightmost(self, node=False):
- if node:
- return self.operations[-1]
- return self.operations[-1].getoperation()
- def pack_type(self):
- ptype = self.input_type
- if self.input_type is None:
- # load does not have an input type, but only an output type
- ptype = self.output_type
- return ptype
- def input_byte_size(self):
- """ The amount of bytes the operations need with the current
- entries in self.operations. E.g. cast_singlefloat_to_float
- takes only #2 operations.
- """
- return self._byte_size(self.input_type)
- def output_byte_size(self):
- """ The amount of bytes the operations need with the current
- entries in self.operations. E.g. vec_load(..., descr=short)
- with 10 operations returns 20
- """
- return self._byte_size(self.output_type)
- def pack_load(self, vec_reg_size):
- """ Returns the load of the pack a vector register would hold
- just after executing the operation.
- returns: < 0 - empty, nearly empty
- = 0 - full
- > 0 - overloaded
- """
- left = self.leftmost()
- if left.returns_void():
- if rop.is_primitive_store(left.opnum):
- # make this case more general if it turns out this is
- # not the only case where packs need to be trashed
- descr = left.getdescr()
- bytesize = descr.get_item_size_in_bytes()
- return bytesize * self.numops() - vec_reg_size
- else:
- assert left.is_guard() and left.getopnum() in \
- (rop.GUARD_TRUE, rop.GUARD_FALSE)
- vecinfo = forwarded_vecinfo(left.getarg(0))
- bytesize = vecinfo.bytesize
- return bytesize * self.numops() - vec_reg_size
- return 0
- if self.numops() == 0:
- return -1
- if left.is_typecast():
- # casting is special, often only takes a half full vector
- if left.casts_down():
- # size is reduced
- size = left.cast_input_bytesize(vec_reg_size)
- return left.cast_from_bytesize() * self.numops() - size
- else:
- # size is increased
- #size = left.cast_input_bytesize(vec_reg_size)
- return left.cast_to_bytesize() * self.numops() - vec_reg_size
- vecinfo = forwarded_vecinfo(left)
- return vecinfo.bytesize * self.numops() - vec_reg_size
- def is_full(self, vec_reg_size):
- """ If one input element times the opcount is equal
- to the vector register size, we are full!
- """
- return self.pack_load(vec_reg_size) == Pack.FULL
- def opnum(self):
- assert len(self.operations) > 0
- return self.operations[0].getoperation().getopnum()
- def clear(self):
- for node in self.operations:
- node.pack = None
- node.pack_position = -1
- def update_pack_of_nodes(self):
- for i,node in enumerate(self.operations):
- node.pack = self
- node.pack_position = i
- def split(self, packlist, vec_reg_size):
- """ Combination phase creates the biggest packs that are possible.
- In this step the pack is reduced in size to fit into an
- vector register.
- """
- before_count = len(packlist)
- pack = self
- while pack.pack_load(vec_reg_size) > Pack.FULL:
- pack.clear()
- oplist, newoplist = pack.slice_operations(vec_reg_size)
- pack.operations = oplist
- pack.update_pack_of_nodes()
- if not pack.leftmost().is_typecast():
- assert pack.is_full(vec_reg_size)
- #
- newpack = pack.clone(newoplist)
- load = newpack.pack_load(vec_reg_size)
- if load >= Pack.FULL:
- pack.update_pack_of_nodes()
- pack = newpack
- packlist.append(newpack)
- else:
- newpack.clear()
- newpack.operations = []
- break
- pack.update_pack_of_nodes()
- def opcount_filling_vector_register(self, vec_reg_size):
- left = self.leftmost()
- oprestrict = trans.get(left)
- return oprestrict.opcount_filling_vector_register(left, vec_reg_size)
- def slice_operations(self, vec_reg_size):
- count = self.opcount_filling_vector_register(vec_reg_size)
- assert count > 0
- newoplist = self.operations[count:]
- oplist = self.operations[:count]
- assert len(newoplist) + len(oplist) == len(self.operations)
- assert len(newoplist) != 0
- return oplist, newoplist
- def rightmost_match_leftmost(self, other):
- """ Check if pack A can be combined with pack B """
- assert isinstance(other, Pack)
- rightmost = self.operations[-1]
- leftmost = other.operations[0]
- # if it is not accumulating it is valid
- if self.is_accumulating():
- if not other.is_accumulating():
- return False
- elif self.position != other.position:
- return False
- return rightmost is leftmost
- def argument_vectors(self, state, pack, index, pack_args_index):
- vectors = []
- last = None
- for arg in pack_args_index:
- pos, vecop = state.getvector_of_box(arg)
- if vecop is not last and vecop is not None:
- vectors.append((pos, vecop))
- last = vecop
- return vectors
- def __repr__(self):
- if len(self.operations) == 0:
- return "Pack(empty)"
- packs = self.operations[0].op.getopname() + '[' + ','.join(['%2d' % (o.opidx) for o in self.operations]) + ']'
- if self.operations[0].op.getdescr():
- packs += 'descr=' + str(self.operations[0].op.getdescr())
- return "Pack(%dx %s)" % (self.numops(), packs)
- def is_accumulating(self):
- return False
- def clone(self, oplist):
- return Pack(oplist)
- class Pair(Pack):
- """ A special Pack object with only two statements. """
- def __init__(self, left, right):
- assert isinstance(left, Node)
- assert isinstance(right, Node)
- Pack.__init__(self, [left, right])
- def __eq__(self, other):
- if isinstance(other, Pair):
- return self.left is other.left and \
- self.right is other.right
- class AccumPack(Pack):
- SUPPORTED = { rop.FLOAT_ADD: '+',
- rop.INT_ADD: '+',
- rop.FLOAT_MUL: '*',
- }
- def __init__(self, nodes, operator, position):
- Pack.__init__(self, nodes)
- self.operator = operator
- self.position = position
- def getdatatype(self):
- accum = self.leftmost().getarg(self.position)
- vecinfo = forwarded_vecinfo(accum)
- return vecinfo.datatype
- def getbytesize(self):
- accum = self.leftmost().getarg(self.position)
- vecinfo = forwarded_vecinfo(accum)
- return vecinfo.bytesize
- def getleftmostseed(self):
- return self.leftmost().getarg(self.position)
- def getseeds(self):
- """ The accumulatoriable holding the seed value """
- return [op.getoperation().getarg(self.position) for op in self.operations]
- def reduce_init(self):
- if self.operator == '*':
- return 1
- return 0
- def is_accumulating(self):
- return True
- def clone(self, oplist):
- return AccumPack(oplist, self.operator, self.position)