LLVM学习笔记(12)

来源:互联网 发布:超融合与云计算的区别 编辑:程序博客网 时间:2024/06/05 11:47

3.3.5.  信息的完善

在RegisterInfoEmitter::run,接下来执行CodeGenRegBank::computeDerivedInfo方法。

1802   void CodeGenRegBank::computeDerivedInfo(){

1803     computeComposites();

1804     computeSubRegLaneMasks();

1805  

1806     // Compute aweight for each register unit created during getSubRegs.

1807     // This maycreate adopted register units (with unit # >= NumNativeRegUnits).

1808     computeRegUnitWeights();

1809  

1810     // Compute aunique set of RegUnitSets. One for each RegClass and inferred

1811     // supersets forthe union of overlapping sets.

1812     computeRegUnitSets();

1813  

1814     computeRegUnitLaneMasks();

1815  

1816     // Computeregister class HasDisjunctSubRegs flag.

1817     for(CodeGenRegisterClass &RC : RegClasses) {

1818       RC.HasDisjunctSubRegs = false;

1819       for (const CodeGenRegister *Reg : RC.getMembers())

1820         RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs;

1821     }

1822  

1823     // Get the weightof each set.

1824     for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)

1825       RegUnitSets[Idx].Weight =getRegUnitSetWeight(RegUnitSets[Idx].Units);

1826  

1827     // Find the orderof each set.

1828     RegUnitSetOrder.reserve(RegUnitSets.size());

1829     for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)

1830       RegUnitSetOrder.push_back(Idx);

1831  

1832     std::stable_sort(RegUnitSetOrder.begin(),RegUnitSetOrder.end(),

1833                      [this](unsignedID1, unsigned ID2) {

1834       returngetRegPressureSet(ID1).Units.size() <

1835              getRegPressureSet(ID2).Units.size();

1836     });

1837     for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {

1838       RegUnitSets[RegUnitSetOrder[Idx]].Order =Idx;

1839     }

1840   }

CodeGenRegBank::computeDerivedInfo方法计算的所谓衍生信息包括以下内容。

3.3.5.1.       完整的索引信息

前面我们已经建立了复合索引,但这些索引不一定完整,比如可用孙子寄存器的复合索引就没有认真构建过。下面就要为所有可用的寄存器建立起完整的索引系统。

1117   void CodeGenRegBank::computeComposites(){

1118     // Keep track ofTopoSigs visited. We only need to visit each TopoSig once,

1119     // and manyregisters will share TopoSigs on regular architectures.

1120     BitVector TopoSigs(getNumTopoSigs());

1121  

1122     for (const auto &Reg1: Registers) {

1123       // Skipidentical subreg structures already processed.

1124       if (TopoSigs.test(Reg1.getTopoSig()))

1125         continue;

1126       TopoSigs.set(Reg1.getTopoSig());

1127  

1128       constCodeGenRegister::SubRegMap &SRM1 = Reg1.getSubRegs();

1129       for(CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),

1130            e1 = SRM1.end(); i1 != e1; ++i1) {

1131         CodeGenSubRegIndex *Idx1 = i1->first;

1132         CodeGenRegister *Reg2 = i1->second;

1133         // Ignoreidentity compositions.

1134         if (&Reg1 == Reg2)

1135           continue;

1136         constCodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs();

1137         // Trycomposing Idx1 with another SubRegIndex.

1138         for(CodeGenRegister::SubRegMap::const_iterator i2 = SRM2.begin(),

1139              e2 = SRM2.end(); i2 != e2; ++i2) {

1140           CodeGenSubRegIndex *Idx2 =i2->first;

1141           CodeGenRegister *Reg3 = i2->second;

1142           // Ignoreidentity compositions.

1143           if (Reg2 == Reg3)

1144             continue;

1145           // OKReg1:IdxPair == Reg3. Find the index with Reg:Idx == Reg3.

1146           CodeGenSubRegIndex *Idx3 =Reg1.getSubRegIndex(Reg3);

1147           assert(Idx3&& "Sub-register doesn't have an index");

1148  

1149           //Conflicting composition? Emit a warning but allow it.

1150           if (CodeGenSubRegIndex *Prev =Idx1->addComposite(Idx2,Idx3))

1151             PrintWarning(Twine("SubRegIndex") + Idx1->getQualifiedName() +

1152                          " and " +Idx2->getQualifiedName() +

1153                          " composeambiguously as " + Prev->getQualifiedName() +

1154                          " or " +Idx3->getQualifiedName());

1155         }

1156       }

1157     }

1158   }

这里的处理是遍历所有的CodeGenRegister实例,如果已知AàB,BàC的索引(C是B的子寄存器,B是A的子寄存器),建立AàC的(复合)索引。如此一来,绝不会有漏网之鱼。

接下来,我们需要让TableGen能有办法高效、简单地表示这些关系。比如,A覆盖了C。

CodeGenSubRegIndex定义里设置了LaneMask域。大致上LaneMask表示寄存器中可分割的部分。叶子索引(Composed为空)在LaneMask中会设置一个比特,位置是它自己在SubRegIndices容器里出现的次序(1174行),序号超过31的索引将共享同一个LaneMask比特(1187行),表示寄存器中一个特定的部分。而Composed非空索引的LaneMask则是它构成的复合索引的LaneMask的集合(1189行首先将它们的LaneMask置0),表示它所代表的寄存器部分覆盖了哪些更小的部分。

1167   void CodeGenRegBank::computeSubRegLaneMasks(){

1168     // First assignindividual bits to all the leaf indices.

1169     unsigned Bit = 0;

1170     // Determine maskof lanes that cover their registers.

1171     CoveringLanes = ~0u;

1172     for (auto &Idx : SubRegIndices) {

1173       if (Idx.getComposites().empty()) {

1174         Idx.LaneMask = 1u << Bit;

1175         // Share bit31 in the unlikely case there are more than 32 leafs.

1176         //

1177         // Sharingbits is harmless; it allows graceful degradation in targets

1178         // with morethan 32 vector lanes. They simply get a limited resolution

1179         // view oflanes beyond the 32nd.

1180         //

1181         // See alsothe comment for getSubRegIndexLaneMask().

1182         if (Bit < 31)

1183           ++Bit;

1184         else

1185           // Once bit31 is shared among multiple leafs, the 'lane' it represents

1186           // is nolonger covering its registers.

1187           CoveringLanes &= ~(1u <<Bit);

1188       } else {

1189         Idx.LaneMask = 0;

1190       }

1191     }

1192  

1193     // Computetransformation sequences for composeSubRegIndexLaneMask. The idea

1194     // here is thatfor each possible target subregister we look at the leafs

1195     // in thesubregister graph that compose for this target and create

1196     // transformationsequences for the lanemasks. Each step in the sequence

1197     // consists of abitmask and a bitrotate operation. As the rotation amounts

1198     // are usuallythe same for many subregisters we can easily combine the steps

1199     // by combiningthe masks.

1200     for (const auto &Idx :SubRegIndices) {

1201       const auto &Composites = Idx.getComposites();

1202       auto&LaneTransforms = Idx.CompositionLaneMaskTransform;

1203       // Go throughall leaf subregisters and find the ones that compose with Idx.

1204       // These makeout all possible valid bits in the lane mask we want to

1205       // transform.Looking only at the leafs ensure that only a single bit in

1206       // the mask isset.

1207       unsigned NextBit = 0;

1208       for (auto &Idx2 : SubRegIndices) {

1209         // Skipnon-leaf subregisters.

1210         if (!Idx2.getComposites().empty())

1211           continue;

1212         // Replicatethe behaviour from the lane mask generation loop above.

1213         unsigned SrcBit = NextBit;

1214         unsigned SrcMask = 1u << SrcBit;

1215         if (NextBit < 31)

1216           ++NextBit;

1217         assert(Idx2.LaneMask== SrcMask);

1218  

1219         // Get thecomposed subregister if there is any.

1220         auto C =Composites.find(&Idx2);

1221         if (C == Composites.end())

1222           continue;

1223         constCodeGenSubRegIndex *Composite = C->second;

1224         // TheComposed subreg should be a leaf subreg too

1225         assert(Composite->getComposites().empty());

1226  

1227         // CreateMask+Rotate operation and merge with existing ops if possible.

1228         unsigned DstBit =Log2_32(Composite->LaneMask);

1229         int Shift = DstBit - SrcBit;

1230         uint8_t RotateLeft = Shift >= 0 ?(uint8_t)Shift : 32+Shift;

1231         for (auto &I : LaneTransforms) {

1232           if (I.RotateLeft == RotateLeft) {

1233             I.Mask |= SrcMask;

1234             SrcMask = 0;

1235           }

1236         }

1237         if (SrcMask != 0) {

1238           MaskRolPair MaskRol = { SrcMask,RotateLeft };

1239           LaneTransforms.push_back(MaskRol);

1240         }

1241       }

1242       // Optimize ifthe transformation consists of one step only: Set mask to

1243       // 0xffffffff(including some irrelevant invalid bits) so that it should

1244       // merge withmore entries later while compressing the table.

1245       if (LaneTransforms.size() == 1)

1246         LaneTransforms[0].Mask = ~0u;

1247  

1248       // Furthercompression optimization: For invalid compositions resulting

1249       // in asequence with 0 entries we can just pick any other. Choose

1250       // Mask0xffffffff with Rotation 0.

1251       if (LaneTransforms.size() == 0) {

1252         MaskRolPair P = { ~0u, 0 };

1253         LaneTransforms.push_back(P);

1254       }

1255     }

1256  

1257     // FIXME: What ifad-hoc aliasing introduces overlaps that aren't represented

1258     // by thesub-register graph? This doesn't occur in any known targets.

1259  

1260     // Inherit lanesfrom composites.

1261     for (const auto &Idx :SubRegIndices) {

1262       unsigned Mask = Idx.computeLaneMask();

1263       // If somesuper-registers without CoveredBySubRegs use this index, we can

1264       // no longerassume that the lanes are covering their registers.

1265       if (!Idx.AllSuperRegsCovered)

1266         CoveringLanes &= ~Mask;

1267     }

1268  

1269     // Compute lane maskcombinations for register classes.

1270     for (auto &RegClass : RegClasses) {

1271       unsigned LaneMask = 0;

1272       for (const auto&SubRegIndex : SubRegIndices) {

1273         if(RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr)

1274           continue;

1275         LaneMask |= SubRegIndex.LaneMask;

1276       }

1277       RegClass.LaneMask = LaneMask;

1278     }

1279   }

CodeGenSubRegIndex的另一个域CompositionLaneMaskTransform是类型为mutableSmallVector< MaskRolPair,1>的容器,其中MaskRolPair是这样定义的:

42          struct MaskRolPair {

43            unsigned Mask;

44            uint8_t RotateLeft;

45            bool operator==(const MaskRolPair Other) const{

46              returnMask == Other.Mask && RotateLeft == Other.RotateLeft;

47            }

48            bool operator!=(const MaskRolPair Other) const{

49              returnMask != Other.Mask || RotateLeft != Other.RotateLeft;

50            }

51          };

对Composed为空的索引,CompositionLaneMaskTransform没有用。但对Composed非空的索引,以A+B=C为例,A要记录这样一项:“ (B的LaneMask, 从B的LaneMask得到C的LaneMask所需的向左偏转比特数)”。显然,这种情形下,B和C的Composed都必须为空。

1262行循环开始对所有Composed非空的寄存器索引计算LaneMask。这时Composed为空的索引的LaneMask已经设置好了,Composed非空的寄存器的LaneMask则是0。我们希望Composed非空的索引的LaneMask是所有从它得到的复合索引LaneMask的合集,这由computeLaneMask方法实现。

85        unsigned CodeGenSubRegIndex::computeLaneMask()const {

86          // Alreadycomputed?

87          if (LaneMask)

88            returnLaneMask;

89       

90          // Recursionguard, shouldn't be required.

91          LaneMask = ~0u;

92       

93          // The lane maskis simply the union of all sub-indices.

94          unsigned M = 0;

95          for (const auto &C :Composed)

96            M |= C.second->computeLaneMask();

97          assert(M&& "Missing lane mask, sub-register cycle?");

98          LaneMask = M;

99          returnLaneMask;

100      }

CodeGenRegisterClass也有一个LaneMask成员。它是该类所有寄存器包含的所有寄存器索引的LaneMask的合集。最后,在1270行循环设置它们。

3.3.5.2.       寄存器单元的权重

前面为每个最小可援引的寄存器配了一个RegUnit实例,而这些寄存器的父寄存器则使用子寄存器的RegUnit实例。这些RegUnit实例将产生用于寄存器相干与压力模型的数据。其中RegUnit的Weight域表示该单元的权重,无特殊情况Weight是1,包含多个寄存器单元的寄存器的权重就是它们的总和。RegUnit的Weight值还用于计算寄存器类的权重。不过有些寄存器属于多个寄存器类,因此这些类要一起考虑,其中一个重要的考量是属于同一个寄存器类的寄存器权重应该一致(不过下面可以看到,只能做到尽量一致),并由此调整RegUnit的权重为后面构建RegUnit同类集做好准备。

1489   void CodeGenRegBank::computeRegUnitWeights(){

1490     std::vector<UberRegSet> UberSets;

1491     std::vector<UberRegSet*>RegSets(Registers.size());

1492     computeUberSets(UberSets,RegSets, *this);

1493     // UberSets andRegSets are now immutable.

1494  

1495     computeUberWeights(UberSets,*this);

1496  

1497     // Iterate overeach Register, normalizing the unit weights until reaching

1498     // a fix point.

1499     unsigned NumIters = 0;

1500     for (boolChanged = true; Changed; ++NumIters) {

1501       assert(NumIters<= NumNativeRegUnits && "Runaway register unit weights");

1502       Changed = false;

1503       for (auto &Reg : Registers) {

1504         CodeGenRegister::RegUnitList NormalUnits;

1505         SparseBitVector<> NormalRegs;

1506         Changed |= normalizeWeight(&Reg,UberSets, RegSets, NormalRegs,

1507                                    NormalUnits, *this);

1508       }

1509     }

1510   }

因此,首先对寄存器进行划分(以寄存器类为依据),分为若干不相关的寄存器集合。然后,对每个集合中的寄存器计算其权重,选出其中最大的权重作为集合的权重。最后,将集合中寄存器的权重尽量调整为集合的权重一致。

在1490行出现的UberRegSet定义如下:

1298   structUberRegSet{

1299     CodeGenRegister::Vec Regs;

1300     unsigned Weight;

1301     CodeGenRegister::RegUnitList SingularDeterminants;

1302  

1303     UberRegSet(): Weight(0) {}

1304   };

每个UberRegSet是包含相同寄存器的寄存器类的联合传递闭包。所有UberRegSet合在一起构成了所有寄存器的一个划分。每个UberRegSet实例也称为一个同类集,computeUberSets方法使用同类集算法进行寄存器划分。

1311   static voidcomputeUberSets(std::vector<UberRegSet>&UberSets,

1312                               std::vector<UberRegSet*>&RegSets,

1313                               CodeGenRegBank&RegBank) {

1314  

1315     const auto &Registers = RegBank.getRegisters();

1316  

1317     // The RegisterEnumValue is one greater than its index into Registers.

1318     assert(Registers.size()== Registers.back().EnumValue &&

1319            "register enum valuemismatch");

1320  

1321     // Forsimplicitly make the SetID the same as EnumValue.

1322     IntEqClasses UberSetIDs(Registers.size()+1);

1323     std::set<unsigned> AllocatableRegs;

1324     for (auto &RegClass : RegBank.getRegClasses()) {

1325       if (!RegClass.Allocatable)

1326         continue;

1327  

1328       constCodeGenRegister::Vec &Regs = RegClass.getMembers();

1329       if (Regs.empty())

1330         continue;

1331  

1332       unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue);

1333       assert(USetID&& "register number 0 is invalid");

1334  

1335      AllocatableRegs.insert((*Regs.begin())->EnumValue);

1336       for (auto I = std::next(Regs.begin()), E = Regs.end(); I!= E; ++I) {

1337        AllocatableRegs.insert((*I)->EnumValue);

1338         UberSetIDs.join(USetID,(*I)->EnumValue);

1339       }

1340     }

1341     // Combinenon-allocatable regs.

1342     for (const auto &Reg :Registers) {

1343       unsigned RegNum = Reg.EnumValue;

1344       if (AllocatableRegs.count(RegNum))

1345         continue;

1346  

1347       UberSetIDs.join(0,RegNum);

1348     }

1349     UberSetIDs.compress();

1350  

1351     // Make the firstUberSet a special unallocatable set.

1352     unsigned ZeroID = UberSetIDs[0];

1353  

1354     // InsertRegisters into the UberSets formed by union-find.

1355     // Do not resizeafter this.

1356     UberSets.resize(UberSetIDs.getNumClasses());

1357     unsigned i = 0;

1358     for (const CodeGenRegister &Reg : Registers) {

1359       unsigned USetID =UberSetIDs[Reg.EnumValue];

1360       if (!USetID)

1361         USetID = ZeroID;

1362       else if (USetID == ZeroID)

1363         USetID = 0;

1364  

1365       UberRegSet *USet = &UberSets[USetID];

1366       USet->Regs.push_back(&Reg);

1367       sortAndUniqueRegisters(USet->Regs);

1368       RegSets[i++] = USet;

1369     }

1370   }

在1322行,调用IntEqClasses构造函数来初始化一个IntEqClasses实例。这也是一个同类集,但保存的是寄存器对象的EnumValue,寄存器的EnumValue对这个同类集算法是重要的,因为我们需要知道一个同类集中最小的寄存器,而恰好它的EnumValue也是最小的(这是我们在TD文件里必须遵循的规则:按从小到大的次序定义寄存器)。

42          IntEqClasses(unsigned N = 0) : NumClasses(0){grow(N); }

IntEqClasses在容器EC里保存同类集,这个容器的类型是SmallVector<unsigned,8>。在同类集中存在一个元素作为这个同类集的代表,EC这样记录同类集,以某个元素的值为下标,EC[elem]就是该同类集的代表元素,代表元素的EC项映射回自己。

在同类集扩展期间,EC的代表元素是同类集中较小的成员。在同类集成型后,代表元素将是同类集中最小的成员(即最小的寄存器)。

25        void IntEqClasses::grow(unsignedN) {

26          assert(NumClasses== 0 && "grow() called after compress().");

27          EC.reserve(N);

28          while(EC.size() < N)

29            EC.push_back(EC.size());

30        }

方法grow准备初始同类集,一开始每个寄存器划归一个独立的同类集。接下来,以寄存器类为依据,合并属于同一个寄存器类的寄存器的同类集。一个同类集至少对应一个寄存器类,在寄存器类有重叠时,会对应多个寄存器类。

1332行的findLeader方法查找这个同类集的代表元素:

46        unsigned IntEqClasses::findLeader(unsigneda) const {

47          assert(NumClasses== 0 && "findLeader() called after compress().");

48          while (a !=EC[a])

49            a = EC[a];

50          return a;

51        }

在1336行的循环里,join方法将两个同类集合二为一。假设一开始有两个同类集:EC[5]= 5,EC[3]= 3。那么下面39行的循环会产生这样的EC:EC[5] =3,EC[3]= 3。接下来,如果这个同类集(现在有两个成员)要与同类集EC[4]= 4合并,那么39行的循环会产生这样的结果:EC[5]= 3,EC[4]= 3,EC[3]= 3。

32        void IntEqClasses::join(unsigneda, unsigned b) {

33          assert(NumClasses== 0 && "join() called after compress().");

34          unsigned eca = EC[a];

35          unsigned ecb = EC[b];

36          // Updatepointers while searching for the leaders, compressing the paths

37          // incrementally.The larger leader will eventually be updated, joining the

38          // classes.

39          while (eca !=ecb)

40            if (eca < ecb)

41              EC[b] = eca, b = ecb, ecb = EC[b];

42            else

43              EC[a] = ecb, a = eca, eca = EC[a];

44        }

最后,1342行循环将不可分配的寄存器归入同类集0(EC[0])。

前面的处理不能保证同类集的元素都映射到代表,比如同类集EC[5] = 3,EC[4] =3,EC[3]= 3在与同类集EC[1]= 1合并时,只会产生结果:EC[5]= 3,EC[4]= 3,EC[3]= 1,EC[1]= 1。EC[5]与EC[4]并没有映射到代表,因此需要compress方法予以更正。注意,“EC[i]== i”是一个同类集代表,该表达式成立的次数就是同类集的个数。

53        void IntEqClasses::compress(){

54          if (NumClasses)

55            return;

56          for (unsignedi = 0, e = EC.size(); i != e; ++i)

57            EC[i] = (EC[i] == i) ? NumClasses++ :EC[EC[i]];

58        }

1358行循环生成以CodeGenRegister对象为内容的同类集对象——UberRegSet实例,容器RegSets将寄存器关联到所属同类集。接下来调用computeUberWeights计算这些UberRegSet的权重数据。

1373   static voidcomputeUberWeights(std::vector<UberRegSet>&UberSets,

1374                                  CodeGenRegBank&RegBank) {

1375     // Skip the firstunallocatable set.

1376     for(std::vector<UberRegSet>::iterator I = std::next(UberSets.begin()),

1377            E = UberSets.end(); I != E; ++I) {

1378  

1379       // Initializeall unit weights in this set, and remember the max units/reg.

1380       constCodeGenRegister *Reg = nullptr;

1381       unsigned MaxWeight = 0, Weight = 0;

1382       for(RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) {

1383         if (Reg != UnitI.getReg()) {

1384           if (Weight > MaxWeight)

1385             MaxWeight = Weight;

1386           Reg = UnitI.getReg();

1387           Weight = 0;

1388         }

1389         unsigned UWeight =RegBank.getRegUnit(*UnitI).Weight;

1390         if (!UWeight) {

1391           UWeight = 1;

1392           RegBank.increaseRegUnitWeight(*UnitI,UWeight);

1393         }

1394         Weight += UWeight;

1395       }

1396       if (Weight > MaxWeight)

1397         MaxWeight = Weight;

1398       if (I->Weight != MaxWeight) {

1399         DEBUG(

1400           dbgs() << "UberSet "<< I - UberSets.begin() << " Weight " << MaxWeight;

1401           for (auto &Unit : I->Regs)

1402             dbgs() << " "<< Unit->getName();

1403           dbgs() << "\n");

1404         // Update theset weight.

1405         I->Weight = MaxWeight;

1406       }

1407  

1408       // Findsingular determinants.

1409       for (const auto R :I->Regs) {

1410         if (R->getRegUnits().count() == 1&& R->getWeight(RegBank) ==I->Weight) {

1411           I->SingularDeterminants |=R->getRegUnits();

1412         }

1413       }

1414     }

1415   }

1376行循环遍历同类集的组成寄存器,查找其中最大的寄存器权重,将其设为同类集权重。寄存器权重是其组成RegUnit对象Weight值的总和。但在创建之初,RegUnit对象的Weight都初始化为0。现在,在1390~1393行将RegUnit对象的Weight都设为1。

1382行的迭代器RegUnitIterator的构造函数如下。在上面的调用里,它使用UberRegSet的Regs容器作为参数,RegI及RegE分别是这个容器的首尾迭代器,而UnitI与UnitE则指向RegI所援引的CodeGenRegister对象的RegUnits容器(它记录了这些RegUnit对象在CodeGenRegBank::RegUnits容器中的序号),advance方法确保跳过空的RegUnits容器。

160       RegUnitIterator(constCodeGenRegister::Vec &Regs):

161          RegI(Regs.begin()), RegE(Regs.end()),UnitI(), UnitE() {

162     

163          if (RegI != RegE) {

164            UnitI = (*RegI)->getRegUnits().begin();

165            UnitE = (*RegI)->getRegUnits().end();

166            advance();

167          }

168        }

很显然,1382行循环完成对一个寄存器的RegUnit集遍历,计算其RegUnit的Weight总和(即寄存器权重,参见下面的getWeight方法)。同类集的Weight则是其中最大的寄存器权重。如果一个仅有单个RegUnit的寄存器具有同类集中最大的权重(1410行条件),在后面的权重调整中其权重不允许改变,因此通过UberRegSet的SingularDeterminants容器记录它。同一行上的getWeight方法是这样定义的:

524      unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank)const {

525        unsigned Weight = 0;

526        for(RegUnitList::iterator I = RegUnits.begin(), E = RegUnits.end();

527             I != E; ++I) {

528          Weight += RegBank.getRegUnit(*I).Weight;

529        }

530        returnWeight;

531      }

接下来将一个同类集的寄存器的权重调整为一致。方式是调整寄存器组成RegUnit的Weight。normalizeWeight是对寄存器DAG的一个后序(post-order)遍历(1439行循环),即从寄存器的最小可援引部分开始。

1427   static boolnormalizeWeight(CodeGenRegister *Reg,

1428                              std::vector<UberRegSet> &UberSets,

1429                              std::vector<UberRegSet*> &RegSets,

1430                              SparseBitVector<> &NormalRegs,

1431                               CodeGenRegister::RegUnitList&NormalUnits,

1432                               CodeGenRegBank&RegBank) {

1433     if (NormalRegs.test(Reg->EnumValue))

1434       returnfalse;

1435     NormalRegs.set(Reg->EnumValue);

1436  

1437     bool Changed = false;

1438     constCodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();

1439     for(CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(),

1440            SRE = SRM.end(); SRI != SRE; ++SRI) {

1441       if (SRI->second == Reg)

1442         continue;// self-cycles happen

1443  

1444       Changed |= normalizeWeight(SRI->second,UberSets, RegSets,

1445                                  NormalRegs,NormalUnits, RegBank);

1446     }

1447     // Postorderregister normalization.

1448  

1449     // Inheritregister units newly adopted by subregisters.

1450     if (Reg->inheritRegUnits(RegBank))

1451       computeUberWeights(UberSets,RegBank);

1452  

1453     // Check if thisregister is too skinny for its UberRegSet.

1454     UberRegSet *UberSet =RegSets[RegBank.getRegIndex(Reg)];

1455  

1456     unsigned RegWeight = Reg->getWeight(RegBank);

1457     if (UberSet->Weight > RegWeight) {

1458       // A registerunit's weight can be adjusted only if it is the singular unit

1459       // for thisregister, has not been used to normalize a subregister's set,

1460       // and has notalready been used to singularly determine this UberRegSet.

1461       unsigned AdjustUnit =*Reg->getRegUnits().begin();

1462       if (Reg->getRegUnits().count() != 1

1463           || hasRegUnit(NormalUnits, AdjustUnit)

1464           ||hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) {

1465         // We don'thave an adjustable unit, so adopt a new one.

1466         AdjustUnit = RegBank.newRegUnit(UberSet->Weight- RegWeight);

1467         Reg->adoptRegUnit(AdjustUnit);

1468         // Adopting aunit does not immediately require recomputing set weights.

1469       }

1470       else {

1471         // Adjust the existing single unit.

1472         RegBank.increaseRegUnitWeight(AdjustUnit,UberSet->Weight - RegWeight);

1473         // The unitmay be shared among sets and registers within this set.

1474         computeUberWeights(UberSets,RegBank);

1475       }

1476       Changed = true;

1477     }

1478  

1479     // Mark theseunits normalized so superregisters can't change their weights.

1480     NormalUnits |= Reg->getRegUnits();

1481  

1482     returnChanged;

1483   }

到达1450行,当前寄存器的子寄存器已经由normalizeWeight处理,这时调用CodeGenRegister::inheritRegUnits将寄存器的RegUnits设置为子寄存器RegUnits的合集,只要存在子寄存器,changed就一定是true。显然,如果寄存器RegUnits发生变化,需要重新调用computeUberWeights来重新计算该寄存器的Weight值。

202      bool CodeGenRegister::inheritRegUnits(CodeGenRegBank&RegBank) {

203        bool changed = false;

204        for(SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();

205             I != E; ++I) {

206          CodeGenRegister *SR = I->second;

207          // Merge thesubregister's units into this register's RegUnits.

208          changed |= (RegUnits |= SR->RegUnits);

209        }

210     

211        returnchanged;

212      }

1454行的getRegIndex实际上返回该寄存器的EnumValue-1,因此得到的UberSet就是这个寄存器所属的同类集。NormalUnits与NormalRegs都是从外部传进来、一开始为空的容器,用于控制这个调整的过程。

如果寄存器的权重小于其同类集的权重(1457行),满足以下所有条件才可以调整其RegUnit的Weight值,然后调用computeUberWeights重新计算该寄存器权重:1.仅包含一个RegUnit,2. 还没调整过,3.不在UberSet的SingularDeterminants容器(仅包含一个RegUnit且权重就是同类集权重)中。否则,只能向这个寄存器多加一个RegUnit来记录这个差值。

由于上述严格的条件,实际上权重的调整只是一个尽力而为的过程。为调整权重而加入的RegUnit实例只是为了使上述算法尽快收敛,这些RegUnit实例在后续处理里根本没有作用。

3.3.5.3.       寄存器单元的同类集

前面以寄存器为粒度构建了同类集信息,完善了寄存器单元(RegUnit)的Weight值设置。现在接着以RegUnit为单位构建同类集,这个同类集由RegUnitSet来表示:

461        struct RegUnitSet {

462          typedefstd::vector<unsigned>::const_iterator iterator;

463     

464          std::string Name;

465          std::vector<unsigned> Units;

466          unsigned Weight; //Cache the sum of all unit weights.

467          unsigned Order;  // Cache the sortkey.

468     

469          RegUnitSet() : Weight(0), Order(0) {}

470        };

类似的,在CodeGenRegBank中定义了容器RegUnitSets(std::vector<RegUnitSet>)来保存这些同类集。这些同类集由方法CodeGenRegBank::computeRegUnitSets构造。

1594   void CodeGenRegBank::computeRegUnitSets(){

1595     assert(RegUnitSets.empty()&& "dirty RegUnitSets");

1596  

1597     // Compute aunique RegUnitSet for each RegClass.

1598     auto&RegClasses = getRegClasses();

1599     for (auto &RC : RegClasses) {

1600       if (!RC.Allocatable)

1601         continue;

1602  

1603       //Speculatively grow the RegUnitSets to hold the new set.

1604       RegUnitSets.resize(RegUnitSets.size() + 1);

1605       RegUnitSets.back().Name = RC.getName();

1606  

1607       // Compute asorted list of units in this class.

1608       RC.buildRegUnitSet(RegUnitSets.back().Units);

1609  

1610       // Find anexisting RegUnitSet.

1611      std::vector<RegUnitSet>::const_iterator SetI =

1612         findRegUnitSet(RegUnitSets,RegUnitSets.back());

1613       if (SetI != std::prev(RegUnitSets.end()))

1614         RegUnitSets.pop_back();

1615     }

1616  

1617     DEBUG(dbgs() << "\nBeforepruning:\n";

1618           for(unsigned USIdx = 0, USEnd = RegUnitSets.size();

1619                USIdx < USEnd; ++USIdx) {

1620             dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1621                    << ":";

1622             for (auto &U : RegUnitSets[USIdx].Units)

1623               dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1624             dbgs() << "\n";

1625           });

1626  

1627     // Iterativelyprune unit sets.

1628     pruneUnitSets();

1629  

1630     DEBUG(dbgs() << "\nBeforeunion:\n";

1631           for(unsigned USIdx = 0, USEnd = RegUnitSets.size();

1632                USIdx < USEnd; ++USIdx) {

1633             dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1634                    << ":";

1635             for (auto &U : RegUnitSets[USIdx].Units)

1636               dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1637             dbgs() << "\n";

1638           }

1639           dbgs() << "\nUnionsets:\n");

1640  

1641     // Iterate overall unit sets, including new ones added by this loop.

1642     unsigned NumRegUnitSubSets =RegUnitSets.size();

1643     for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {

1644       // In theory,this is combinatorial. In practice, it needs to be bounded

1645       // by a smallnumber of sets for regpressure to be efficient.

1646       // If theassert is hit, we need to implement pruning.

1647       assert(Idx< (2*NumRegUnitSubSets) && "runaway unit set inference");

1648  

1649       // Compare newsets with all original classes.

1650       for(unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1;

1651            SearchIdx != EndIdx; ++SearchIdx) {

1652         std::set<unsigned> Intersection;

1653        std::set_intersection(RegUnitSets[Idx].Units.begin(),

1654                              RegUnitSets[Idx].Units.end(),

1655                              RegUnitSets[SearchIdx].Units.begin(),

1656                               RegUnitSets[SearchIdx].Units.end(),

1657                              std::inserter(Intersection, Intersection.begin()));

1658         if (Intersection.empty())

1659           continue;

1660  

1661         //Speculatively grow the RegUnitSets to hold the new set.

1662         RegUnitSets.resize(RegUnitSets.size() +1);

1663         RegUnitSets.back().Name =

1664           RegUnitSets[Idx].Name + "+" +RegUnitSets[SearchIdx].Name;

1665  

1666        std::set_union(RegUnitSets[Idx].Units.begin(),

1667                        RegUnitSets[Idx].Units.end(),

1668                       RegUnitSets[SearchIdx].Units.begin(),

1669                       RegUnitSets[SearchIdx].Units.end(),

1670                       std::inserter(RegUnitSets.back().Units,

1671                                      RegUnitSets.back().Units.begin()));

1672  

1673         // Find anexisting RegUnitSet, or add the union to the unique sets.

1674        std::vector<RegUnitSet>::const_iterator SetI =

1675           findRegUnitSet(RegUnitSets,RegUnitSets.back());

1676         if (SetI != std::prev(RegUnitSets.end()))

1677           RegUnitSets.pop_back();

1678         else {

1679           DEBUG(dbgs() << "UnitSet" << RegUnitSets.size()-1

1680                 << " " <<RegUnitSets.back().Name << ":";

1681                 for(auto &U : RegUnitSets.back().Units)

1682                   dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1683                 dbgs() << "\n";);

1684         }

1685       }

1686     }

1687  

1688     // Iterativelyprune unit sets after inferring supersets.

1689     pruneUnitSets();

1690  

1691     DEBUG(dbgs() << "\n";

1692           for(unsigned USIdx = 0, USEnd = RegUnitSets.size();

1693                USIdx < USEnd; ++USIdx) {

1694             dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1695                    << ":";

1696             for (auto &U : RegUnitSets[USIdx].Units)

1697               dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1698             dbgs() << "\n";

1699          });

类似的,首先也是将属于一个可分配寄存器类的寄存器的RegUnit构建为一个同类集,因为同一个寄存器类中的寄存器往往要相同的RegUnit,所以还要确保它们不重复,且以序号升序排序。这就是在1608行的CodeGenRegisterClass::buildRegUnitSet方法所要完成的工作。

909      void CodeGenRegisterClass::buildRegUnitSet(

910        std::vector<unsigned> &RegUnits)const {

911        std::vector<unsigned> TmpUnits;

912        for(RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI)

913          TmpUnits.push_back(*UnitI);

914        std::sort(TmpUnits.begin(), TmpUnits.end());

915        std::unique_copy(TmpUnits.begin(),TmpUnits.end(),

916                         std::back_inserter(RegUnits));

917      }

因为在去除了重复的RegUnit后,不排除两个寄存器类形成两个相同的RegUnit同类集,因此还需要在1612行调用findRegUnitSet方法删除重复的同类集。

1514   staticstd::vector<RegUnitSet>::const_iterator

1515  findRegUnitSet(const std::vector<RegUnitSet> &UniqueSets,

1516                  constRegUnitSet &Set) {

1517     std::vector<RegUnitSet>::const_iterator

1518       I = UniqueSets.begin(), E =UniqueSets.end();

1519     for(;I != E;++I) {

1530       if (I->Units == Set.Units)

1531         break;

1532     }

1533     return I;

1534   }

接下来在1628行调用CodeGenRegBank::pruneUnitSets方法裁剪掉某些同类集。裁剪的准则是:对两个同类集,如果其中一个RegUnit的数量比另一个少1或2,而且这两个同类集所有的RegUnit权重都相同。CodeGenRegBank::pruneUnitSets的注释谈到了这个方法的必要性:我们偶尔会将类似APSRPC和通用寄存器混在一起。我们也会看到许多特殊寄存器的子集,比如尾调用以及Thumb编码。所有可能的重叠集的数量是组合数,全部生成它们对寄存器压力建模而言太多了。理想地,我们可以在TableGen中通过以下方面静态地避免这个问题。1.目标机器定义仅包含可分配寄存器的寄存器类,将其他类标记为不可分配。2.将特殊寄存器标记为寄存器压力“不在意的”类。不过,我们尝试通过静态地将几乎一样的RegUnitSet合并,来处理没有很好定义的目标机器

1550   void CodeGenRegBank::pruneUnitSets(){

1551     assert(RegClassUnitSets.empty()&& "this invalidates RegClassUnitSets");

1552  

1553     // Form anequivalence class of UnitSets with no significant difference.

1554     std::vector<unsigned> SuperSetIDs;

1555     for (unsignedSubIdx = 0, EndIdx = RegUnitSets.size();

1556          SubIdx != EndIdx; ++SubIdx) {

1557       constRegUnitSet &SubSet = RegUnitSets[SubIdx];

1558       unsigned SuperIdx = 0;

1559       for (;SuperIdx != EndIdx; ++SuperIdx) {

1560         if (SuperIdx == SubIdx)

1561           continue;

1562  

1563         unsigned UnitWeight =RegUnits[SubSet.Units[0]].Weight;

1564         constRegUnitSet &SuperSet = RegUnitSets[SuperIdx];

1565         if (isRegUnitSubSet(SubSet.Units,SuperSet.Units)

1566             && (SubSet.Units.size() + 3> SuperSet.Units.size())

1567             && UnitWeight ==RegUnits[SuperSet.Units[0]].Weight

1568             && UnitWeight ==RegUnits[SuperSet.Units.back()].Weight) {

1569           DEBUG(dbgs() << "UnitSet" << SubIdx << " subsumed by " << SuperIdx

1570                 << "\n");

1571           break;

1572         }

1573       }

1574       if (SuperIdx == EndIdx)

1575         SuperSetIDs.push_back(SubIdx);

1576    }

1577     // PopulatePrunedUnitSets with each equivalence class's superset.

1578     std::vector<RegUnitSet>PrunedUnitSets(SuperSetIDs.size());

1579     for (unsignedi = 0, e = SuperSetIDs.size(); i != e; ++i) {

1580       unsigned SuperIdx = SuperSetIDs[i];

1581       PrunedUnitSets[i].Name =RegUnitSets[SuperIdx].Name;

1582      PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units);

1583     }

1584     RegUnitSets.swap(PrunedUnitSets);

1585   }

执行裁剪之后,在1643行循环创建这些同类集的联合闭包,从1647行的断言可以看到,我们不希望同类集的膨胀超过2倍。之后在1689行再次调用pruneUnitSets来再次裁剪同类集。

得到这些同类集之后,我们还需要知道,就RegUnit同类集而言,寄存器类间的包含关系。这就是CodeGenRegBank的容器RegClassUnitSets(std::vector<std::vector<unsigned>>)的作用。这个容器将记录一个指定寄存器类的RegUnit同类集的超集的集合。

CodeGenRegBank::computeRegUnitSets(续)

1701     // For each register class, list the UnitSets that aresupersets.

1702     RegClassUnitSets.resize(RegClasses.size());

1703     int RCIdx = -1;

1704     for (auto &RC : RegClasses) {

1705       ++RCIdx;

1706       if (!RC.Allocatable)

1707         continue;

1708  

1709       // Recomputethe sorted list of units in this class.

1710       std::vector<unsigned> RCRegUnits;

1711       RC.buildRegUnitSet(RCRegUnits);

1712  

1713       // Don'tincrease pressure for unallocatable regclasses.

1714       if (RCRegUnits.empty())

1715         continue;

1716  

1717       DEBUG(dbgs() << "RC "<< RC.getName() << " Units: \n";

1718             for (auto &U : RCRegUnits)

1719               dbgs() << RegUnits[U].getRoots()[0]->getName()<< " ";

1720             dbgs() << "\n  UnitSetIDs:");

1721  

1722       // Find allsupersets.

1723       for(unsigned USIdx = 0, USEnd = RegUnitSets.size();

1724            USIdx != USEnd; ++USIdx) {

1725         if (isRegUnitSubSet(RCRegUnits,RegUnitSets[USIdx].Units)) {

1726           DEBUG(dbgs() << " "<< USIdx);

1727          RegClassUnitSets[RCIdx].push_back(USIdx);

1728         }

1729       }

1730       DEBUG(dbgs() << "\n");

1731       assert(!RegClassUnitSets[RCIdx].empty()&& "missing unit set for regclass");

1732     }

1733  

1734     // For eachregister unit, ensure that we have the list of UnitSets that

1735     // contain theunit. Normally, this matches an existing list of UnitSets for a

1736     // registerclass. If not, we create a new entry in RegClassUnitSets as a

1737     //"fake" register class.

1738     for (unsignedUnitIdx = 0, UnitEnd = NumNativeRegUnits;

1739          UnitIdx < UnitEnd; ++UnitIdx) {

1740       std::vector<unsigned> RUSets;

1741       for(unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {

1742         RegUnitSet &RUSet = RegUnitSets[i];

1743         if (std::find(RUSet.Units.begin(),RUSet.Units.end(), UnitIdx)

1744             == RUSet.Units.end())

1745           continue;

1746         RUSets.push_back(i);

1747       }

1748       unsigned RCUnitSetsIdx = 0;

1749       for(unsigned e = RegClassUnitSets.size();

1750            RCUnitSetsIdx != e; ++RCUnitSetsIdx) {

1751         if (RegClassUnitSets[RCUnitSetsIdx] ==RUSets) {

1752           break;

1753         }

1754       }

1755       RegUnits[UnitIdx].RegClassUnitSetsIdx =RCUnitSetsIdx;

1756       if (RCUnitSetsIdx ==RegClassUnitSets.size()) {

1757         // Create anew list of UnitSets as a "fake" register class.

1758         RegClassUnitSets.resize(RCUnitSetsIdx +1);

1759         RegClassUnitSets[RCUnitSetsIdx].swap(RUSets);

1760       }

1761     }

1762   }

在1738行,NumNativeRegUnits是原生(即非为调整权重加入的)RegUnit数量。在1738行这个循环里,我们将RegUnit的RegClassUnitSetsIdx域设置为RegClassUnitSets中同类集都包含该RegUnit的项的下标。

3.3.5.4.       寄存器的包含关系

前面我们已经为每个寄存器索引计算的Lane掩码。这个Lane掩码指出从该寄存器索引援引的部分可以覆盖哪些更小的可援引部分。对于叶子寄存器索引,Lane掩码就是它在SubRegIndices容器里的序号。但寄存器索引是一个抽象的概念,脱离了具体的寄存器。很显然,对应具体的寄存器也存在相应的包含关系。因此CodeGenRegister里有一个类型为SmallVector<unsigned,16>的容器RegUnitLaneMasks来记录它的子寄存器的Lane掩码,也就不足为奇了。

1764   void CodeGenRegBank::computeRegUnitLaneMasks(){

1765     for (auto &Register : Registers) {

1766       // Create aninitial lane mask for all register units.

1767       const auto &RegUnits = Register.getRegUnits();

1768       CodeGenRegister::RegUnitLaneMaskListRegUnitLaneMasks(RegUnits.count(), 0);

1769       // Iteratethrough SubRegisters.

1770       typedefCodeGenRegister::SubRegMap SubRegMap;

1771       constSubRegMap &SubRegs = Register.getSubRegs();

1772       for(SubRegMap::const_iterator S = SubRegs.begin(),

1773            SE = SubRegs.end(); S != SE; ++S) {

1774         CodeGenRegister *SubReg = S->second;

1775         // Ignore non-leafsubregisters, their lane masks are fully covered by

1776         // the leafsubregisters anyway.

1777         if (SubReg->getSubRegs().size() != 0)

1778           continue;

1779         CodeGenSubRegIndex *SubRegIndex =S->first;

1780         constCodeGenRegister *SubRegister = S->second;

1781         unsigned LaneMask =SubRegIndex->LaneMask;

1782         // DistributeLaneMask to Register Units touched.

1783         for(unsigned SUI : SubRegister->getRegUnits()) {

1784           bool Found = false;

1785           unsigned u = 0;

1786           for(unsigned RU : RegUnits) {

1787             if (SUI == RU) {

1788               RegUnitLaneMasks[u] |= LaneMask;

1789               assert(!Found);

1790               Found = true;

1791             }

1792             ++u;

1793           }

1794           (void)Found;

1795           assert(Found);

1796         }

1797       }

1798      Register.setRegUnitLaneMasks(RegUnitLaneMasks);

1799     }

1800   }

1765行的循环表明,这个过程是对每个寄存器分别进行的。1772行的循环进入当前寄存器的子寄存器集合,1777行滤掉非叶子寄存器。1783行的循环遍历该叶子寄存器的RegUnit组成位图(注意该位图中只能有一位是1),而1786行的循环则遍历当前寄存器的RegUnit组成位图。如果1787行条件满足,循环变量u记录了这是当前寄存器RegUnit组成位图的第几位(当前寄存器一定包含这个叶子寄存器),在RegUnitLaneMasks中u所给出的位置记录该叶子寄存器索引的Lane掩码。这样,RegUnit组成位图与RegUnitLaneMasks也就关联起来了。

3.3.5.5.       其他信息的设置

回到CodeGenRegBank::computeDerivedInfo。1817行循环对寄存器类别设置HasDisjunctSubRegs,这个标记用于显示是否至少有两个子寄存器是互不不干扰的。这个标记的内容来自于寄存器对象中同名标记,在寄存器对象里,如果它包含1个以上的子寄存器,它的HasDisjunctSubRegs标记就是true。1824行循环设置RegUnitSets的Weight(即指定寄存器类所包含寄存器单元的总数)。余下的代码设置RegUnitSets成员的Order值,以及对RegUnitSetOrder容器(类型std::vector<unsigned>)的填充。1832行是对RegUnitSets成员以集合大小排序,结果保存在RegUnitSetOrder容器。那么当给出RegUnitSetOrder[RegUnit.Order]时,我们就能知道这个RegUnit的优先级。


0 0
原创粉丝点击