| 1 | //===- Barvinok.cpp - Barvinok's Algorithm ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "mlir/Analysis/Presburger/Barvinok.h" |
| 10 | #include "mlir/Analysis/Presburger/Utils.h" |
| 11 | #include "llvm/ADT/Sequence.h" |
| 12 | #include <algorithm> |
| 13 | |
| 14 | using namespace mlir; |
| 15 | using namespace presburger; |
| 16 | using namespace mlir::presburger::detail; |
| 17 | |
| 18 | /// Assuming that the input cone is pointed at the origin, |
| 19 | /// converts it to its dual in V-representation. |
| 20 | /// Essentially we just remove the all-zeroes constant column. |
| 21 | ConeV mlir::presburger::detail::getDual(ConeH cone) { |
| 22 | unsigned numIneq = cone.getNumInequalities(); |
| 23 | unsigned numVar = cone.getNumCols() - 1; |
| 24 | ConeV dual(numIneq, numVar, 0, 0); |
| 25 | // Assuming that an inequality of the form |
| 26 | // a1*x1 + ... + an*xn + b ≥ 0 |
| 27 | // is represented as a row [a1, ..., an, b] |
| 28 | // and that b = 0. |
| 29 | |
| 30 | for (auto i : llvm::seq<int>(Begin: 0, End: numIneq)) { |
| 31 | assert(cone.atIneq(i, numVar) == 0 && |
| 32 | "H-representation of cone is not centred at the origin!" ); |
| 33 | for (unsigned j = 0; j < numVar; ++j) { |
| 34 | dual.at(row: i, column: j) = cone.atIneq(i, j); |
| 35 | } |
| 36 | } |
| 37 | |
| 38 | // Now dual is of the form [ [a1, ..., an] , ... ] |
| 39 | // which is the V-representation of the dual. |
| 40 | return dual; |
| 41 | } |
| 42 | |
| 43 | /// Converts a cone in V-representation to the H-representation |
| 44 | /// of its dual, pointed at the origin (not at the original vertex). |
| 45 | /// Essentially adds a column consisting only of zeroes to the end. |
| 46 | ConeH mlir::presburger::detail::getDual(ConeV cone) { |
| 47 | unsigned rows = cone.getNumRows(); |
| 48 | unsigned columns = cone.getNumColumns(); |
| 49 | ConeH dual = defineHRep(numVars: columns); |
| 50 | // Add a new column (for constants) at the end. |
| 51 | // This will be initialized to zero. |
| 52 | cone.insertColumn(pos: columns); |
| 53 | |
| 54 | for (unsigned i = 0; i < rows; ++i) |
| 55 | dual.addInequality(inEq: cone.getRow(row: i)); |
| 56 | |
| 57 | // Now dual is of the form [ [a1, ..., an, 0] , ... ] |
| 58 | // which is the H-representation of the dual. |
| 59 | return dual; |
| 60 | } |
| 61 | |
| 62 | /// Find the index of a cone in V-representation. |
| 63 | DynamicAPInt mlir::presburger::detail::getIndex(const ConeV &cone) { |
| 64 | if (cone.getNumRows() > cone.getNumColumns()) |
| 65 | return DynamicAPInt(0); |
| 66 | |
| 67 | return cone.determinant(); |
| 68 | } |
| 69 | |
| 70 | /// Compute the generating function for a unimodular cone. |
| 71 | /// This consists of a single term of the form |
| 72 | /// sign * x^num / prod_j (1 - x^den_j) |
| 73 | /// |
| 74 | /// sign is either +1 or -1. |
| 75 | /// den_j is defined as the set of generators of the cone. |
| 76 | /// num is computed by expressing the vertex as a weighted |
| 77 | /// sum of the generators, and then taking the floor of the |
| 78 | /// coefficients. |
| 79 | GeneratingFunction |
| 80 | mlir::presburger::detail::computeUnimodularConeGeneratingFunction( |
| 81 | ParamPoint vertex, int sign, const ConeH &cone) { |
| 82 | // Consider a cone with H-representation [0 -1]. |
| 83 | // [-1 -2] |
| 84 | // Let the vertex be given by the matrix [ 2 2 0], with 2 params. |
| 85 | // [-1 -1/2 1] |
| 86 | |
| 87 | // `cone` must be unimodular. |
| 88 | assert(abs(getIndex(getDual(cone))) == 1 && "input cone is not unimodular!" ); |
| 89 | |
| 90 | unsigned numVar = cone.getNumVars(); |
| 91 | unsigned numIneq = cone.getNumInequalities(); |
| 92 | |
| 93 | // Thus its ray matrix, U, is the inverse of the |
| 94 | // transpose of its inequality matrix, `cone`. |
| 95 | // The last column of the inequality matrix is null, |
| 96 | // so we remove it to obtain a square matrix. |
| 97 | FracMatrix transp = FracMatrix(cone.getInequalities()).transpose(); |
| 98 | transp.removeRow(pos: numVar); |
| 99 | |
| 100 | FracMatrix generators(numVar, numIneq); |
| 101 | transp.determinant(/*inverse=*/&generators); // This is the U-matrix. |
| 102 | // Thus the generators are given by U = [2 -1]. |
| 103 | // [-1 0] |
| 104 | |
| 105 | // The powers in the denominator of the generating |
| 106 | // function are given by the generators of the cone, |
| 107 | // i.e., the rows of the matrix U. |
| 108 | std::vector<Point> denominator(numIneq); |
| 109 | ArrayRef<Fraction> row; |
| 110 | for (auto i : llvm::seq<int>(Begin: 0, End: numVar)) { |
| 111 | row = generators.getRow(row: i); |
| 112 | denominator[i] = Point(row); |
| 113 | } |
| 114 | |
| 115 | // The vertex is v \in Z^{d x (n+1)} |
| 116 | // We need to find affine functions of parameters λ_i(p) |
| 117 | // such that v = Σ λ_i(p)*u_i, |
| 118 | // where u_i are the rows of U (generators) |
| 119 | // The λ_i are given by the columns of Λ = v^T U^{-1}, and |
| 120 | // we have transp = U^{-1}. |
| 121 | // Then the exponent in the numerator will be |
| 122 | // Σ -floor(-λ_i(p))*u_i. |
| 123 | // Thus we store the (exponent of the) numerator as the affine function -Λ, |
| 124 | // since the generators u_i are already stored as the exponent of the |
| 125 | // denominator. Note that the outer -1 will have to be accounted for, as it is |
| 126 | // not stored. See end for an example. |
| 127 | |
| 128 | unsigned numColumns = vertex.getNumColumns(); |
| 129 | unsigned numRows = vertex.getNumRows(); |
| 130 | ParamPoint numerator(numColumns, numRows); |
| 131 | SmallVector<Fraction> ithCol(numRows); |
| 132 | for (auto i : llvm::seq<int>(Begin: 0, End: numColumns)) { |
| 133 | for (auto j : llvm::seq<int>(Begin: 0, End: numRows)) |
| 134 | ithCol[j] = vertex(j, i); |
| 135 | numerator.setRow(row: i, elems: transp.preMultiplyWithRow(rowVec: ithCol)); |
| 136 | numerator.negateRow(row: i); |
| 137 | } |
| 138 | // Therefore Λ will be given by [ 1 0 ] and the negation of this will be |
| 139 | // [ 1/2 -1 ] |
| 140 | // [ -1 -2 ] |
| 141 | // stored as the numerator. |
| 142 | // Algebraically, the numerator exponent is |
| 143 | // [ -2 ⌊ - N - M/2 + 1 ⌋ + 1 ⌊ 0 + M + 2 ⌋ ] -> first COLUMN of U is [2, -1] |
| 144 | // [ 1 ⌊ - N - M/2 + 1 ⌋ + 0 ⌊ 0 + M + 2 ⌋ ] -> second COLUMN of U is [-1, 0] |
| 145 | |
| 146 | return GeneratingFunction(numColumns - 1, SmallVector<int>(1, sign), |
| 147 | std::vector({numerator}), |
| 148 | std::vector({denominator})); |
| 149 | } |
| 150 | |
| 151 | /// We use Gaussian elimination to find the solution to a set of d equations |
| 152 | /// of the form |
| 153 | /// a_1 x_1 + ... + a_d x_d + b_1 m_1 + ... + b_p m_p + c = 0 |
| 154 | /// where x_i are variables, |
| 155 | /// m_i are parameters and |
| 156 | /// a_i, b_i, c are rational coefficients. |
| 157 | /// |
| 158 | /// The solution expresses each x_i as an affine function of the m_i, and is |
| 159 | /// therefore represented as a matrix of size d x (p+1). |
| 160 | /// If there is no solution, we return null. |
| 161 | std::optional<ParamPoint> |
| 162 | mlir::presburger::detail::solveParametricEquations(FracMatrix equations) { |
| 163 | // equations is a d x (d + p + 1) matrix. |
| 164 | // Each row represents an equation. |
| 165 | unsigned d = equations.getNumRows(); |
| 166 | unsigned numCols = equations.getNumColumns(); |
| 167 | |
| 168 | // If the determinant is zero, there is no unique solution. |
| 169 | // Thus we return null. |
| 170 | if (FracMatrix(equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1, |
| 171 | /*fromColumn=*/0, |
| 172 | /*toColumn=*/d - 1)) |
| 173 | .determinant() == 0) |
| 174 | return std::nullopt; |
| 175 | |
| 176 | // Perform row operations to make each column all zeros except for the |
| 177 | // diagonal element, which is made to be one. |
| 178 | for (unsigned i = 0; i < d; ++i) { |
| 179 | // First ensure that the diagonal element is nonzero, by swapping |
| 180 | // it with a row that is non-zero at column i. |
| 181 | if (equations(i, i) != 0) |
| 182 | continue; |
| 183 | for (unsigned j = i + 1; j < d; ++j) { |
| 184 | if (equations(j, i) == 0) |
| 185 | continue; |
| 186 | equations.swapRows(row: j, otherRow: i); |
| 187 | break; |
| 188 | } |
| 189 | |
| 190 | Fraction diagElement = equations(i, i); |
| 191 | |
| 192 | // Apply row operations to make all elements except the diagonal to zero. |
| 193 | for (unsigned j = 0; j < d; ++j) { |
| 194 | if (i == j) |
| 195 | continue; |
| 196 | if (equations(j, i) == 0) |
| 197 | continue; |
| 198 | // Apply row operations to make element (j, i) zero by subtracting the |
| 199 | // ith row, appropriately scaled. |
| 200 | Fraction currentElement = equations(j, i); |
| 201 | equations.addToRow(/*sourceRow=*/i, /*targetRow=*/j, |
| 202 | /*scale=*/-currentElement / diagElement); |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | // Rescale diagonal elements to 1. |
| 207 | for (unsigned i = 0; i < d; ++i) |
| 208 | equations.scaleRow(row: i, scale: 1 / equations(i, i)); |
| 209 | |
| 210 | // Now we have reduced the equations to the form |
| 211 | // x_i + b_1' m_1 + ... + b_p' m_p + c' = 0 |
| 212 | // i.e. each variable appears exactly once in the system, and has coefficient |
| 213 | // one. |
| 214 | // |
| 215 | // Thus we have |
| 216 | // x_i = - b_1' m_1 - ... - b_p' m_p - c |
| 217 | // and so we return the negation of the last p + 1 columns of the matrix. |
| 218 | // |
| 219 | // We copy these columns and return them. |
| 220 | ParamPoint vertex = |
| 221 | equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1, |
| 222 | /*fromColumn=*/d, /*toColumn=*/numCols - 1); |
| 223 | vertex.negateMatrix(); |
| 224 | return vertex; |
| 225 | } |
| 226 | |
| 227 | /// This is an implementation of the Clauss-Loechner algorithm for chamber |
| 228 | /// decomposition. |
| 229 | /// |
| 230 | /// We maintain a list of pairwise disjoint chambers and the generating |
| 231 | /// functions corresponding to each one. We iterate over the list of regions, |
| 232 | /// each time adding the current region's generating function to the chambers |
| 233 | /// where it is active and separating the chambers where it is not. |
| 234 | /// |
| 235 | /// Given the region each generating function is active in, for each subset of |
| 236 | /// generating functions the region that (the sum of) precisely this subset is |
| 237 | /// in, is the intersection of the regions that these are active in, |
| 238 | /// intersected with the complements of the remaining regions. |
| 239 | std::vector<std::pair<PresburgerSet, GeneratingFunction>> |
| 240 | mlir::presburger::detail::computeChamberDecomposition( |
| 241 | unsigned numSymbols, ArrayRef<std::pair<PresburgerSet, GeneratingFunction>> |
| 242 | regionsAndGeneratingFunctions) { |
| 243 | assert(!regionsAndGeneratingFunctions.empty() && |
| 244 | "there must be at least one chamber!" ); |
| 245 | // We maintain a list of regions and their associated generating function |
| 246 | // initialized with the universe and the empty generating function. |
| 247 | std::vector<std::pair<PresburgerSet, GeneratingFunction>> chambers = { |
| 248 | {PresburgerSet::getUniverse(space: PresburgerSpace::getSetSpace(numDims: numSymbols)), |
| 249 | GeneratingFunction(numSymbols, {}, {}, {})}}; |
| 250 | |
| 251 | // We iterate over the region list. |
| 252 | // |
| 253 | // For each activity region R_j (corresponding to the generating function |
| 254 | // gf_j), we examine all the current chambers R_i. |
| 255 | // |
| 256 | // If R_j has a full-dimensional intersection with an existing chamber R_i, |
| 257 | // then that chamber is replaced by two new ones: |
| 258 | // 1. the intersection R_i \cap R_j, where the generating function is |
| 259 | // gf_i + gf_j. |
| 260 | // 2. the difference R_i - R_j, where the generating function is gf_i. |
| 261 | // |
| 262 | // At each step, we define a new chamber list after considering gf_j, |
| 263 | // replacing and appending chambers as discussed above. |
| 264 | // |
| 265 | // The loop has the invariant that the union over all the chambers gives the |
| 266 | // universe at every step. |
| 267 | for (const auto &[region, generatingFunction] : |
| 268 | regionsAndGeneratingFunctions) { |
| 269 | std::vector<std::pair<PresburgerSet, GeneratingFunction>> newChambers; |
| 270 | |
| 271 | for (const auto &[currentRegion, currentGeneratingFunction] : chambers) { |
| 272 | PresburgerSet intersection = currentRegion.intersect(set: region); |
| 273 | |
| 274 | // If the intersection is not full-dimensional, we do not modify |
| 275 | // the chamber list. |
| 276 | if (!intersection.isFullDim()) { |
| 277 | newChambers.emplace_back(args: currentRegion, args: currentGeneratingFunction); |
| 278 | continue; |
| 279 | } |
| 280 | |
| 281 | // If it is, we add the intersection and the difference as chambers. |
| 282 | newChambers.emplace_back(args&: intersection, |
| 283 | args: currentGeneratingFunction + generatingFunction); |
| 284 | newChambers.emplace_back(args: currentRegion.subtract(set: region), |
| 285 | args: currentGeneratingFunction); |
| 286 | } |
| 287 | chambers = std::move(newChambers); |
| 288 | } |
| 289 | |
| 290 | return chambers; |
| 291 | } |
| 292 | |
| 293 | /// For a polytope expressed as a set of n inequalities, compute the generating |
| 294 | /// function corresponding to the lattice points included in the polytope. This |
| 295 | /// algorithm has three main steps: |
| 296 | /// 1. Enumerate the vertices, by iterating over subsets of inequalities and |
| 297 | /// checking for satisfiability. For each d-subset of inequalities (where d |
| 298 | /// is the number of variables), we solve to obtain the vertex in terms of |
| 299 | /// the parameters, and then check for the region in parameter space where |
| 300 | /// this vertex satisfies the remaining (n - d) inequalities. |
| 301 | /// 2. For each vertex, identify the tangent cone and compute the generating |
| 302 | /// function corresponding to it. The generating function depends on the |
| 303 | /// parametric expression of the vertex and the (non-parametric) generators |
| 304 | /// of the tangent cone. |
| 305 | /// 3. [Clauss-Loechner decomposition] Identify the regions in parameter space |
| 306 | /// (chambers) where each vertex is active, and accordingly compute the |
| 307 | /// GF of the polytope in each chamber. |
| 308 | /// |
| 309 | /// Verdoolaege, Sven, et al. "Counting integer points in parametric |
| 310 | /// polytopes using Barvinok's rational functions." Algorithmica 48 (2007): |
| 311 | /// 37-66. |
| 312 | std::vector<std::pair<PresburgerSet, GeneratingFunction>> |
| 313 | mlir::presburger::detail::computePolytopeGeneratingFunction( |
| 314 | const PolyhedronH &poly) { |
| 315 | unsigned numVars = poly.getNumRangeVars(); |
| 316 | unsigned numSymbols = poly.getNumSymbolVars(); |
| 317 | unsigned numIneqs = poly.getNumInequalities(); |
| 318 | |
| 319 | // We store a list of the computed vertices. |
| 320 | std::vector<ParamPoint> vertices; |
| 321 | // For each vertex, we store the corresponding active region and the |
| 322 | // generating functions of the tangent cone, in order. |
| 323 | std::vector<std::pair<PresburgerSet, GeneratingFunction>> |
| 324 | regionsAndGeneratingFunctions; |
| 325 | |
| 326 | // We iterate over all subsets of inequalities with cardinality numVars, |
| 327 | // using permutations of numVars 1's and (numIneqs - numVars) 0's. |
| 328 | // |
| 329 | // For a given permutation, we consider a subset which contains |
| 330 | // the i'th inequality if the i'th bit in the bitset is 1. |
| 331 | // |
| 332 | // We start with the permutation that takes the last numVars inequalities. |
| 333 | SmallVector<int> indicator(numIneqs); |
| 334 | for (unsigned i = numIneqs - numVars; i < numIneqs; ++i) |
| 335 | indicator[i] = 1; |
| 336 | |
| 337 | do { |
| 338 | // Collect the inequalities corresponding to the bits which are set |
| 339 | // and the remaining ones. |
| 340 | auto [subset, remainder] = poly.getInequalities().splitByBitset(indicator); |
| 341 | // All other inequalities are stored in a2 and b2c2. |
| 342 | // |
| 343 | // These are column-wise splits of the inequalities; |
| 344 | // a2 stores the coefficients of the variables, and |
| 345 | // b2c2 stores the coefficients of the parameters and the constant term. |
| 346 | FracMatrix a2(numIneqs - numVars, numVars); |
| 347 | FracMatrix b2c2(numIneqs - numVars, numSymbols + 1); |
| 348 | a2 = FracMatrix( |
| 349 | remainder.getSubMatrix(fromRow: 0, toRow: numIneqs - numVars - 1, fromColumn: 0, toColumn: numVars - 1)); |
| 350 | b2c2 = FracMatrix(remainder.getSubMatrix(fromRow: 0, toRow: numIneqs - numVars - 1, fromColumn: numVars, |
| 351 | toColumn: numVars + numSymbols)); |
| 352 | |
| 353 | // Find the vertex, if any, corresponding to the current subset of |
| 354 | // inequalities. |
| 355 | std::optional<ParamPoint> vertex = |
| 356 | solveParametricEquations(equations: FracMatrix(subset)); // d x (p+1) |
| 357 | |
| 358 | if (!vertex) |
| 359 | continue; |
| 360 | if (llvm::is_contained(Range&: vertices, Element: vertex)) |
| 361 | continue; |
| 362 | // If this subset corresponds to a vertex that has not been considered, |
| 363 | // store it. |
| 364 | vertices.emplace_back(args&: *vertex); |
| 365 | |
| 366 | // If a vertex is formed by the intersection of more than d facets, we |
| 367 | // assume that any d-subset of these facets can be solved to obtain its |
| 368 | // expression. This assumption is valid because, if the vertex has two |
| 369 | // distinct parametric expressions, then a nontrivial equality among the |
| 370 | // parameters holds, which is a contradiction as we know the parameter |
| 371 | // space to be full-dimensional. |
| 372 | |
| 373 | // Let the current vertex be [X | y], where |
| 374 | // X represents the coefficients of the parameters and |
| 375 | // y represents the constant term. |
| 376 | // |
| 377 | // The region (in parameter space) where this vertex is active is given |
| 378 | // by substituting the vertex into the *remaining* inequalities of the |
| 379 | // polytope (those which were not collected into `subset`), i.e., into the |
| 380 | // inequalities [A2 | B2 | c2]. |
| 381 | // |
| 382 | // Thus, the coefficients of the parameters after substitution become |
| 383 | // (A2 • X + B2) |
| 384 | // and the constant terms become |
| 385 | // (A2 • y + c2). |
| 386 | // |
| 387 | // The region is therefore given by |
| 388 | // (A2 • X + B2) p + (A2 • y + c2) ≥ 0 |
| 389 | // |
| 390 | // This is equivalent to A2 • [X | y] + [B2 | c2]. |
| 391 | // |
| 392 | // Thus we premultiply [X | y] with each row of A2 |
| 393 | // and add each row of [B2 | c2]. |
| 394 | FracMatrix activeRegion(numIneqs - numVars, numSymbols + 1); |
| 395 | for (unsigned i = 0; i < numIneqs - numVars; i++) { |
| 396 | activeRegion.setRow(row: i, elems: vertex->preMultiplyWithRow(rowVec: a2.getRow(row: i))); |
| 397 | activeRegion.addToRow(row: i, rowVec: b2c2.getRow(row: i), scale: 1); |
| 398 | } |
| 399 | |
| 400 | // We convert the representation of the active region to an integers-only |
| 401 | // form so as to store it as a PresburgerSet. |
| 402 | IntegerPolyhedron activeRegionRel( |
| 403 | PresburgerSpace::getRelationSpace(numDomain: 0, numRange: numSymbols, numSymbols: 0, numLocals: 0), activeRegion); |
| 404 | |
| 405 | // Now, we compute the generating function at this vertex. |
| 406 | // We collect the inequalities corresponding to each vertex to compute |
| 407 | // the tangent cone at that vertex. |
| 408 | // |
| 409 | // We only need the coefficients of the variables (NOT the parameters) |
| 410 | // as the generating function only depends on these. |
| 411 | // We translate the cones to be pointed at the origin by making the |
| 412 | // constant terms zero. |
| 413 | ConeH tangentCone = defineHRep(numVars); |
| 414 | for (unsigned j = 0, e = subset.getNumRows(); j < e; ++j) { |
| 415 | SmallVector<DynamicAPInt> ineq(numVars + 1); |
| 416 | for (unsigned k = 0; k < numVars; ++k) |
| 417 | ineq[k] = subset(j, k); |
| 418 | tangentCone.addInequality(inEq: ineq); |
| 419 | } |
| 420 | // We assume that the tangent cone is unimodular, so there is no need |
| 421 | // to decompose it. |
| 422 | // |
| 423 | // In the general case, the unimodular decomposition may have several |
| 424 | // cones. |
| 425 | GeneratingFunction vertexGf(numSymbols, {}, {}, {}); |
| 426 | SmallVector<std::pair<int, ConeH>, 4> unimodCones = {{1, tangentCone}}; |
| 427 | for (const std::pair<int, ConeH> &signedCone : unimodCones) { |
| 428 | auto [sign, cone] = signedCone; |
| 429 | vertexGf = vertexGf + |
| 430 | computeUnimodularConeGeneratingFunction(vertex: *vertex, sign, cone); |
| 431 | } |
| 432 | // We store the vertex we computed with the generating function of its |
| 433 | // tangent cone. |
| 434 | regionsAndGeneratingFunctions.emplace_back(args: PresburgerSet(activeRegionRel), |
| 435 | args&: vertexGf); |
| 436 | } while (std::next_permutation(first: indicator.begin(), last: indicator.end())); |
| 437 | |
| 438 | // Now, we use Clauss-Loechner decomposition to identify regions in parameter |
| 439 | // space where each vertex is active. These regions (chambers) have the |
| 440 | // property that no two of them have a full-dimensional intersection, i.e., |
| 441 | // they may share "facets" or "edges", but their intersection can only have |
| 442 | // up to numVars - 1 dimensions. |
| 443 | // |
| 444 | // In each chamber, we sum up the generating functions of the active vertices |
| 445 | // to find the generating function of the polytope. |
| 446 | return computeChamberDecomposition(numSymbols, regionsAndGeneratingFunctions); |
| 447 | } |
| 448 | |
| 449 | /// We use an iterative procedure to find a vector not orthogonal |
| 450 | /// to a given set, ignoring the null vectors. |
| 451 | /// Let the inputs be {x_1, ..., x_k}, all vectors of length n. |
| 452 | /// |
| 453 | /// In the following, |
| 454 | /// vs[:i] means the elements of vs up to and including the i'th one, |
| 455 | /// <vs, us> means the dot product of vs and us, |
| 456 | /// vs ++ [v] means the vector vs with the new element v appended to it. |
| 457 | /// |
| 458 | /// We proceed iteratively; for steps d = 0, ... n-1, we construct a vector |
| 459 | /// which is not orthogonal to any of {x_1[:d], ..., x_n[:d]}, ignoring |
| 460 | /// the null vectors. |
| 461 | /// At step d = 0, we let vs = [1]. Clearly this is not orthogonal to |
| 462 | /// any vector in the set {x_1[0], ..., x_n[0]}, except the null ones, |
| 463 | /// which we ignore. |
| 464 | /// At step d > 0 , we need a number v |
| 465 | /// s.t. <x_i[:d], vs++[v]> != 0 for all i. |
| 466 | /// => <x_i[:d-1], vs> + x_i[d]*v != 0 |
| 467 | /// => v != - <x_i[:d-1], vs> / x_i[d] |
| 468 | /// We compute this value for all x_i, and then |
| 469 | /// set v to be the maximum element of this set plus one. Thus |
| 470 | /// v is outside the set as desired, and we append it to vs |
| 471 | /// to obtain the result of the d'th step. |
| 472 | Point mlir::presburger::detail::getNonOrthogonalVector( |
| 473 | ArrayRef<Point> vectors) { |
| 474 | unsigned dim = vectors[0].size(); |
| 475 | assert(llvm::all_of( |
| 476 | vectors, |
| 477 | [&dim](const Point &vector) { return vector.size() == dim; }) && |
| 478 | "all vectors need to be the same size!" ); |
| 479 | |
| 480 | SmallVector<Fraction> newPoint = {Fraction(1, 1)}; |
| 481 | Fraction maxDisallowedValue = -Fraction(1, 0), |
| 482 | disallowedValue = Fraction(0, 1); |
| 483 | |
| 484 | for (unsigned d = 1; d < dim; ++d) { |
| 485 | // Compute the disallowed values - <x_i[:d-1], vs> / x_i[d] for each i. |
| 486 | maxDisallowedValue = -Fraction(1, 0); |
| 487 | for (const Point &vector : vectors) { |
| 488 | if (vector[d] == 0) |
| 489 | continue; |
| 490 | disallowedValue = |
| 491 | -dotProduct(a: ArrayRef(vector).slice(N: 0, M: d), b: newPoint) / vector[d]; |
| 492 | |
| 493 | // Find the biggest such value |
| 494 | maxDisallowedValue = std::max(a: maxDisallowedValue, b: disallowedValue); |
| 495 | } |
| 496 | newPoint.emplace_back(Args: maxDisallowedValue + 1); |
| 497 | } |
| 498 | return newPoint; |
| 499 | } |
| 500 | |
| 501 | /// We use the following recursive formula to find the coefficient of |
| 502 | /// s^power in the rational function given by P(s)/Q(s). |
| 503 | /// |
| 504 | /// Let P[i] denote the coefficient of s^i in the polynomial P(s). |
| 505 | /// (P/Q)[r] = |
| 506 | /// if (r == 0) then |
| 507 | /// P[0]/Q[0] |
| 508 | /// else |
| 509 | /// (P[r] - {Σ_{i=1}^r (P/Q)[r-i] * Q[i])}/(Q[0]) |
| 510 | /// We therefore recursively call `getCoefficientInRationalFunction` on |
| 511 | /// all i \in [0, power). |
| 512 | /// |
| 513 | /// https://math.ucdavis.edu/~deloera/researchsummary/ |
| 514 | /// barvinokalgorithm-latte1.pdf, p. 1285 |
| 515 | QuasiPolynomial mlir::presburger::detail::getCoefficientInRationalFunction( |
| 516 | unsigned power, ArrayRef<QuasiPolynomial> num, ArrayRef<Fraction> den) { |
| 517 | assert(!den.empty() && "division by empty denominator in rational function!" ); |
| 518 | |
| 519 | unsigned numParam = num[0].getNumInputs(); |
| 520 | // We use the `isEqual` method of PresburgerSpace, which QuasiPolynomial |
| 521 | // inherits from. |
| 522 | assert(llvm::all_of(num, |
| 523 | [&num](const QuasiPolynomial &qp) { |
| 524 | return num[0].isEqual(qp); |
| 525 | }) && |
| 526 | "the quasipolynomials should all belong to the same space!" ); |
| 527 | |
| 528 | std::vector<QuasiPolynomial> coefficients; |
| 529 | coefficients.reserve(n: power + 1); |
| 530 | |
| 531 | coefficients.emplace_back(args: num[0] / den[0]); |
| 532 | for (unsigned i = 1; i <= power; ++i) { |
| 533 | // If the power is not there in the numerator, the coefficient is zero. |
| 534 | coefficients.emplace_back(args: i < num.size() ? num[i] |
| 535 | : QuasiPolynomial(numParam, 0)); |
| 536 | |
| 537 | // After den.size(), the coefficients are zero, so we stop |
| 538 | // subtracting at that point (if it is less than i). |
| 539 | unsigned limit = std::min<unsigned long>(a: i, b: den.size() - 1); |
| 540 | for (unsigned j = 1; j <= limit; ++j) |
| 541 | coefficients[i] = coefficients[i] - |
| 542 | coefficients[i - j] * QuasiPolynomial(numParam, den[j]); |
| 543 | |
| 544 | coefficients[i] = coefficients[i] / den[0]; |
| 545 | } |
| 546 | return coefficients[power].simplify(); |
| 547 | } |
| 548 | |
| 549 | /// Substitute x_i = t^μ_i in one term of a generating function, returning |
| 550 | /// a quasipolynomial which represents the exponent of the numerator |
| 551 | /// of the result, and a vector which represents the exponents of the |
| 552 | /// denominator of the result. |
| 553 | /// If the returned value is {num, dens}, it represents the function |
| 554 | /// t^num / \prod_j (1 - t^dens[j]). |
| 555 | /// v represents the affine functions whose floors are multiplied by the |
| 556 | /// generators, and ds represents the list of generators. |
| 557 | std::pair<QuasiPolynomial, std::vector<Fraction>> |
| 558 | substituteMuInTerm(unsigned numParams, const ParamPoint &v, |
| 559 | const std::vector<Point> &ds, const Point &mu) { |
| 560 | unsigned numDims = mu.size(); |
| 561 | #ifndef NDEBUG |
| 562 | for (const Point &d : ds) |
| 563 | assert(d.size() == numDims && |
| 564 | "μ has to have the same number of dimensions as the generators!" ); |
| 565 | #endif |
| 566 | |
| 567 | // First, the exponent in the numerator becomes |
| 568 | // - (μ • u_1) * (floor(first col of v)) |
| 569 | // - (μ • u_2) * (floor(second col of v)) - ... |
| 570 | // - (μ • u_d) * (floor(d'th col of v)) |
| 571 | // So we store the negation of the dot products. |
| 572 | |
| 573 | // We have d terms, each of whose coefficient is the negative dot product. |
| 574 | SmallVector<Fraction> coefficients; |
| 575 | coefficients.reserve(N: numDims); |
| 576 | for (const Point &d : ds) |
| 577 | coefficients.emplace_back(Args: -dotProduct(a: mu, b: d)); |
| 578 | |
| 579 | // Then, the affine function is a single floor expression, given by the |
| 580 | // corresponding column of v. |
| 581 | ParamPoint vTranspose = v.transpose(); |
| 582 | std::vector<std::vector<SmallVector<Fraction>>> affine; |
| 583 | affine.reserve(n: numDims); |
| 584 | for (unsigned j = 0; j < numDims; ++j) |
| 585 | affine.push_back(x: {SmallVector<Fraction>{vTranspose.getRow(row: j)}}); |
| 586 | |
| 587 | QuasiPolynomial num(numParams, coefficients, affine); |
| 588 | num = num.simplify(); |
| 589 | |
| 590 | std::vector<Fraction> dens; |
| 591 | dens.reserve(n: ds.size()); |
| 592 | // Similarly, each term in the denominator has exponent |
| 593 | // given by the dot product of μ with u_i. |
| 594 | for (const Point &d : ds) { |
| 595 | // This term in the denominator is |
| 596 | // (1 - t^dens.back()) |
| 597 | dens.emplace_back(args: dotProduct(a: d, b: mu)); |
| 598 | } |
| 599 | |
| 600 | return {num, dens}; |
| 601 | } |
| 602 | |
| 603 | /// Normalize all denominator exponents `dens` to their absolute values |
| 604 | /// by multiplying and dividing by the inverses, in a function of the form |
| 605 | /// sign * t^num / prod_j (1 - t^dens[j]). |
| 606 | /// Here, sign = ± 1, |
| 607 | /// num is a QuasiPolynomial, and |
| 608 | /// each dens[j] is a Fraction. |
| 609 | void normalizeDenominatorExponents(int &sign, QuasiPolynomial &num, |
| 610 | std::vector<Fraction> &dens) { |
| 611 | // We track the number of exponents that are negative in the |
| 612 | // denominator, and convert them to their absolute values. |
| 613 | unsigned numNegExps = 0; |
| 614 | Fraction sumNegExps(0, 1); |
| 615 | for (const auto &den : dens) { |
| 616 | if (den < 0) { |
| 617 | numNegExps += 1; |
| 618 | sumNegExps += den; |
| 619 | } |
| 620 | } |
| 621 | |
| 622 | // If we have (1 - t^-c) in the denominator, for positive c, |
| 623 | // multiply and divide by t^c. |
| 624 | // We convert all negative-exponent terms at once; therefore |
| 625 | // we multiply and divide by t^sumNegExps. |
| 626 | // Then we get |
| 627 | // -(1 - t^c) in the denominator, |
| 628 | // increase the numerator by c, and |
| 629 | // flip the sign of the function. |
| 630 | if (numNegExps % 2 == 1) |
| 631 | sign = -sign; |
| 632 | num = num - QuasiPolynomial(num.getNumInputs(), sumNegExps); |
| 633 | } |
| 634 | |
| 635 | /// Compute the binomial coefficients nCi for 0 ≤ i ≤ r, |
| 636 | /// where n is a QuasiPolynomial. |
| 637 | std::vector<QuasiPolynomial> getBinomialCoefficients(const QuasiPolynomial &n, |
| 638 | unsigned r) { |
| 639 | unsigned numParams = n.getNumInputs(); |
| 640 | std::vector<QuasiPolynomial> coefficients; |
| 641 | coefficients.reserve(n: r + 1); |
| 642 | coefficients.emplace_back(args&: numParams, args: 1); |
| 643 | for (unsigned j = 1; j <= r; ++j) |
| 644 | // We use the recursive formula for binomial coefficients here and below. |
| 645 | coefficients.emplace_back( |
| 646 | args: (coefficients[j - 1] * (n - QuasiPolynomial(numParams, j - 1)) / |
| 647 | Fraction(j, 1)) |
| 648 | .simplify()); |
| 649 | return coefficients; |
| 650 | } |
| 651 | |
| 652 | /// Compute the binomial coefficients nCi for 0 ≤ i ≤ r, |
| 653 | /// where n is a QuasiPolynomial. |
| 654 | std::vector<Fraction> getBinomialCoefficients(const Fraction &n, |
| 655 | const Fraction &r) { |
| 656 | std::vector<Fraction> coefficients; |
| 657 | coefficients.reserve(n: (int64_t)floor(f: r)); |
| 658 | coefficients.emplace_back(args: 1); |
| 659 | for (unsigned j = 1; j <= r; ++j) |
| 660 | coefficients.emplace_back(args: coefficients[j - 1] * (n - (j - 1)) / (j)); |
| 661 | return coefficients; |
| 662 | } |
| 663 | |
| 664 | /// We have a generating function of the form |
| 665 | /// f_p(x) = \sum_i sign_i * (x^n_i(p)) / (\prod_j (1 - x^d_{ij}) |
| 666 | /// |
| 667 | /// where sign_i is ±1, |
| 668 | /// n_i \in Q^p -> Q^d is the sum of the vectors d_{ij}, weighted by the |
| 669 | /// floors of d affine functions on p parameters. |
| 670 | /// d_{ij} \in Q^d are vectors. |
| 671 | /// |
| 672 | /// We need to find the number of terms of the form x^t in the expansion of |
| 673 | /// this function. |
| 674 | /// However, direct substitution (x = (1, ..., 1)) causes the denominator |
| 675 | /// to become zero. |
| 676 | /// |
| 677 | /// We therefore use the following procedure instead: |
| 678 | /// 1. Substitute x_i = (s+1)^μ_i for some vector μ. This makes the generating |
| 679 | /// function a function of a scalar s. |
| 680 | /// 2. Write each term in this function as P(s)/Q(s), where P and Q are |
| 681 | /// polynomials. P has coefficients as quasipolynomials in d parameters, while |
| 682 | /// Q has coefficients as scalars. |
| 683 | /// 3. Find the constant term in the expansion of each term P(s)/Q(s). This is |
| 684 | /// equivalent to substituting s = 0. |
| 685 | /// |
| 686 | /// Verdoolaege, Sven, et al. "Counting integer points in parametric |
| 687 | /// polytopes using Barvinok's rational functions." Algorithmica 48 (2007): |
| 688 | /// 37-66. |
| 689 | QuasiPolynomial |
| 690 | mlir::presburger::detail::computeNumTerms(const GeneratingFunction &gf) { |
| 691 | // Step (1) We need to find a μ such that we can substitute x_i = |
| 692 | // (s+1)^μ_i. After this substitution, the exponent of (s+1) in the |
| 693 | // denominator is (μ_i • d_{ij}) in each term. Clearly, this cannot become |
| 694 | // zero. Hence we find a vector μ that is not orthogonal to any of the |
| 695 | // d_{ij} and substitute x accordingly. |
| 696 | std::vector<Point> allDenominators; |
| 697 | for (ArrayRef<Point> den : gf.getDenominators()) |
| 698 | llvm::append_range(C&: allDenominators, R&: den); |
| 699 | Point mu = getNonOrthogonalVector(vectors: allDenominators); |
| 700 | |
| 701 | unsigned numParams = gf.getNumParams(); |
| 702 | const std::vector<std::vector<Point>> &ds = gf.getDenominators(); |
| 703 | QuasiPolynomial totalTerm(numParams, 0); |
| 704 | for (unsigned i = 0, e = ds.size(); i < e; ++i) { |
| 705 | int sign = gf.getSigns()[i]; |
| 706 | |
| 707 | // Compute the new exponents of (s+1) for the numerator and the |
| 708 | // denominator after substituting μ. |
| 709 | auto [numExp, dens] = |
| 710 | substituteMuInTerm(numParams, v: gf.getNumerators()[i], ds: ds[i], mu); |
| 711 | // Now the numerator is (s+1)^numExp |
| 712 | // and the denominator is \prod_j (1 - (s+1)^dens[j]). |
| 713 | |
| 714 | // Step (2) We need to express the terms in the function as quotients of |
| 715 | // polynomials. Each term is now of the form |
| 716 | // sign_i * (s+1)^numExp / (\prod_j (1 - (s+1)^dens[j])) |
| 717 | // For the i'th term, we first normalize the denominator to have only |
| 718 | // positive exponents. We convert all the dens[j] to their |
| 719 | // absolute values and change the sign and exponent in the numerator. |
| 720 | normalizeDenominatorExponents(sign, num&: numExp, dens); |
| 721 | |
| 722 | // Then, using the formula for geometric series, we replace each (1 - |
| 723 | // (s+1)^(dens[j])) with |
| 724 | // (-s)(\sum_{0 ≤ k < dens[j]} (s+1)^k). |
| 725 | for (auto &j : dens) |
| 726 | j = abs(f: j) - 1; |
| 727 | // Note that at this point, the semantics of `dens[j]` changes to mean |
| 728 | // a term (\sum_{0 ≤ k ≤ dens[j]} (s+1)^k). The denominator is, as before, |
| 729 | // a product of these terms. |
| 730 | |
| 731 | // Since the -s are taken out, the sign changes if there is an odd number |
| 732 | // of such terms. |
| 733 | unsigned r = dens.size(); |
| 734 | if (dens.size() % 2 == 1) |
| 735 | sign = -sign; |
| 736 | |
| 737 | // Thus the term overall now has the form |
| 738 | // sign'_i * (s+1)^numExp / |
| 739 | // (s^r * \prod_j (\sum_{0 ≤ k < dens[j]} (s+1)^k)). |
| 740 | // This means that |
| 741 | // the numerator is a polynomial in s, with coefficients as |
| 742 | // quasipolynomials (given by binomial coefficients), and the denominator |
| 743 | // is a polynomial in s, with integral coefficients (given by taking the |
| 744 | // convolution over all j). |
| 745 | |
| 746 | // Step (3) We need to find the constant term in the expansion of each |
| 747 | // term. Since each term has s^r as a factor in the denominator, we avoid |
| 748 | // substituting s = 0 directly; instead, we find the coefficient of s^r in |
| 749 | // sign'_i * (s+1)^numExp / (\prod_j (\sum_k (s+1)^k)), |
| 750 | // Letting P(s) = (s+1)^numExp and Q(s) = \prod_j (...), |
| 751 | // we need to find the coefficient of s^r in P(s)/Q(s), |
| 752 | // for which we use the `getCoefficientInRationalFunction()` function. |
| 753 | |
| 754 | // First, we compute the coefficients of P(s), which are binomial |
| 755 | // coefficients. |
| 756 | // We only need the first r+1 of these, as higher-order terms do not |
| 757 | // contribute to the coefficient of s^r. |
| 758 | std::vector<QuasiPolynomial> numeratorCoefficients = |
| 759 | getBinomialCoefficients(n: numExp, r); |
| 760 | |
| 761 | // Then we compute the coefficients of each individual term in Q(s), |
| 762 | // which are (dens[i]+1) C (k+1) for 0 ≤ k ≤ dens[i]. |
| 763 | std::vector<std::vector<Fraction>> eachTermDenCoefficients; |
| 764 | std::vector<Fraction> singleTermDenCoefficients; |
| 765 | eachTermDenCoefficients.reserve(n: r); |
| 766 | for (const Fraction &den : dens) { |
| 767 | singleTermDenCoefficients = getBinomialCoefficients(n: den + 1, r: den + 1); |
| 768 | eachTermDenCoefficients.emplace_back( |
| 769 | args: ArrayRef<Fraction>(singleTermDenCoefficients).drop_front()); |
| 770 | } |
| 771 | |
| 772 | // Now we find the coefficients in Q(s) itself |
| 773 | // by taking the convolution of the coefficients |
| 774 | // of all the terms. |
| 775 | std::vector<Fraction> denominatorCoefficients; |
| 776 | denominatorCoefficients = eachTermDenCoefficients[0]; |
| 777 | for (unsigned j = 1, e = eachTermDenCoefficients.size(); j < e; ++j) |
| 778 | denominatorCoefficients = multiplyPolynomials(a: denominatorCoefficients, |
| 779 | b: eachTermDenCoefficients[j]); |
| 780 | |
| 781 | totalTerm = |
| 782 | totalTerm + getCoefficientInRationalFunction(power: r, num: numeratorCoefficients, |
| 783 | den: denominatorCoefficients) * |
| 784 | QuasiPolynomial(numParams, sign); |
| 785 | } |
| 786 | |
| 787 | return totalTerm.simplify(); |
| 788 | } |
| 789 | |