MatmulOptimizer.h source code [polly/include/polly/MatmulOptimizer.h]

1	//===- MatmulOptimizer.h -------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef POLLY_MATMULOPTIMIZER_H
10	#define POLLY_MATMULOPTIMIZER_H
11
12	#include "isl/isl-noexceptions.h"
13
14	namespace llvm {
15	class TargetTransformInfo;
16	}
17
18	namespace polly {
19	class Dependences;
20
21	/// Apply the BLIS matmul optimization pattern if possible.
22	///
23	/// Make the loops containing the matrix multiplication be the innermost
24	/// loops and apply the BLIS matmul optimization pattern. BLIS implements
25	/// gemm as three nested loops around a macro-kernel, plus two packing
26	/// routines. The macro-kernel is implemented in terms of two additional
27	/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
28	/// (i.e., outer product) update.
29	///
30	/// For a detailed description please see [1].
31	///
32	/// The order of the loops defines the data reused in the BLIS implementation
33	/// of gemm ([1]). In particular, elements of the matrix B, the second
34	/// operand of matrix multiplication, are reused between iterations of the
35	/// innermost loop. To keep the reused data in cache, only elements of matrix
36	/// A, the first operand of matrix multiplication, should be evicted during
37	/// an iteration of the innermost loop. To provide such a cache replacement
38	/// policy, elements of the matrix A can, in particular, be loaded first and,
39	/// consequently, be least-recently-used.
40	///
41	/// In our case matrices are stored in row-major order instead of
42	/// column-major order used in the BLIS implementation ([1]). It affects only
43	/// on the form of the BLIS micro kernel and the computation of its
44	/// parameters. In particular, reused elements of the matrix B are
45	/// successively multiplied by specific elements of the matrix A.
46	///
47	/// Refs.:
48	/// [1] - Analytical Modeling is Enough for High Performance BLIS
49	/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
50	/// Technical Report, 2014
51	/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
52	///
53	/// @see ScheduleTreeOptimizer::createMicroKernel
54	/// @see ScheduleTreeOptimizer::createMacroKernel
55	/// @see getMicroKernelParams
56	/// @see getMacroKernelParams
57	///
58	/// TODO: Implement the packing transformation.
59	///
60	/// @param Node The node that contains a band to be optimized. The node
61	/// is required to successfully pass
62	/// ScheduleTreeOptimizer::isMatrMultPattern.
63	/// @param TTI Target Transform Info.
64	/// @param D The dependencies.
65	///
66	/// @returns The transformed schedule or nullptr if the optimization
67	/// cannot be applied.
68	isl::schedule_node
69	tryOptimizeMatMulPattern(isl::schedule_node Node,
70	const llvm::TargetTransformInfo *TTI,
71	const Dependences *D);
72
73	} // namespace polly
74	#endif // POLLY_MATMULOPTIMIZER_H
75

source code of polly/include/polly/MatmulOptimizer.h