1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | |
// Functions that implement common sequences found in function prologues and
// epilogues. Sharing these sequences saves code size.
12 | |
13 | .macro FUNCTION_BEGIN name |
14 | .text |
15 | .globl \name |
16 | .type \name, @function |
17 | .falign |
18 | \name: |
19 | .endm |
20 | |
21 | .macro FUNCTION_END name |
22 | .size \name, . - \name |
23 | .endm |
24 | |
25 | .macro FALLTHROUGH_TAIL_CALL name0 name1 |
26 | .size \name0, . - \name0 |
27 | .globl \name1 |
28 | .type \name1, @function |
29 | .falign |
30 | \name1: |
31 | .endm |
32 | |
33 | |
34 | |
35 | |
36 | // Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at |
37 | // fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. |
38 | |
39 | |
40 | |
41 | |
42 | // The compiler knows that the __save_* functions clobber LR. No other |
43 | // registers should be used without informing the compiler. |
44 | |
45 | // Since we can only issue one store per packet, we don't hurt performance by |
46 | // simply jumping to the right point in this sequence of stores. |
47 | |
48 | FUNCTION_BEGIN __save_r27_through_r16 |
49 | memd(fp+#-48) = r17:16 |
50 | FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18 |
51 | memd(fp+#-40) = r19:18 |
52 | FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20 |
53 | memd(fp+#-32) = r21:20 |
54 | FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22 |
55 | memd(fp+#-24) = r23:22 |
56 | FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24 |
57 | memd(fp+#-16) = r25:24 |
58 | { |
59 | memd(fp+#-8) = r27:26 |
60 | jumpr lr |
61 | } |
62 | FUNCTION_END __save_r27_through_r24 |
63 | |
64 | |
65 | |
66 | |
67 | // For each of the *_before_sibcall functions, jumpr lr is executed in parallel |
68 | // with deallocframe. That way, the return gets the old value of lr, which is |
69 | // where these functions need to return, and at the same time, lr gets the value |
70 | // it needs going into the sibcall. |
71 | |
72 | FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall |
73 | { |
74 | r21:20 = memd(fp+#-32) |
75 | r23:22 = memd(fp+#-24) |
76 | } |
77 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall |
78 | { |
79 | r25:24 = memd(fp+#-16) |
80 | jump __restore_r27_through_r26_and_deallocframe_before_sibcall |
81 | } |
82 | FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall |
83 | |
84 | |
85 | |
86 | |
87 | FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall |
88 | r17:16 = memd(fp+#-48) |
89 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall |
90 | { |
91 | r19:18 = memd(fp+#-40) |
92 | r21:20 = memd(fp+#-32) |
93 | } |
94 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall |
95 | { |
96 | r23:22 = memd(fp+#-24) |
97 | r25:24 = memd(fp+#-16) |
98 | } |
99 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall |
100 | { |
101 | r27:26 = memd(fp+#-8) |
102 | deallocframe |
103 | jumpr lr |
104 | } |
105 | FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall |
106 | |
107 | |
108 | |
109 | |
110 | // Here we use the extra load bandwidth to restore LR early, allowing the return |
111 | // to occur in parallel with the deallocframe. |
112 | |
113 | FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe |
114 | { |
115 | r17:16 = memd(fp+#-48) |
116 | r19:18 = memd(fp+#-40) |
117 | } |
118 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe |
119 | { |
120 | r21:20 = memd(fp+#-32) |
121 | r23:22 = memd(fp+#-24) |
122 | } |
123 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe |
124 | { |
125 | lr = memw(fp+#4) |
126 | r25:24 = memd(fp+#-16) |
127 | } |
128 | { |
129 | r27:26 = memd(fp+#-8) |
130 | deallocframe |
131 | jumpr lr |
132 | } |
133 | FUNCTION_END __restore_r27_through_r24_and_deallocframe |
134 | |
135 | |
136 | |
137 | |
138 | // Here the load bandwidth is maximized for all three functions. |
139 | |
140 | FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe |
141 | { |
142 | r19:18 = memd(fp+#-40) |
143 | r21:20 = memd(fp+#-32) |
144 | } |
145 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe |
146 | { |
147 | r23:22 = memd(fp+#-24) |
148 | r25:24 = memd(fp+#-16) |
149 | } |
150 | FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe |
151 | { |
152 | r27:26 = memd(fp+#-8) |
153 | deallocframe |
154 | } |
155 | jumpr lr |
156 | FUNCTION_END __restore_r27_through_r26_and_deallocframe |
157 | |