1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Functions that implement common sequences in function prologues and epilogues |
10 | // used to save code size |
11 | |
	// Open a global function definition: switch to .text, export \name,
	// tag it as a function for the ELF symbol table, and fetch-align the
	// entry point before emitting the label.
	.macro FUNCTION_BEGIN name
	.text
	.globl \name
	.type \name, @function
	.falign
\name:
	.endm
19 | |
	// Close a function opened with FUNCTION_BEGIN by recording its size
	// (current location minus the \name label) in the symbol table.
	.macro FUNCTION_END name
	.size \name, . - \name
	.endm
23 | |
	// End function \name0 and immediately begin function \name1 at the next
	// address, so that \name0 "tail calls" \name1 simply by falling through.
	// \name0's size is closed out here; \name1 gets the usual global/function
	// attributes and a fetch-aligned entry label.
	.macro FALLTHROUGH_TAIL_CALL name0 name1
	.size \name0, . - \name0
	.globl \name1
	.type \name1, @function
	.falign
\name1:
	.endm
31 | |
32 | |
33 | |
34 | |
35 | // Save r25:24 at fp+#-8 and r27:26 at fp+#-16. |
36 | |
37 | |
38 | |
39 | |
40 | // The compiler knows that the __save_* functions clobber LR. No other |
41 | // registers should be used without informing the compiler. |
42 | |
43 | // Since we can only issue one store per packet, we don't hurt performance by |
44 | // simply jumping to the right point in this sequence of stores. |
45 | |
// Prologue helper: spill callee-saved register pairs to the frame.
// __save_r24_through_r27 stores r27:26 at fp+#-16 and then falls through
// into __save_r24_through_r25, which stores r25:24 at fp+#-8.  Entering at
// the second label skips the r27:26 spill.  Both entry points return via
// jumpr lr; the final store is packetized with the return so the fallthrough
// costs no extra packets (only one store can issue per packet anyway).
FUNCTION_BEGIN __save_r24_through_r27
		memd(fp+#-16) = r27:26
	FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
	{
		// Last spill dual-issued with the return to the caller's prologue.
		memd(fp+#-8) = r25:24
		jumpr lr
	}
FUNCTION_END __save_r24_through_r25
54 | |
55 | |
56 | |
57 | |
58 | // For each of the *_before_tailcall functions, jumpr lr is executed in parallel |
59 | // with deallocframe. That way, the return gets the old value of lr, which is |
60 | // where these functions need to return, and at the same time, lr gets the value |
61 | // it needs going into the tail call. |
62 | |
// Epilogue helper for functions that end in a tail call: reload the
// callee-saved pairs and deallocate the frame, while preserving the lr
// value needed by the upcoming tail call.  Within the final packet,
// jumpr lr reads the OLD lr (packet register reads see pre-packet values),
// so this helper returns to its caller, while deallocframe simultaneously
// reloads lr from the frame — the value the tail call needs.
// Entering at the second label skips the r27:26 reload.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
		r27:26 = memd(fp+#-16)
	FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
	{
		r25:24 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
72 | |
73 | |
74 | |
75 | |
76 | // Here we use the extra load bandwidth to restore LR early, allowing the return |
77 | // to occur in parallel with the deallocframe. |
78 | |
// Epilogue helper: restore r24-r27, deallocate the frame, and return to
// this function's ultimate caller.  lr is loaded early from the frame
// (fp+#4, the saved return address) using spare load bandwidth in the
// first packet, so jumpr lr in the second packet already targets the
// caller and can issue in parallel with deallocframe.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
	{
		lr = memw(fp+#4)
		r27:26 = memd(fp+#-16)
	}
	{
		r25:24 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r24_through_r27_and_deallocframe
90 | |
91 | |
92 | |
93 | |
94 | // Here the load bandwidth is maximized. |
95 | |
// Epilogue helper: restore only r24-r25, deallocate the frame, and return.
// The r25:24 reload and deallocframe share one packet (both loads, using
// the full load bandwidth); jumpr lr then returns via the lr that
// deallocframe just restored, so it must sit in a later packet.
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
	{
		r25:24 = memd(fp+#-8)
		deallocframe
	}
	jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe
103 | |