1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Functions that implement common sequences in function prologues and epilogues |
10 | // used to save code size |
11 | |
	// Open a global function definition: switch to .text, export \name,
	// tag it as a function for the ELF symbol table, and fetch-align the
	// entry point before emitting the label.
	.macro FUNCTION_BEGIN name
	.text
	.globl \name
	.type \name, @function
	.falign
\name:
	.endm
19 | |
	// Close a function opened with FUNCTION_BEGIN by recording its size
	// (current location minus the \name label) in the symbol table.
	.macro FUNCTION_END name
	.size \name, . - \name
	.endm
23 | |
	// End function \name0 and immediately begin function \name1 at the next
	// address, so that \name0 "tail calls" \name1 simply by falling through.
	// \name0's size is closed out here; \name1 gets the usual global/function
	// attributes and a fetch-aligned entry label.
	.macro FALLTHROUGH_TAIL_CALL name0 name1
	.size \name0, . - \name0
	.globl \name1
	.type \name1, @function
	.falign
\name1:
	.endm
31 | |
32 | |
33 | |
34 | |
35 | // Save r25:24 at fp+#-8 and r27:26 at fp+#-16. |
36 | |
37 | |
38 | |
39 | |
40 | // The compiler knows that the __save_* functions clobber LR. No other |
41 | // registers should be used without informing the compiler. |
42 | |
43 | // Since we can only issue one store per packet, we don't hurt performance by |
44 | // simply jumping to the right point in this sequence of stores. |
45 | |
// Prologue helper: spill callee-saved register pairs to the frame.
// __save_r24_through_r27 stores r27:26 at fp+#-16 and then falls through
// into __save_r24_through_r25, which stores r25:24 at fp+#-8.  Entering at
// the second label skips the r27:26 spill.  Both entry points return via
// jumpr lr; the final store is packetized with the return so the fallthrough
// costs no extra packets (only one store can issue per packet anyway).
FUNCTION_BEGIN __save_r24_through_r27
		memd(fp+#-16) = r27:26
	FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
	{
		// Last spill dual-issued with the return to the caller's prologue.
		memd(fp+#-8) = r25:24
		jumpr lr
	}
FUNCTION_END __save_r24_through_r25
54 | |
55 | |
56 | |
57 | |
58 | // For each of the *_before_tailcall functions, jumpr lr is executed in parallel |
59 | // with deallocframe. That way, the return gets the old value of lr, which is |
60 | // where these functions need to return, and at the same time, lr gets the value |
61 | // it needs going into the tail call. |
62 | |
// Epilogue helper for functions that end in a tail call: reload the
// callee-saved pairs and deallocate the frame, while preserving the lr
// value needed by the upcoming tail call.  Within the final packet,
// jumpr lr reads the OLD lr (packet register reads see pre-packet values),
// so this helper returns to its caller, while deallocframe simultaneously
// reloads lr from the frame — the value the tail call needs.
// Entering at the second label skips the r27:26 reload.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
		r27:26 = memd(fp+#-16)
	FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
	{
		r25:24 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
72 | |
73 | |
74 | |
75 | |
76 | // Here we use the extra load bandwidth to restore LR early, allowing the return |
77 | // to occur in parallel with the deallocframe. |
78 | |
// Epilogue helper: restore r24-r27, deallocate the frame, and return to
// this function's ultimate caller.  lr is loaded early from the frame
// (fp+#4, the saved return address) using spare load bandwidth in the
// first packet, so jumpr lr in the second packet already targets the
// caller and can issue in parallel with deallocframe.
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
	{
		lr = memw(fp+#4)
		r27:26 = memd(fp+#-16)
	}
	{
		r25:24 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r24_through_r27_and_deallocframe
90 | |
91 | |
92 | |
93 | |
94 | // Here the load bandwidth is maximized. |
95 | |
// Epilogue helper: restore only r24-r25, deallocate the frame, and return.
// The r25:24 reload and deallocframe share one packet (both loads, using
// the full load bandwidth); jumpr lr then returns via the lr that
// deallocframe just restored, so it must sit in a later packet.
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
	{
		r25:24 = memd(fp+#-8)
		deallocframe
	}
	jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe
103 | |