cs-etm.c source code [linux/tools/perf/util/cs-etm.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright(C) 2015-2018 Linaro Limited.
4	*
5	* Author: Tor Jeremiassen <tor@ti.com>
6	* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7	*/
8
9	#include <linux/kernel.h>
10	#include <linux/bitfield.h>
11	#include <linux/bitops.h>
12	#include <linux/coresight-pmu.h>
13	#include <linux/err.h>
14	#include <linux/log2.h>
15	#include <linux/types.h>
16	#include <linux/zalloc.h>
17
18	#include <stdlib.h>
19
20	#include "auxtrace.h"
21	#include "color.h"
22	#include "cs-etm.h"
23	#include "cs-etm-decoder/cs-etm-decoder.h"
24	#include "debug.h"
25	#include "dso.h"
26	#include "evlist.h"
27	#include "intlist.h"
28	#include "machine.h"
29	#include "map.h"
30	#include "perf.h"
31	#include "session.h"
32	#include "map_symbol.h"
33	#include "branch.h"
34	#include "symbol.h"
35	#include "tool.h"
36	#include "thread.h"
37	#include "thread-stack.h"
38	#include "tsc.h"
39	#include <tools/libc_compat.h>
40	#include "util/synthetic-events.h"
41	#include "util/util.h"
42
43	struct cs_etm_auxtrace {
44	struct auxtrace auxtrace;
45	struct auxtrace_queues queues;
46	struct auxtrace_heap heap;
47	struct itrace_synth_opts synth_opts;
48	struct perf_session *session;
49	struct perf_tsc_conversion tc;
50
51	/*
52	* Timeless has no timestamps in the trace so overlapping mmap lookups
53	* are less accurate but produces smaller trace data. We use context IDs
54	* in the trace instead of matching timestamps with fork records so
55	* they're not really needed in the general case. Overlapping mmaps
56	* happen in cases like between a fork and an exec.
57	*/
58	bool timeless_decoding;
59
60	/*
61	* Per-thread ignores the trace channel ID and instead assumes that
62	* everything in a buffer comes from the same process regardless of
63	* which CPU it ran on. It also implies no context IDs so the TID is
64	* taken from the auxtrace buffer.
65	*/
66	bool per_thread_decoding;
67	bool snapshot_mode;
68	bool data_queued;
69	bool has_virtual_ts; / Virtual/Kernel timestamps in the trace. /
70
71	int num_cpu;
72	u64 latest_kernel_timestamp;
73	u32 auxtrace_type;
74	u64 branches_sample_type;
75	u64 branches_id;
76	u64 instructions_sample_type;
77	u64 instructions_sample_period;
78	u64 instructions_id;
79	u64 **metadata;
80	unsigned int pmu_type;
81	enum cs_etm_pid_fmt pid_fmt;
82	};
83
84	struct cs_etm_traceid_queue {
85	u8 trace_chan_id;
86	u64 period_instructions;
87	size_t last_branch_pos;
88	union perf_event *event_buf;
89	struct thread *thread;
90	struct thread *prev_packet_thread;
91	ocsd_ex_level prev_packet_el;
92	ocsd_ex_level el;
93	struct branch_stack *last_branch;
94	struct branch_stack *last_branch_rb;
95	struct cs_etm_packet *prev_packet;
96	struct cs_etm_packet *packet;
97	struct cs_etm_packet_queue packet_queue;
98	};
99
/*
 * Framing of the raw trace data held by a queue. UNSET means the format has
 * not been determined yet.
 */
enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};
105
106	struct cs_etm_queue {
107	struct cs_etm_auxtrace *etm;
108	struct cs_etm_decoder *decoder;
109	struct auxtrace_buffer *buffer;
110	unsigned int queue_nr;
111	u8 pending_timestamp_chan_id;
112	enum cs_etm_format format;
113	u64 offset;
114	const unsigned char *buf;
115	size_t buf_len, buf_used;
116	/ Conversion between traceID and index in traceid_queues array /
117	struct intlist *traceid_queues_list;
118	struct cs_etm_traceid_queue **traceid_queues;
119	/ Conversion between traceID and metadata pointers /
120	struct intlist *traceid_list;
121	/*
122	* Same as traceid_list, but traceid_list may be a reference to another
123	* queue's which has a matching sink ID.
124	*/
125	struct intlist *own_traceid_list;
126	u32 sink_id;
127	};
128
129	static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
130	static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
131	pid_t tid);
132	static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
133	static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
134	static int cs_etm__metadata_get_trace_id(u8 trace_chan_id, u64 cpu_metadata);
135	static u64 get_cpu_data(struct* cs_etm_auxtrace etm, int* cpu);
136	static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
137
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments and expansions are fully parenthesised so that expressions such
 * as "a | b" can be passed without operator precedence surprises.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
		(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
152
153	static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
154	{
155	etmidr &= ETMIDR_PTM_VERSION;
156
157	if (etmidr == ETMIDR_PTM_VERSION)
158	return CS_ETM_PROTO_PTM;
159
160	return CS_ETM_PROTO_ETMV3;
161	}
162
163	static int cs_etm__get_magic(struct cs_etm_queue etmq, u8 trace_chan_id, u64 magic)
164	{
165	struct int_node *inode;
166	u64 *metadata;
167
168	inode = intlist__find(ilist: etmq->traceid_list, i: trace_chan_id);
169	if (!inode)
170	return -EINVAL;
171
172	metadata = inode->priv;
173	*magic = metadata[CS_ETM_MAGIC];
174	return `0`;
175	}
176
177	int cs_etm__get_cpu(struct cs_etm_queue etmq, u8 trace_chan_id, int* *cpu)
178	{
179	struct int_node *inode;
180	u64 *metadata;
181
182	inode = intlist__find(ilist: etmq->traceid_list, i: trace_chan_id);
183	if (!inode)
184	return -EINVAL;
185
186	metadata = inode->priv;
187	cpu = (int*)metadata[CS_ETM_CPU];
188	return `0`;
189	}
190
191	/*
192	* The returned PID format is presented as an enum:
193	*
194	* CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
195	* CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
196	* CS_ETM_PIDFMT_NONE: No context IDs
197	*
198	* It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
199	* are enabled at the same time when the session runs on an EL2 kernel.
200	* This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
201	* recorded in the trace data, the tool will selectively use
202	* CONTEXTIDR_EL2 as PID.
203	*
204	* The result is cached in etm->pid_fmt so this function only needs to be called
205	* when processing the aux info.
206	*/
207	static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
208	{
209	u64 val;
210
211	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
212	val = metadata[CS_ETM_ETMCR];
213	/ CONTEXTIDR is traced /
214	if (val & BIT(ETM_OPT_CTXTID))
215	return CS_ETM_PIDFMT_CTXTID;
216	} else {
217	val = metadata[CS_ETMV4_TRCCONFIGR];
218	/ CONTEXTIDR_EL2 is traced /
219	if (val & (BIT(ETM4_CFG_BIT_VMID) \| BIT(ETM4_CFG_BIT_VMID_OPT)))
220	return CS_ETM_PIDFMT_CTXTID2;
221	/ CONTEXTIDR_EL1 is traced /
222	else if (val & BIT(ETM4_CFG_BIT_CTXTID))
223	return CS_ETM_PIDFMT_CTXTID;
224	}
225
226	return CS_ETM_PIDFMT_NONE;
227	}
228
229	enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
230	{
231	return etmq->etm->pid_fmt;
232	}
233
234	static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
235	u8 trace_chan_id, u64 *cpu_metadata)
236	{
237	/ Get an RB node for this CPU /
238	struct int_node *inode = intlist__findnew(ilist: etmq->traceid_list, i: trace_chan_id);
239
240	/ Something went wrong, no need to continue /
241	if (!inode)
242	return -ENOMEM;
243
244	/ Disallow re-mapping a different traceID to metadata pair. /
245	if (inode->priv) {
246	u64 *curr_cpu_data = inode->priv;
247	u8 curr_chan_id;
248	int err;
249
250	if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
251	/*
252	* With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
253	* are expected (but not supported) in per-thread mode,
254	* rather than signifying an error.
255	*/
256	if (etmq->etm->per_thread_decoding)
257	pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
258	else
259	pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
260
261	return -EINVAL;
262	}
263
264	/ check that the mapped ID matches /
265	err = cs_etm__metadata_get_trace_id(trace_chan_id: &curr_chan_id, cpu_metadata: curr_cpu_data);
266	if (err)
267	return err;
268
269	if (curr_chan_id != trace_chan_id) {
270	pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
271	return -EINVAL;
272	}
273
274	/ Skip re-adding the same mappings if everything matched /
275	return `0`;
276	}
277
278	/ Not one we've seen before, associate the traceID with the metadata pointer /
279	inode->priv = cpu_metadata;
280
281	return `0`;
282	}
283
284	static struct cs_etm_queue cs_etm__get_queue(struct* cs_etm_auxtrace etm, int* cpu)
285	{
286	if (etm->per_thread_decoding)
287	return etm->queues.queue_array[`0`].priv;
288	else
289	return etm->queues.queue_array[cpu].priv;
290	}
291
292	static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
293	u64 *cpu_metadata)
294	{
295	struct cs_etm_queue *etmq;
296
297	/*
298	* If the queue is unformatted then only save one mapping in the
299	* queue associated with that CPU so only one decoder is made.
300	*/
301	etmq = cs_etm__get_queue(etm, cpu: cpu_metadata[CS_ETM_CPU]);
302	if (etmq->format == UNFORMATTED)
303	return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
304	cpu_metadata);
305
306	/*
307	* Otherwise, version 0 trace IDs are global so save them into every
308	* queue.
309	*/
310	for (unsigned int i = `0`; i < etm->queues.nr_queues; ++i) {
311	int ret;
312
313	etmq = etm->queues.queue_array[i].priv;
314	ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
315	cpu_metadata);
316	if (ret)
317	return ret;
318	}
319
320	return `0`;
321	}
322
323	static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace etm, int* cpu,
324	u64 hw_id)
325	{
326	int err;
327	u64 *cpu_data;
328	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
329
330	cpu_data = get_cpu_data(etm, cpu);
331	if (cpu_data == NULL)
332	return -EINVAL;
333
334	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_metadata: cpu_data);
335	if (err)
336	return err;
337
338	/*
339	* if we are picking up the association from the packet, need to plug
340	* the correct trace ID into the metadata for setting up decoders later.
341	*/
342	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_metadata: cpu_data);
343	}
344
345	static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace etm, int* cpu,
346	u64 hw_id)
347	{
348	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
349	int ret;
350	u64 *cpu_data;
351	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
352	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
353
354	/*
355	* Check sink id hasn't changed in per-cpu mode. In per-thread mode,
356	* let it pass for now until an actual overlapping trace ID is hit. In
357	* most cases IDs won't overlap even if the sink changes.
358	*/
359	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
360	etmq->sink_id != sink_id) {
361	pr_err("CS_ETM: mismatch between sink IDs\n");
362	return -EINVAL;
363	}
364
365	etmq->sink_id = sink_id;
366
367	/ Find which other queues use this sink and link their ID maps /
368	for (unsigned int i = `0`; i < etm->queues.nr_queues; ++i) {
369	struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
370
371	/ Different sinks, skip /
372	if (other_etmq->sink_id != etmq->sink_id)
373	continue;
374
375	/ Already linked, skip /
376	if (other_etmq->traceid_list == etmq->traceid_list)
377	continue;
378
379	/ At the point of first linking, this one should be empty /
380	if (!intlist__empty(ilist: etmq->traceid_list)) {
381	pr_err("CS_ETM: Can't link populated trace ID lists\n");
382	return -EINVAL;
383	}
384
385	etmq->own_traceid_list = NULL;
386	intlist__delete(ilist: etmq->traceid_list);
387	etmq->traceid_list = other_etmq->traceid_list;
388	break;
389	}
390
391	cpu_data = get_cpu_data(etm, cpu);
392	ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id: trace_id, cpu_metadata: cpu_data);
393	if (ret)
394	return ret;
395
396	ret = cs_etm__metadata_set_trace_id(trace_chan_id: trace_id, cpu_metadata: cpu_data);
397	if (ret)
398	return ret;
399
400	return `0`;
401	}
402
403	static int cs_etm__metadata_get_trace_id(u8 trace_chan_id, u64 cpu_metadata)
404	{
405	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
406
407	switch (cs_etm_magic) {
408	case __perf_cs_etmv3_magic:
409	*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
410	CORESIGHT_TRACE_ID_VAL_MASK);
411	break;
412	case __perf_cs_etmv4_magic:
413	case __perf_cs_ete_magic:
414	*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
415	CORESIGHT_TRACE_ID_VAL_MASK);
416	break;
417	default:
418	return -EINVAL;
419	}
420	return `0`;
421	}
422
423	/*
424	* update metadata trace ID from the value found in the AUX_HW_INFO packet.
425	*/
426	static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
427	{
428	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
429
430	switch (cs_etm_magic) {
431	case __perf_cs_etmv3_magic:
432	cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
433	break;
434	case __perf_cs_etmv4_magic:
435	case __perf_cs_ete_magic:
436	cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
437	break;
438
439	default:
440	return -EINVAL;
441	}
442	return `0`;
443	}
444
445	/*
446	* Get a metadata index for a specific cpu from an array.
447	*
448	*/
449	static int get_cpu_data_idx(struct cs_etm_auxtrace etm, int* cpu)
450	{
451	int i;
452
453	for (i = `0`; i < etm->num_cpu; i++) {
454	if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
455	return i;
456	}
457	}
458
459	return -`1`;
460	}
461
462	/*
463	* Get a metadata for a specific cpu from an array.
464	*
465	*/
466	static u64 get_cpu_data(struct* cs_etm_auxtrace etm, int* cpu)
467	{
468	int idx = get_cpu_data_idx(etm, cpu);
469
470	return (idx != -`1`) ? etm->metadata[idx] : NULL;
471	}
472
473	/*
474	* Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
475	*
476	* The payload associates the Trace ID and the CPU.
477	* The routine is tolerant of seeing multiple packets with the same association,
478	* but a CPU / Trace ID association changing during a session is an error.
479	*/
480	static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
481	union perf_event *event)
482	{
483	struct cs_etm_auxtrace *etm;
484	struct perf_sample sample;
485	struct evsel *evsel;
486	u64 hw_id;
487	int cpu, version, err;
488
489	/ extract and parse the HW ID /
490	hw_id = event->aux_output_hw_id.hw_id;
491	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
492
493	/ check that we can handle this version /
494	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
495	pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
496	version);
497	return -EINVAL;
498	}
499
500	/ get access to the etm metadata /
501	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
502	if (!etm \|\| !etm->metadata)
503	return -EINVAL;
504
505	/ parse the sample to get the CPU /
506	evsel = evlist__event2evsel(evlist: session->evlist, event);
507	if (!evsel)
508	return -EINVAL;
509	perf_sample__init(&sample, /all=/false);
510	err = evsel__parse_sample(evsel, event, sample: &sample);
511	if (err)
512	goto out;
513	cpu = sample.cpu;
514	if (cpu == -`1`) {
515	/ no CPU in the sample - possibly recorded with an old version of perf /
516	pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
517	err = -EINVAL;
518	goto out;
519	}
520
521	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == `0`) {
522	err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
523	goto out;
524	}
525
526	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
527	out:
528	perf_sample__exit(&sample);
529	return err;
530	}
531
532	void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
533	u8 trace_chan_id)
534	{
535	/*
536	* When a timestamp packet is encountered the backend code
537	* is stopped so that the front end has time to process packets
538	* that were accumulated in the traceID queue. Since there can
539	* be more than one channel per cs_etm_queue, we need to specify
540	* what traceID queue needs servicing.
541	*/
542	etmq->pending_timestamp_chan_id = trace_chan_id;
543	}
544
545	static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
546	u8 *trace_chan_id)
547	{
548	struct cs_etm_packet_queue *packet_queue;
549
550	if (!etmq->pending_timestamp_chan_id)
551	return `0`;
552
553	if (trace_chan_id)
554	*trace_chan_id = etmq->pending_timestamp_chan_id;
555
556	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
557	etmq->pending_timestamp_chan_id);
558	if (!packet_queue)
559	return `0`;
560
561	/ Acknowledge pending status /
562	etmq->pending_timestamp_chan_id = `0`;
563
564	/ See function cs_etm_decoder__do_{hard\|soft}_timestamp() /
565	return packet_queue->cs_timestamp;
566	}
567
568	static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
569	{
570	int i;
571
572	queue->head = `0`;
573	queue->tail = `0`;
574	queue->packet_count = `0`;
575	for (i = `0`; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
576	queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
577	queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
578	queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
579	queue->packet_buffer[i].instr_count = `0`;
580	queue->packet_buffer[i].last_instr_taken_branch = false;
581	queue->packet_buffer[i].last_instr_size = `0`;
582	queue->packet_buffer[i].last_instr_type = `0`;
583	queue->packet_buffer[i].last_instr_subtype = `0`;
584	queue->packet_buffer[i].last_instr_cond = `0`;
585	queue->packet_buffer[i].flags = `0`;
586	queue->packet_buffer[i].exception_number = UINT32_MAX;
587	queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
588	queue->packet_buffer[i].cpu = INT_MIN;
589	}
590	}
591
592	static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
593	{
594	int idx;
595	struct int_node *inode;
596	struct cs_etm_traceid_queue *tidq;
597	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
598
599	intlist__for_each_entry(inode, traceid_queues_list) {
600	idx = (int)(intptr_t)inode->priv;
601	tidq = etmq->traceid_queues[idx];
602	cs_etm__clear_packet_queue(queue: &tidq->packet_queue);
603	}
604	}
605
606	static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
607	struct cs_etm_traceid_queue *tidq,
608	u8 trace_chan_id)
609	{
610	int rc = -ENOMEM;
611	struct auxtrace_queue *queue;
612	struct cs_etm_auxtrace *etm = etmq->etm;
613
614	cs_etm__clear_packet_queue(queue: &tidq->packet_queue);
615
616	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
617	tidq->trace_chan_id = trace_chan_id;
618	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
619	tidq->thread = machine__findnew_thread(machine: &etm->session->machines.host, pid: -`1`,
620	tid: queue->tid);
621	tidq->prev_packet_thread = machine__idle_thread(machine: &etm->session->machines.host);
622
623	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
624	if (!tidq->packet)
625	goto out;
626
627	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
628	if (!tidq->prev_packet)
629	goto out_free;
630
631	if (etm->synth_opts.last_branch) {
632	size_t sz = sizeof(struct branch_stack);
633
634	sz += etm->synth_opts.last_branch_sz *
635	sizeof(struct branch_entry);
636	tidq->last_branch = zalloc(sz);
637	if (!tidq->last_branch)
638	goto out_free;
639	tidq->last_branch_rb = zalloc(sz);
640	if (!tidq->last_branch_rb)
641	goto out_free;
642	}
643
644	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
645	if (!tidq->event_buf)
646	goto out_free;
647
648	return `0`;
649
650	out_free:
651	zfree(&tidq->last_branch_rb);
652	zfree(&tidq->last_branch);
653	zfree(&tidq->prev_packet);
654	zfree(&tidq->packet);
655	out:
656	return rc;
657	}
658
659	static struct cs_etm_traceid_queue
660	cs_etm__etmq_get_traceid_queue(struct* cs_etm_queue *etmq, u8 trace_chan_id)
661	{
662	int idx;
663	struct int_node *inode;
664	struct intlist *traceid_queues_list;
665	struct cs_etm_traceid_queue tidq, *traceid_queues;
666	struct cs_etm_auxtrace *etm = etmq->etm;
667
668	if (etm->per_thread_decoding)
669	trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
670
671	traceid_queues_list = etmq->traceid_queues_list;
672
673	/*
674	* Check if the traceid_queue exist for this traceID by looking
675	* in the queue list.
676	*/
677	inode = intlist__find(ilist: traceid_queues_list, i: trace_chan_id);
678	if (inode) {
679	idx = (int)(intptr_t)inode->priv;
680	return etmq->traceid_queues[idx];
681	}
682
683	/ We couldn't find a traceid_queue for this traceID, allocate one /
684	tidq = malloc(sizeof(*tidq));
685	if (!tidq)
686	return NULL;
687
688	memset(tidq, `0`, sizeof(*tidq));
689
690	/ Get a valid index for the new traceid_queue /
691	idx = intlist__nr_entries(ilist: traceid_queues_list);
692	/ Memory for the inode is free'ed in cs_etm_free_traceid_queues () /
693	inode = intlist__findnew(ilist: traceid_queues_list, i: trace_chan_id);
694	if (!inode)
695	goto out_free;
696
697	/ Associate this traceID with this index /
698	inode->priv = (void *)(intptr_t)idx;
699
700	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
701	goto out_free;
702
703	/ Grow the traceid_queues array by one unit /
704	traceid_queues = etmq->traceid_queues;
705	traceid_queues = reallocarray(traceid_queues,
706	idx + `1`,
707	sizeof(*traceid_queues));
708
709	/*
710	* On failure reallocarray() returns NULL and the original block of
711	* memory is left untouched.
712	*/
713	if (!traceid_queues)
714	goto out_free;
715
716	traceid_queues[idx] = tidq;
717	etmq->traceid_queues = traceid_queues;
718
719	return etmq->traceid_queues[idx];
720
721	out_free:
722	/*
723	* Function intlist__remove() removes the inode from the list
724	* and delete the memory associated to it.
725	*/
726	intlist__remove(ilist: traceid_queues_list, in: inode);
727	free(tidq);
728
729	return NULL;
730	}
731
732	struct cs_etm_packet_queue
733	cs_etm__etmq_get_packet_queue(struct* cs_etm_queue *etmq, u8 trace_chan_id)
734	{
735	struct cs_etm_traceid_queue *tidq;
736
737	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
738	if (tidq)
739	return &tidq->packet_queue;
740
741	return NULL;
742	}
743
744	static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
745	struct cs_etm_traceid_queue *tidq)
746	{
747	struct cs_etm_packet *tmp;
748
749	if (etm->synth_opts.branches \|\| etm->synth_opts.last_branch \|\|
750	etm->synth_opts.instructions) {
751	/*
752	* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
753	* the next incoming packet.
754	*
755	* Threads and exception levels are also tracked for both the
756	* previous and current packets. This is because the previous
757	* packet is used for the 'from' IP for branch samples, so the
758	* thread at that time must also be assigned to that sample.
759	* Across discontinuity packets the thread can change, so by
760	* tracking the thread for the previous packet the branch sample
761	* will have the correct info.
762	*/
763	tmp = tidq->packet;
764	tidq->packet = tidq->prev_packet;
765	tidq->prev_packet = tmp;
766	tidq->prev_packet_el = tidq->el;
767	thread__put(thread: tidq->prev_packet_thread);
768	tidq->prev_packet_thread = thread__get(thread: tidq->thread);
769	}
770	}
771
772	static void cs_etm__packet_dump(const char pkt_string, void* *data)
773	{
774	const char *color = PERF_COLOR_BLUE;
775	int len = strlen(pkt_string);
776	struct cs_etm_queue *etmq = data;
777	char queue_nr[`64`];
778
779	if (verbose)
780	snprintf(buf: queue_nr, size: sizeof(queue_nr), fmt: "Qnr:%u; ", etmq->queue_nr);
781	else
782	queue_nr[`0`] = `'\0'`;
783
784	if (len && (pkt_string[len-`1`] == `'\n'`))
785	color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
786	else
787	color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);
788
789	fflush(stdout);
790	}
791
792	static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
793	u64 *metadata, u32 etmidr)
794	{
795	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
796	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
797	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
798	}
799
800	static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
801	u64 *metadata)
802	{
803	t_params->protocol = CS_ETM_PROTO_ETMV4i;
804	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
805	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
806	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
807	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
808	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
809	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
810	}
811
812	static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
813	u64 *metadata)
814	{
815	t_params->protocol = CS_ETM_PROTO_ETE;
816	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
817	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
818	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
819	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
820	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
821	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
822	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
823	}
824
825	static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
826	struct cs_etm_queue *etmq)
827	{
828	struct int_node *inode;
829
830	intlist__for_each_entry(inode, etmq->traceid_list) {
831	u64 *metadata = inode->priv;
832	u64 architecture = metadata[CS_ETM_MAGIC];
833	u32 etmidr;
834
835	switch (architecture) {
836	case __perf_cs_etmv3_magic:
837	etmidr = metadata[CS_ETM_ETMIDR];
838	cs_etm__set_trace_param_etmv3(t_params: t_params++, metadata, etmidr);
839	break;
840	case __perf_cs_etmv4_magic:
841	cs_etm__set_trace_param_etmv4(t_params: t_params++, metadata);
842	break;
843	case __perf_cs_ete_magic:
844	cs_etm__set_trace_param_ete(t_params: t_params++, metadata);
845	break;
846	default:
847	return -EINVAL;
848	}
849	}
850
851	return `0`;
852	}
853
854	static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
855	struct cs_etm_queue *etmq,
856	enum cs_etm_decoder_operation mode)
857	{
858	int ret = -EINVAL;
859
860	if (!(mode < CS_ETM_OPERATION_MAX))
861	goto out;
862
863	d_params->packet_printer = cs_etm__packet_dump;
864	d_params->operation = mode;
865	d_params->data = etmq;
866	d_params->formatted = etmq->format == FORMATTED;
867	d_params->fsyncs = false;
868	d_params->hsyncs = false;
869	d_params->frame_aligned = true;
870
871	ret = `0`;
872	out:
873	return ret;
874	}
875
876	static void cs_etm__dump_event(struct cs_etm_queue *etmq,
877	struct auxtrace_buffer *buffer)
878	{
879	int ret;
880	const char *color = PERF_COLOR_BLUE;
881	size_t buffer_used = `0`;
882
883	fprintf(stdout, "\n");
884	color_fprintf(stdout, color,
885	". ... CoreSight %s Trace data: size %#zx bytes\n",
886	cs_etm_decoder__get_name(etmq->decoder), buffer->size);
887
888	do {
889	size_t consumed;
890
891	ret = cs_etm_decoder__process_data_block(
892	decoder: etmq->decoder, indx: buffer->offset,
893	buf: &((u8 *)buffer->data)[buffer_used],
894	len: buffer->size - buffer_used, consumed: &consumed);
895	if (ret)
896	break;
897
898	buffer_used += consumed;
899	} while (buffer_used < buffer->size);
900
901	cs_etm_decoder__reset(decoder: etmq->decoder);
902	}
903
904	static int cs_etm__flush_events(struct perf_session *session,
905	const struct perf_tool *tool)
906	{
907	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
908	struct cs_etm_auxtrace,
909	auxtrace);
910	if (dump_trace)
911	return `0`;
912
913	if (!tool->ordered_events)
914	return -EINVAL;
915
916	if (etm->timeless_decoding) {
917	/*
918	* Pass tid = -1 to process all queues. But likely they will have
919	* already been processed on PERF_RECORD_EXIT anyway.
920	*/
921	return cs_etm__process_timeless_queues(etm, tid: -`1`);
922	}
923
924	return cs_etm__process_timestamped_queues(etm);
925	}
926
927	static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
928	{
929	int idx;
930	uintptr_t priv;
931	struct int_node inode, tmp;
932	struct cs_etm_traceid_queue *tidq;
933	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
934
935	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
936	priv = (uintptr_t)inode->priv;
937	idx = priv;
938
939	/ Free this traceid_queue from the array /
940	tidq = etmq->traceid_queues[idx];
941	thread__zput(tidq->thread);
942	thread__zput(tidq->prev_packet_thread);
943	zfree(&tidq->event_buf);
944	zfree(&tidq->last_branch);
945	zfree(&tidq->last_branch_rb);
946	zfree(&tidq->prev_packet);
947	zfree(&tidq->packet);
948	zfree(&tidq);
949
950	/*
951	* Function intlist__remove() removes the inode from the list
952	* and delete the memory associated to it.
953	*/
954	intlist__remove(ilist: traceid_queues_list, in: inode);
955	}
956
957	/ Then the RB tree itself /
958	intlist__delete(ilist: traceid_queues_list);
959	etmq->traceid_queues_list = NULL;
960
961	/ finally free the traceid_queues array /
962	zfree(&etmq->traceid_queues);
963	}
964
965	static void cs_etm__free_queue(void *priv)
966	{
967	struct int_node inode, tmp;
968	struct cs_etm_queue *etmq = priv;
969
970	if (!etmq)
971	return;
972
973	cs_etm_decoder__free(decoder: etmq->decoder);
974	cs_etm__free_traceid_queues(etmq);
975
976	if (etmq->own_traceid_list) {
977	/ First remove all traceID/metadata nodes for the RB tree /
978	intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
979	intlist__remove(ilist: etmq->own_traceid_list, in: inode);
980
981	/ Then the RB tree itself /
982	intlist__delete(ilist: etmq->own_traceid_list);
983	}
984
985	free(etmq);
986	}
987
988	static void cs_etm__free_events(struct perf_session *session)
989	{
990	unsigned int i;
991	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
992	struct cs_etm_auxtrace,
993	auxtrace);
994	struct auxtrace_queues *queues = &aux->queues;
995
996	for (i = `0`; i < queues->nr_queues; i++) {
997	cs_etm__free_queue(priv: queues->queue_array[i].priv);
998	queues->queue_array[i].priv = NULL;
999	}
1000
1001	auxtrace_queues__free(queues);
1002	}
1003
1004	static void cs_etm__free(struct perf_session *session)
1005	{
1006	int i;
1007	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1008	struct cs_etm_auxtrace,
1009	auxtrace);
1010	cs_etm__free_events(session);
1011	session->auxtrace = NULL;
1012
1013	for (i = `0`; i < aux->num_cpu; i++)
1014	zfree(&aux->metadata[i]);
1015
1016	zfree(&aux->metadata);
1017	zfree(&aux);
1018	}
1019
1020	static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1021	struct evsel *evsel)
1022	{
1023	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1024	struct cs_etm_auxtrace,
1025	auxtrace);
1026
1027	return evsel->core.attr.type == aux->pmu_type;
1028	}
1029
1030	static struct machine cs_etm__get_machine(struct* cs_etm_queue *etmq,
1031	ocsd_ex_level el)
1032	{
1033	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
1034
1035	/*
1036	* For any virtualisation based on nVHE (e.g. pKVM), or host kernels
1037	* running at EL1 assume everything is the host.
1038	*/
1039	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
1040	return &etmq->etm->session->machines.host;
1041
1042	/*
1043	* Not perfect, but otherwise assume anything in EL1 is the default
1044	* guest, and everything else is the host. Distinguishing between guest
1045	* and host userspaces isn't currently supported either. Neither is
1046	* multiple guest support. All this does is reduce the likeliness of
1047	* decode errors where we look into the host kernel maps when it should
1048	* have been the guest maps.
1049	*/
1050	switch (el) {
1051	case ocsd_EL1:
1052	return machines__find_guest(machines: &etmq->etm->session->machines,
1053	DEFAULT_GUEST_KERNEL_ID);
1054	case ocsd_EL3:
1055	case ocsd_EL2:
1056	case ocsd_EL0:
1057	case ocsd_EL_unknown:
1058	default:
1059	return &etmq->etm->session->machines.host;
1060	}
1061	}
1062
1063	static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1064	ocsd_ex_level el)
1065	{
1066	struct machine *machine = cs_etm__get_machine(etmq, el);
1067
1068	if (address >= machine__kernel_start(machine)) {
1069	if (machine__is_host(machine))
1070	return PERF_RECORD_MISC_KERNEL;
1071	else
1072	return PERF_RECORD_MISC_GUEST_KERNEL;
1073	} else {
1074	if (machine__is_host(machine))
1075	return PERF_RECORD_MISC_USER;
1076	else {
1077	/*
1078	* Can't really happen at the moment because
1079	* cs_etm__get_machine() will always return
1080	* machines.host for any non EL1 trace.
1081	*/
1082	return PERF_RECORD_MISC_GUEST_USER;
1083	}
1084	}
1085	}
1086
1087	static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
1088	u64 address, size_t size, u8 *buffer,
1089	const ocsd_mem_space_acc_t mem_space)
1090	{
1091	u8 cpumode;
1092	u64 offset;
1093	int len;
1094	struct addr_location al;
1095	struct dso *dso;
1096	struct cs_etm_traceid_queue *tidq;
1097	int ret = `0`;
1098
1099	if (!etmq)
1100	return `0`;
1101
1102	addr_location__init(al: &al);
1103	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1104	if (!tidq)
1105	goto out;
1106
1107	/*
1108	* We've already tracked EL along side the PID in cs_etm__set_thread()
1109	* so double check that it matches what OpenCSD thinks as well. It
1110	* doesn't distinguish between EL0 and EL1 for this mem access callback
1111	* so we had to do the extra tracking. Skip validation if it's any of
1112	* the 'any' values.
1113	*/
1114	if (!(mem_space == OCSD_MEM_SPACE_ANY \|\|
1115	mem_space == OCSD_MEM_SPACE_N \|\| mem_space == OCSD_MEM_SPACE_S)) {
1116	if (mem_space & OCSD_MEM_SPACE_EL1N) {
1117	/ Includes both non secure EL1 and EL0 /
1118	assert(tidq->el == ocsd_EL1 \|\| tidq->el == ocsd_EL0);
1119	} else if (mem_space & OCSD_MEM_SPACE_EL2)
1120	assert(tidq->el == ocsd_EL2);
1121	else if (mem_space & OCSD_MEM_SPACE_EL3)
1122	assert(tidq->el == ocsd_EL3);
1123	}
1124
1125	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1126
1127	if (!thread__find_map(thread: tidq->thread, cpumode, addr: address, al: &al))
1128	goto out;
1129
1130	dso = map__dso(map: al.map);
1131	if (!dso)
1132	goto out;
1133
1134	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
1135	dso__data_status_seen(dso, by: DSO_DATA_STATUS_SEEN_ITRACE))
1136	goto out;
1137
1138	offset = map__map_ip(map: al.map, ip_or_rip: address);
1139
1140	map__load(map: al.map);
1141
1142	len = dso__data_read_offset(dso, machine: maps__machine(maps: thread__maps(thread: tidq->thread)),
1143	offset, data: buffer, size);
1144
1145	if (len <= `0`) {
1146	ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1147	" Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1148	if (!dso__auxtrace_warned(dso)) {
1149	pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1150	address,
1151	dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
1152	dso__set_auxtrace_warned(dso);
1153	}
1154	goto out;
1155	}
1156	ret = len;
1157	out:
1158	addr_location__exit(al: &al);
1159	return ret;
1160	}
1161
1162	static struct cs_etm_queue cs_etm__alloc_queue(void*)
1163	{
1164	struct cs_etm_queue etmq = zalloc(sizeof(etmq));
1165	if (!etmq)
1166	return NULL;
1167
1168	etmq->traceid_queues_list = intlist__new(NULL);
1169	if (!etmq->traceid_queues_list)
1170	goto out_free;
1171
1172	/*
1173	* Create an RB tree for traceID-metadata tuple. Since the conversion
1174	* has to be made for each packet that gets decoded, optimizing access
1175	* in anything other than a sequential array is worth doing.
1176	*/
1177	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1178	if (!etmq->traceid_list)
1179	goto out_free;
1180
1181	return etmq;
1182
1183	out_free:
1184	intlist__delete(ilist: etmq->traceid_queues_list);
1185	free(etmq);
1186
1187	return NULL;
1188	}
1189
1190	static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1191	struct auxtrace_queue *queue,
1192	unsigned int queue_nr)
1193	{
1194	struct cs_etm_queue *etmq = queue->priv;
1195
1196	if (etmq)
1197	return `0`;
1198
1199	etmq = cs_etm__alloc_queue();
1200
1201	if (!etmq)
1202	return -ENOMEM;
1203
1204	queue->priv = etmq;
1205	etmq->etm = etm;
1206	etmq->queue_nr = queue_nr;
1207	queue->cpu = queue_nr; / Placeholder, may be reset to -1 in per-thread mode /
1208	etmq->offset = `0`;
1209	etmq->sink_id = SINK_UNSET;
1210
1211	return `0`;
1212	}
1213
1214	static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1215	struct cs_etm_queue *etmq,
1216	unsigned int queue_nr)
1217	{
1218	int ret = `0`;
1219	unsigned int cs_queue_nr;
1220	u8 trace_chan_id;
1221	u64 cs_timestamp;
1222
1223	/*
1224	* We are under a CPU-wide trace scenario. As such we need to know
1225	* when the code that generated the traces started to execute so that
1226	* it can be correlated with execution on other CPUs. So we get a
1227	* handle on the beginning of traces and decode until we find a
1228	* timestamp. The timestamp is then added to the auxtrace min heap
1229	* in order to know what nibble (of all the etmqs) to decode first.
1230	*/
1231	while (`1`) {
1232	/*
1233	* Fetch an aux_buffer from this etmq. Bail if no more
1234	* blocks or an error has been encountered.
1235	*/
1236	ret = cs_etm__get_data_block(etmq);
1237	if (ret <= `0`)
1238	goto out;
1239
1240	/*
1241	* Run decoder on the trace block. The decoder will stop when
1242	* encountering a CS timestamp, a full packet queue or the end of
1243	* trace for that block.
1244	*/
1245	ret = cs_etm__decode_data_block(etmq);
1246	if (ret)
1247	goto out;
1248
1249	/*
1250	* Function cs_etm_decoder__do_{hard\|soft}_timestamp() does all
1251	* the timestamp calculation for us.
1252	*/
1253	cs_timestamp = cs_etm__etmq_get_timestamp(etmq, trace_chan_id: &trace_chan_id);
1254
1255	/ We found a timestamp, no need to continue. /
1256	if (cs_timestamp)
1257	break;
1258
1259	/*
1260	* We didn't find a timestamp so empty all the traceid packet
1261	* queues before looking for another timestamp packet, either
1262	* in the current data block or a new one. Packets that were
1263	* just decoded are useless since no timestamp has been
1264	* associated with them. As such simply discard them.
1265	*/
1266	cs_etm__clear_all_packet_queues(etmq);
1267	}
1268
1269	/*
1270	* We have a timestamp. Add it to the min heap to reflect when
1271	* instructions conveyed by the range packets of this traceID queue
1272	* started to execute. Once the same has been done for all the traceID
1273	* queues of each etmq, redenring and decoding can start in
1274	* chronological order.
1275	*
1276	* Note that packets decoded above are still in the traceID's packet
1277	* queue and will be processed in cs_etm__process_timestamped_queues().
1278	*/
1279	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1280	ret = auxtrace_heap__add(heap: &etm->heap, queue_nr: cs_queue_nr, ordinal: cs_timestamp);
1281	out:
1282	return ret;
1283	}
1284
1285	static inline
1286	void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1287	struct cs_etm_traceid_queue *tidq)
1288	{
1289	struct branch_stack *bs_src = tidq->last_branch_rb;
1290	struct branch_stack *bs_dst = tidq->last_branch;
1291	size_t nr = `0`;
1292
1293	/*
1294	* Set the number of records before early exit: ->nr is used to
1295	* determine how many branches to copy from ->entries.
1296	*/
1297	bs_dst->nr = bs_src->nr;
1298
1299	/*
1300	* Early exit when there is nothing to copy.
1301	*/
1302	if (!bs_src->nr)
1303	return;
1304
1305	/*
1306	* As bs_src->entries is a circular buffer, we need to copy from it in
1307	* two steps. First, copy the branches from the most recently inserted
1308	* branch ->last_branch_pos until the end of bs_src->entries buffer.
1309	*/
1310	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1311	memcpy(&bs_dst->entries[`0`],
1312	&bs_src->entries[tidq->last_branch_pos],
1313	sizeof(struct branch_entry) * nr);
1314
1315	/*
1316	* If we wrapped around at least once, the branches from the beginning
1317	* of the bs_src->entries buffer and until the ->last_branch_pos element
1318	* are older valid branches: copy them over. The total number of
1319	* branches copied over will be equal to the number of branches asked by
1320	* the user in last_branch_sz.
1321	*/
1322	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1323	memcpy(&bs_dst->entries[nr],
1324	&bs_src->entries[`0`],
1325	sizeof(struct branch_entry) * tidq->last_branch_pos);
1326	}
1327	}
1328
1329	static inline
1330	void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1331	{
1332	tidq->last_branch_pos = `0`;
1333	tidq->last_branch_rb->nr = `0`;
1334	}
1335
1336	static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1337	u8 trace_chan_id, u64 addr)
1338	{
1339	u8 instrBytes[`2`];
1340
1341	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1342	instrBytes, `0`);
1343	/*
1344	* T32 instruction size is indicated by bits[15:11] of the first
1345	* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1346	* denote a 32-bit instruction.
1347	*/
1348	return ((instrBytes[`1`] & `0xF8`) >= `0xE8`) ? `4` : `2`;
1349	}
1350
1351	static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1352	{
1353	/*
1354	* Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1355	* appear in samples.
1356	*/
1357	if (packet->sample_type == CS_ETM_DISCONTINUITY \|\|
1358	packet->sample_type == CS_ETM_EXCEPTION)
1359	return `0`;
1360
1361	return packet->start_addr;
1362	}
1363
1364	static inline
1365	u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1366	{
1367	/ Returns 0 for the CS_ETM_DISCONTINUITY packet /
1368	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1369	return `0`;
1370
1371	return packet->end_addr - packet->last_instr_size;
1372	}
1373
1374	static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1375	u64 trace_chan_id,
1376	const struct cs_etm_packet *packet,
1377	u64 offset)
1378	{
1379	if (packet->isa == CS_ETM_ISA_T32) {
1380	u64 addr = packet->start_addr;
1381
1382	while (offset) {
1383	addr += cs_etm__t32_instr_size(etmq,
1384	trace_chan_id, addr);
1385	offset--;
1386	}
1387	return addr;
1388	}
1389
1390	/ Assume a 4 byte instruction size (A32/A64) /
1391	return packet->start_addr + offset * `4`;
1392	}
1393
1394	static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1395	struct cs_etm_traceid_queue *tidq)
1396	{
1397	struct branch_stack *bs = tidq->last_branch_rb;
1398	struct branch_entry *be;
1399
1400	/*
1401	* The branches are recorded in a circular buffer in reverse
1402	* chronological order: we start recording from the last element of the
1403	* buffer down. After writing the first element of the stack, move the
1404	* insert position back to the end of the buffer.
1405	*/
1406	if (!tidq->last_branch_pos)
1407	tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1408
1409	tidq->last_branch_pos -= `1`;
1410
1411	be = &bs->entries[tidq->last_branch_pos];
1412	be->from = cs_etm__last_executed_instr(packet: tidq->prev_packet);
1413	be->to = cs_etm__first_executed_instr(packet: tidq->packet);
1414	/ No support for mispredict /
1415	be->flags.mispred = `0`;
1416	be->flags.predicted = `1`;
1417
1418	/*
1419	* Increment bs->nr until reaching the number of last branches asked by
1420	* the user on the command line.
1421	*/
1422	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1423	bs->nr += `1`;
1424	}
1425
1426	static int cs_etm__inject_event(union perf_event *event,
1427	struct perf_sample *sample, u64 type)
1428	{
1429	event->header.size = perf_event__sample_event_size(sample, type, `0`);
1430	return perf_event__synthesize_sample(event, type, `0`, sample);
1431	}
1432
1433
1434	static int
1435	cs_etm__get_trace(struct cs_etm_queue *etmq)
1436	{
1437	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1438	struct auxtrace_buffer *old_buffer = aux_buffer;
1439	struct auxtrace_queue *queue;
1440
1441	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1442
1443	aux_buffer = auxtrace_buffer__next(queue, buffer: aux_buffer);
1444
1445	/ If no more data, drop the previous auxtrace_buffer and return /
1446	if (!aux_buffer) {
1447	if (old_buffer)
1448	auxtrace_buffer__drop_data(buffer: old_buffer);
1449	etmq->buf_len = `0`;
1450	return `0`;
1451	}
1452
1453	etmq->buffer = aux_buffer;
1454
1455	/ If the aux_buffer doesn't have data associated, try to load it /
1456	if (!aux_buffer->data) {
1457	/ get the file desc associated with the perf data file /
1458	int fd = perf_data__fd(data: etmq->etm->session->data);
1459
1460	aux_buffer->data = auxtrace_buffer__get_data(buffer: aux_buffer, fd);
1461	if (!aux_buffer->data)
1462	return -ENOMEM;
1463	}
1464
1465	/ If valid, drop the previous buffer /
1466	if (old_buffer)
1467	auxtrace_buffer__drop_data(buffer: old_buffer);
1468
1469	etmq->buf_used = `0`;
1470	etmq->buf_len = aux_buffer->size;
1471	etmq->buf = aux_buffer->data;
1472
1473	return etmq->buf_len;
1474	}
1475
1476	static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1477	struct cs_etm_traceid_queue *tidq, pid_t tid,
1478	ocsd_ex_level el)
1479	{
1480	struct machine *machine = cs_etm__get_machine(etmq, el);
1481
1482	if (tid != -`1`) {
1483	thread__zput(tidq->thread);
1484	tidq->thread = machine__find_thread(machine, pid: -`1`, tid);
1485	}
1486
1487	/ Couldn't find a known thread /
1488	if (!tidq->thread)
1489	tidq->thread = machine__idle_thread(machine);
1490
1491	tidq->el = el;
1492	}
1493
1494	int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1495	u8 trace_chan_id, ocsd_ex_level el)
1496	{
1497	struct cs_etm_traceid_queue *tidq;
1498
1499	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1500	if (!tidq)
1501	return -EINVAL;
1502
1503	cs_etm__set_thread(etmq, tidq, tid, el);
1504	return `0`;
1505	}
1506
1507	bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1508	{
1509	return !!etmq->etm->timeless_decoding;
1510	}
1511
1512	static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1513	u64 trace_chan_id,
1514	const struct cs_etm_packet *packet,
1515	struct perf_sample *sample)
1516	{
1517	/*
1518	* It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1519	* packet, so directly bail out with 'insn_len' = 0.
1520	*/
1521	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1522	sample->insn_len = `0`;
1523	return;
1524	}
1525
1526	/*
1527	* T32 instruction size might be 32-bit or 16-bit, decide by calling
1528	* cs_etm__t32_instr_size().
1529	*/
1530	if (packet->isa == CS_ETM_ISA_T32)
1531	sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1532	addr: sample->ip);
1533	/ Otherwise, A64 and A32 instruction size are always 32-bit. /
1534	else
1535	sample->insn_len = `4`;
1536
1537	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1538	(void *)sample->insn, `0`);
1539	}
1540
1541	u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1542	{
1543	struct cs_etm_auxtrace *etm = etmq->etm;
1544
1545	if (etm->has_virtual_ts)
1546	return tsc_to_perf_time(cyc: cs_timestamp, tc: &etm->tc);
1547	else
1548	return cs_timestamp;
1549	}
1550
1551	static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1552	struct cs_etm_traceid_queue *tidq)
1553	{
1554	struct cs_etm_auxtrace *etm = etmq->etm;
1555	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1556
1557	if (!etm->timeless_decoding && etm->has_virtual_ts)
1558	return packet_queue->cs_timestamp;
1559	else
1560	return etm->latest_kernel_timestamp;
1561	}
1562
1563	static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1564	struct cs_etm_traceid_queue *tidq,
1565	u64 addr, u64 period)
1566	{
1567	int ret = `0`;
1568	struct cs_etm_auxtrace *etm = etmq->etm;
1569	union perf_event *event = tidq->event_buf;
1570	struct perf_sample sample;
1571
1572	perf_sample__init(&sample, /all=/true);
1573	event->sample.header.type = PERF_RECORD_SAMPLE;
1574	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1575	event->sample.header.size = sizeof(struct perf_event_header);
1576
1577	/ Set time field based on etm auxtrace config. /
1578	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1579
1580	sample.ip = addr;
1581	sample.pid = thread__pid(thread: tidq->thread);
1582	sample.tid = thread__tid(thread: tidq->thread);
1583	sample.id = etmq->etm->instructions_id;
1584	sample.stream_id = etmq->etm->instructions_id;
1585	sample.period = period;
1586	sample.cpu = tidq->packet->cpu;
1587	sample.flags = tidq->prev_packet->flags;
1588	sample.cpumode = event->sample.header.misc;
1589
1590	cs_etm__copy_insn(etmq, trace_chan_id: tidq->trace_chan_id, packet: tidq->packet, sample: &sample);
1591
1592	if (etm->synth_opts.last_branch)
1593	sample.branch_stack = tidq->last_branch;
1594
1595	if (etm->synth_opts.inject) {
1596	ret = cs_etm__inject_event(event, sample: &sample,
1597	type: etm->instructions_sample_type);
1598	if (ret)
1599	return ret;
1600	}
1601
1602	ret = perf_session__deliver_synth_event(session: etm->session, event, sample: &sample);
1603
1604	if (ret)
1605	pr_err(
1606	"CS ETM Trace: failed to deliver instruction event, error %d\n",
1607	ret);
1608
1609	perf_sample__exit(&sample);
1610	return ret;
1611	}
1612
1613	/*
1614	* The cs etm packet encodes an instruction range between a branch target
1615	* and the next taken branch. Generate sample accordingly.
1616	*/
1617	static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1618	struct cs_etm_traceid_queue *tidq)
1619	{
1620	int ret = `0`;
1621	struct cs_etm_auxtrace *etm = etmq->etm;
1622	struct perf_sample sample = {.ip = `0`,};
1623	union perf_event *event = tidq->event_buf;
1624	struct dummy_branch_stack {
1625	u64 nr;
1626	u64 hw_idx;
1627	struct branch_entry entries;
1628	} dummy_bs;
1629	u64 ip;
1630
1631	ip = cs_etm__last_executed_instr(packet: tidq->prev_packet);
1632
1633	event->sample.header.type = PERF_RECORD_SAMPLE;
1634	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1635	tidq->prev_packet_el);
1636	event->sample.header.size = sizeof(struct perf_event_header);
1637
1638	/ Set time field based on etm auxtrace config. /
1639	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1640
1641	sample.ip = ip;
1642	sample.pid = thread__pid(thread: tidq->prev_packet_thread);
1643	sample.tid = thread__tid(thread: tidq->prev_packet_thread);
1644	sample.addr = cs_etm__first_executed_instr(packet: tidq->packet);
1645	sample.id = etmq->etm->branches_id;
1646	sample.stream_id = etmq->etm->branches_id;
1647	sample.period = `1`;
1648	sample.cpu = tidq->packet->cpu;
1649	sample.flags = tidq->prev_packet->flags;
1650	sample.cpumode = event->sample.header.misc;
1651
1652	cs_etm__copy_insn(etmq, trace_chan_id: tidq->trace_chan_id, packet: tidq->prev_packet,
1653	sample: &sample);
1654
1655	/*
1656	* perf report cannot handle events without a branch stack
1657	*/
1658	if (etm->synth_opts.last_branch) {
1659	dummy_bs = (struct dummy_branch_stack){
1660	.nr = `1`,
1661	.hw_idx = -`1ULL`,
1662	.entries = {
1663	.from = sample.ip,
1664	.to = sample.addr,
1665	},
1666	};
1667	sample.branch_stack = (struct branch_stack *)&dummy_bs;
1668	}
1669
1670	if (etm->synth_opts.inject) {
1671	ret = cs_etm__inject_event(event, sample: &sample,
1672	type: etm->branches_sample_type);
1673	if (ret)
1674	return ret;
1675	}
1676
1677	ret = perf_session__deliver_synth_event(session: etm->session, event, sample: &sample);
1678
1679	if (ret)
1680	pr_err(
1681	"CS ETM Trace: failed to deliver instruction event, error %d\n",
1682	ret);
1683
1684	return ret;
1685	}
1686
1687	static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1688	struct perf_session *session)
1689	{
1690	struct evlist *evlist = session->evlist;
1691	struct evsel *evsel;
1692	struct perf_event_attr attr;
1693	bool found = false;
1694	u64 id;
1695	int err;
1696
1697	evlist__for_each_entry(evlist, evsel) {
1698	if (evsel->core.attr.type == etm->pmu_type) {
1699	found = true;
1700	break;
1701	}
1702	}
1703
1704	if (!found) {
1705	pr_debug("No selected events with CoreSight Trace data\n");
1706	return `0`;
1707	}
1708
1709	memset(&attr, `0`, sizeof(struct perf_event_attr));
1710	attr.size = sizeof(struct perf_event_attr);
1711	attr.type = PERF_TYPE_HARDWARE;
1712	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1713	attr.sample_type \|= PERF_SAMPLE_IP \| PERF_SAMPLE_TID \|
1714	PERF_SAMPLE_PERIOD;
1715	if (etm->timeless_decoding)
1716	attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1717	else
1718	attr.sample_type \|= PERF_SAMPLE_TIME;
1719
1720	attr.exclude_user = evsel->core.attr.exclude_user;
1721	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1722	attr.exclude_hv = evsel->core.attr.exclude_hv;
1723	attr.exclude_host = evsel->core.attr.exclude_host;
1724	attr.exclude_guest = evsel->core.attr.exclude_guest;
1725	attr.sample_id_all = evsel->core.attr.sample_id_all;
1726	attr.read_format = evsel->core.attr.read_format;
1727
1728	/ create new id val to be a fixed offset from evsel id /
1729	id = auxtrace_synth_id_range_start(evsel);
1730
1731	if (etm->synth_opts.branches) {
1732	attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1733	attr.sample_period = `1`;
1734	attr.sample_type \|= PERF_SAMPLE_ADDR;
1735	err = perf_session__deliver_synth_attr_event(session, attr: &attr, id);
1736	if (err)
1737	return err;
1738	etm->branches_sample_type = attr.sample_type;
1739	etm->branches_id = id;
1740	id += `1`;
1741	attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1742	}
1743
1744	if (etm->synth_opts.last_branch) {
1745	attr.sample_type \|= PERF_SAMPLE_BRANCH_STACK;
1746	/*
1747	* We don't use the hardware index, but the sample generation
1748	* code uses the new format branch_stack with this field,
1749	* so the event attributes must indicate that it's present.
1750	*/
1751	attr.branch_sample_type \|= PERF_SAMPLE_BRANCH_HW_INDEX;
1752	}
1753
1754	if (etm->synth_opts.instructions) {
1755	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1756	attr.sample_period = etm->synth_opts.period;
1757	etm->instructions_sample_period = attr.sample_period;
1758	err = perf_session__deliver_synth_attr_event(session, attr: &attr, id);
1759	if (err)
1760	return err;
1761	etm->instructions_sample_type = attr.sample_type;
1762	etm->instructions_id = id;
1763	id += `1`;
1764	}
1765
1766	return `0`;
1767	}
1768
1769	static int cs_etm__sample(struct cs_etm_queue *etmq,
1770	struct cs_etm_traceid_queue *tidq)
1771	{
1772	struct cs_etm_auxtrace *etm = etmq->etm;
1773	int ret;
1774	u8 trace_chan_id = tidq->trace_chan_id;
1775	u64 instrs_prev;
1776
1777	/ Get instructions remainder from previous packet /
1778	instrs_prev = tidq->period_instructions;
1779
1780	tidq->period_instructions += tidq->packet->instr_count;
1781
1782	/*
1783	* Record a branch when the last instruction in
1784	* PREV_PACKET is a branch.
1785	*/
1786	if (etm->synth_opts.last_branch &&
1787	tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1788	tidq->prev_packet->last_instr_taken_branch)
1789	cs_etm__update_last_branch_rb(etmq, tidq);
1790
1791	if (etm->synth_opts.instructions &&
1792	tidq->period_instructions >= etm->instructions_sample_period) {
1793	/*
1794	* Emit instruction sample periodically
1795	* TODO: allow period to be defined in cycles and clock time
1796	*/
1797
1798	/*
1799	* Below diagram demonstrates the instruction samples
1800	* generation flows:
1801	*
1802	* Instrs Instrs Instrs Instrs
1803	* Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
1804	* \| \| \| \|
1805	* V V V V
1806	* --------------------------------------------------
1807	* ^ ^
1808	* \| \|
1809	* Period Period
1810	* instructions(Pi) instructions(Pi')
1811	*
1812	* \| \|
1813	* \---------------- -----------------/
1814	* V
1815	* tidq->packet->instr_count
1816	*
1817	* Instrs Sample(n...) are the synthesised samples occurring
1818	* every etm->instructions_sample_period instructions - as
1819	* defined on the perf command line. Sample(n) is being the
1820	* last sample before the current etm packet, n+1 to n+3
1821	* samples are generated from the current etm packet.
1822	*
1823	* tidq->packet->instr_count represents the number of
1824	* instructions in the current etm packet.
1825	*
1826	* Period instructions (Pi) contains the number of
1827	* instructions executed after the sample point(n) from the
1828	* previous etm packet. This will always be less than
1829	* etm->instructions_sample_period.
1830	*
1831	* When generate new samples, it combines with two parts
1832	* instructions, one is the tail of the old packet and another
1833	* is the head of the new coming packet, to generate
1834	* sample(n+1); sample(n+2) and sample(n+3) consume the
1835	* instructions with sample period. After sample(n+3), the rest
1836	* instructions will be used by later packet and it is assigned
1837	* to tidq->period_instructions for next round calculation.
1838	*/
1839
1840	/*
1841	* Get the initial offset into the current packet instructions;
1842	* entry conditions ensure that instrs_prev is less than
1843	* etm->instructions_sample_period.
1844	*/
1845	u64 offset = etm->instructions_sample_period - instrs_prev;
1846	u64 addr;
1847
1848	/ Prepare last branches for instruction sample /
1849	if (etm->synth_opts.last_branch)
1850	cs_etm__copy_last_branch_rb(etmq, tidq);
1851
1852	while (tidq->period_instructions >=
1853	etm->instructions_sample_period) {
1854	/*
1855	* Calculate the address of the sampled instruction (-1
1856	* as sample is reported as though instruction has just
1857	* been executed, but PC has not advanced to next
1858	* instruction)
1859	*/
1860	addr = cs_etm__instr_addr(etmq, trace_chan_id,
1861	packet: tidq->packet, offset: offset - `1`);
1862	ret = cs_etm__synth_instruction_sample(
1863	etmq, tidq, addr,
1864	period: etm->instructions_sample_period);
1865	if (ret)
1866	return ret;
1867
1868	offset += etm->instructions_sample_period;
1869	tidq->period_instructions -=
1870	etm->instructions_sample_period;
1871	}
1872	}
1873
1874	if (etm->synth_opts.branches) {
1875	bool generate_sample = false;
1876
1877	/ Generate sample for tracing on packet /
1878	if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1879	generate_sample = true;
1880
1881	/ Generate sample for branch taken packet /
1882	if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1883	tidq->prev_packet->last_instr_taken_branch)
1884	generate_sample = true;
1885
1886	if (generate_sample) {
1887	ret = cs_etm__synth_branch_sample(etmq, tidq);
1888	if (ret)
1889	return ret;
1890	}
1891	}
1892
1893	cs_etm__packet_swap(etm, tidq);
1894
1895	return `0`;
1896	}
1897
1898	static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1899	{
1900	/*
1901	* When the exception packet is inserted, whether the last instruction
1902	* in previous range packet is taken branch or not, we need to force
1903	* to set 'prev_packet->last_instr_taken_branch' to true. This ensures
1904	* to generate branch sample for the instruction range before the
1905	* exception is trapped to kernel or before the exception returning.
1906	*
1907	* The exception packet includes the dummy address values, so don't
1908	* swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1909	* for generating instruction and branch samples.
1910	*/
1911	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1912	tidq->prev_packet->last_instr_taken_branch = true;
1913
1914	return `0`;
1915	}
1916
1917	static int cs_etm__flush(struct cs_etm_queue *etmq,
1918	struct cs_etm_traceid_queue *tidq)
1919	{
1920	int err = `0`;
1921	struct cs_etm_auxtrace *etm = etmq->etm;
1922
1923	/ Handle start tracing packet /
1924	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1925	goto swap_packet;
1926
1927	if (etmq->etm->synth_opts.last_branch &&
1928	etmq->etm->synth_opts.instructions &&
1929	tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1930	u64 addr;
1931
1932	/ Prepare last branches for instruction sample /
1933	cs_etm__copy_last_branch_rb(etmq, tidq);
1934
1935	/*
1936	* Generate a last branch event for the branches left in the
1937	* circular buffer at the end of the trace.
1938	*
1939	* Use the address of the end of the last reported execution
1940	* range
1941	*/
1942	addr = cs_etm__last_executed_instr(packet: tidq->prev_packet);
1943
1944	err = cs_etm__synth_instruction_sample(
1945	etmq, tidq, addr,
1946	period: tidq->period_instructions);
1947	if (err)
1948	return err;
1949
1950	tidq->period_instructions = `0`;
1951
1952	}
1953
1954	if (etm->synth_opts.branches &&
1955	tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1956	err = cs_etm__synth_branch_sample(etmq, tidq);
1957	if (err)
1958	return err;
1959	}
1960
1961	swap_packet:
1962	cs_etm__packet_swap(etm, tidq);
1963
1964	/ Reset last branches after flush the trace /
1965	if (etm->synth_opts.last_branch)
1966	cs_etm__reset_last_branch_rb(tidq);
1967
1968	return err;
1969	}
1970
1971	static int cs_etm__end_block(struct cs_etm_queue *etmq,
1972	struct cs_etm_traceid_queue *tidq)
1973	{
1974	int err;
1975
1976	/*
1977	* It has no new packet coming and 'etmq->packet' contains the stale
1978	* packet which was set at the previous time with packets swapping;
1979	* so skip to generate branch sample to avoid stale packet.
1980	*
1981	* For this case only flush branch stack and generate a last branch
1982	* event for the branches left in the circular buffer at the end of
1983	* the trace.
1984	*/
1985	if (etmq->etm->synth_opts.last_branch &&
1986	etmq->etm->synth_opts.instructions &&
1987	tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1988	u64 addr;
1989
1990	/ Prepare last branches for instruction sample /
1991	cs_etm__copy_last_branch_rb(etmq, tidq);
1992
1993	/*
1994	* Use the address of the end of the last reported execution
1995	* range.
1996	*/
1997	addr = cs_etm__last_executed_instr(packet: tidq->prev_packet);
1998
1999	err = cs_etm__synth_instruction_sample(
2000	etmq, tidq, addr,
2001	period: tidq->period_instructions);
2002	if (err)
2003	return err;
2004
2005	tidq->period_instructions = `0`;
2006	}
2007
2008	return `0`;
2009	}
2010	/*
2011	* cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2012	* if need be.
2013	* Returns: < 0 if error
2014	* = 0 if no more auxtrace_buffer to read
2015	* > 0 if the current buffer isn't empty yet
2016	*/
2017	static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2018	{
2019	int ret;
2020
2021	if (!etmq->buf_len) {
2022	ret = cs_etm__get_trace(etmq);
2023	if (ret <= `0`)
2024	return ret;
2025	/*
2026	* We cannot assume consecutive blocks in the data file
2027	* are contiguous, reset the decoder to force re-sync.
2028	*/
2029	ret = cs_etm_decoder__reset(decoder: etmq->decoder);
2030	if (ret)
2031	return ret;
2032	}
2033
2034	return etmq->buf_len;
2035	}
2036
2037	static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2038	struct cs_etm_packet *packet,
2039	u64 end_addr)
2040	{
2041	/ Initialise to keep compiler happy /
2042	u16 instr16 = `0`;
2043	u32 instr32 = `0`;
2044	u64 addr;
2045
2046	switch (packet->isa) {
2047	case CS_ETM_ISA_T32:
2048	/*
2049	* The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2050	*
2051	* b'15 b'8
2052	* +-----------------+--------+
2053	* \| 1 1 0 1 1 1 1 1 \| imm8 \|
2054	* +-----------------+--------+
2055	*
2056	* According to the specification, it only defines SVC for T32
2057	* with 16 bits instruction and has no definition for 32bits;
2058	* so below only read 2 bytes as instruction size for T32.
2059	*/
2060	addr = end_addr - `2`;
2061	cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2062	(u8 *)&instr16, `0`);
2063	if ((instr16 & `0xFF00`) == `0xDF00`)
2064	return true;
2065
2066	break;
2067	case CS_ETM_ISA_A32:
2068	/*
2069	* The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2070	*
2071	* b'31 b'28 b'27 b'24
2072	* +---------+---------+-------------------------+
2073	* \| !1111 \| 1 1 1 1 \| imm24 \|
2074	* +---------+---------+-------------------------+
2075	*/
2076	addr = end_addr - `4`;
2077	cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2078	(u8 *)&instr32, `0`);
2079	if ((instr32 & `0x0F000000`) == `0x0F000000` &&
2080	(instr32 & `0xF0000000`) != `0xF0000000`)
2081	return true;
2082
2083	break;
2084	case CS_ETM_ISA_A64:
2085	/*
2086	* The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2087	*
2088	* b'31 b'21 b'4 b'0
2089	* +-----------------------+---------+-----------+
2090	* \| 1 1 0 1 0 1 0 0 0 0 0 \| imm16 \| 0 0 0 0 1 \|
2091	* +-----------------------+---------+-----------+
2092	*/
2093	addr = end_addr - `4`;
2094	cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2095	(u8 *)&instr32, `0`);
2096	if ((instr32 & `0xFFE0001F`) == `0xd4000001`)
2097	return true;
2098
2099	break;
2100	case CS_ETM_ISA_UNKNOWN:
2101	default:
2102	break;
2103	}
2104
2105	return false;
2106	}
2107
2108	static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2109	struct cs_etm_traceid_queue *tidq, u64 magic)
2110	{
2111	u8 trace_chan_id = tidq->trace_chan_id;
2112	struct cs_etm_packet *packet = tidq->packet;
2113	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2114
2115	if (magic == __perf_cs_etmv3_magic)
2116	if (packet->exception_number == CS_ETMV3_EXC_SVC)
2117	return true;
2118
2119	/*
2120	* ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2121	* HVC cases; need to check if it's SVC instruction based on
2122	* packet address.
2123	*/
2124	if (magic == __perf_cs_etmv4_magic) {
2125	if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2126	cs_etm__is_svc_instr(etmq, trace_chan_id, packet: prev_packet,
2127	end_addr: prev_packet->end_addr))
2128	return true;
2129	}
2130
2131	return false;
2132	}
2133
2134	static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2135	u64 magic)
2136	{
2137	struct cs_etm_packet *packet = tidq->packet;
2138
2139	if (magic == __perf_cs_etmv3_magic)
2140	if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT \|\|
2141	packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT \|\|
2142	packet->exception_number == CS_ETMV3_EXC_PE_RESET \|\|
2143	packet->exception_number == CS_ETMV3_EXC_IRQ \|\|
2144	packet->exception_number == CS_ETMV3_EXC_FIQ)
2145	return true;
2146
2147	if (magic == __perf_cs_etmv4_magic)
2148	if (packet->exception_number == CS_ETMV4_EXC_RESET \|\|
2149	packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT \|\|
2150	packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR \|\|
2151	packet->exception_number == CS_ETMV4_EXC_INST_DEBUG \|\|
2152	packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG \|\|
2153	packet->exception_number == CS_ETMV4_EXC_IRQ \|\|
2154	packet->exception_number == CS_ETMV4_EXC_FIQ)
2155	return true;
2156
2157	return false;
2158	}
2159
2160	static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2161	struct cs_etm_traceid_queue *tidq,
2162	u64 magic)
2163	{
2164	u8 trace_chan_id = tidq->trace_chan_id;
2165	struct cs_etm_packet *packet = tidq->packet;
2166	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2167
2168	if (magic == __perf_cs_etmv3_magic)
2169	if (packet->exception_number == CS_ETMV3_EXC_SMC \|\|
2170	packet->exception_number == CS_ETMV3_EXC_HYP \|\|
2171	packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE \|\|
2172	packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR \|\|
2173	packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT \|\|
2174	packet->exception_number == CS_ETMV3_EXC_DATA_FAULT \|\|
2175	packet->exception_number == CS_ETMV3_EXC_GENERIC)
2176	return true;
2177
2178	if (magic == __perf_cs_etmv4_magic) {
2179	if (packet->exception_number == CS_ETMV4_EXC_TRAP \|\|
2180	packet->exception_number == CS_ETMV4_EXC_ALIGNMENT \|\|
2181	packet->exception_number == CS_ETMV4_EXC_INST_FAULT \|\|
2182	packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2183	return true;
2184
2185	/*
2186	* For CS_ETMV4_EXC_CALL, except SVC other instructions
2187	* (SMC, HVC) are taken as sync exceptions.
2188	*/
2189	if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2190	!cs_etm__is_svc_instr(etmq, trace_chan_id, packet: prev_packet,
2191	end_addr: prev_packet->end_addr))
2192	return true;
2193
2194	/*
2195	* ETMv4 has 5 bits for exception number; if the numbers
2196	* are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2197	* they are implementation defined exceptions.
2198	*
2199	* For this case, simply take it as sync exception.
2200	*/
2201	if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2202	packet->exception_number <= CS_ETMV4_EXC_END)
2203	return true;
2204	}
2205
2206	return false;
2207	}
2208
2209	static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2210	struct cs_etm_traceid_queue *tidq)
2211	{
2212	struct cs_etm_packet *packet = tidq->packet;
2213	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2214	u8 trace_chan_id = tidq->trace_chan_id;
2215	u64 magic;
2216	int ret;
2217
2218	switch (packet->sample_type) {
2219	case CS_ETM_RANGE:
2220	/*
2221	* Immediate branch instruction without neither link nor
2222	* return flag, it's normal branch instruction within
2223	* the function.
2224	*/
2225	if (packet->last_instr_type == OCSD_INSTR_BR &&
2226	packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2227	packet->flags = PERF_IP_FLAG_BRANCH;
2228
2229	if (packet->last_instr_cond)
2230	packet->flags \|= PERF_IP_FLAG_CONDITIONAL;
2231	}
2232
2233	/*
2234	* Immediate branch instruction with link (e.g. BL), this is
2235	* branch instruction for function call.
2236	*/
2237	if (packet->last_instr_type == OCSD_INSTR_BR &&
2238	packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2239	packet->flags = PERF_IP_FLAG_BRANCH \|
2240	PERF_IP_FLAG_CALL;
2241
2242	/*
2243	* Indirect branch instruction with link (e.g. BLR), this is
2244	* branch instruction for function call.
2245	*/
2246	if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2247	packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2248	packet->flags = PERF_IP_FLAG_BRANCH \|
2249	PERF_IP_FLAG_CALL;
2250
2251	/*
2252	* Indirect branch instruction with subtype of
2253	* OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2254	* function return for A32/T32.
2255	*/
2256	if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2257	packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2258	packet->flags = PERF_IP_FLAG_BRANCH \|
2259	PERF_IP_FLAG_RETURN;
2260
2261	/*
2262	* Indirect branch instruction without link (e.g. BR), usually
2263	* this is used for function return, especially for functions
2264	* within dynamic link lib.
2265	*/
2266	if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2267	packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2268	packet->flags = PERF_IP_FLAG_BRANCH \|
2269	PERF_IP_FLAG_RETURN;
2270
2271	/ Return instruction for function return. /
2272	if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2273	packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2274	packet->flags = PERF_IP_FLAG_BRANCH \|
2275	PERF_IP_FLAG_RETURN;
2276
2277	/*
2278	* Decoder might insert a discontinuity in the middle of
2279	* instruction packets, fixup prev_packet with flag
2280	* PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2281	*/
2282	if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2283	prev_packet->flags \|= PERF_IP_FLAG_BRANCH \|
2284	PERF_IP_FLAG_TRACE_BEGIN;
2285
2286	/*
2287	* If the previous packet is an exception return packet
2288	* and the return address just follows SVC instruction,
2289	* it needs to calibrate the previous packet sample flags
2290	* as PERF_IP_FLAG_SYSCALLRET.
2291	*/
2292	if (prev_packet->flags == (PERF_IP_FLAG_BRANCH \|
2293	PERF_IP_FLAG_RETURN \|
2294	PERF_IP_FLAG_INTERRUPT) &&
2295	cs_etm__is_svc_instr(etmq, trace_chan_id,
2296	packet, end_addr: packet->start_addr))
2297	prev_packet->flags = PERF_IP_FLAG_BRANCH \|
2298	PERF_IP_FLAG_RETURN \|
2299	PERF_IP_FLAG_SYSCALLRET;
2300	break;
2301	case CS_ETM_DISCONTINUITY:
2302	/*
2303	* The trace is discontinuous, if the previous packet is
2304	* instruction packet, set flag PERF_IP_FLAG_TRACE_END
2305	* for previous packet.
2306	*/
2307	if (prev_packet->sample_type == CS_ETM_RANGE)
2308	prev_packet->flags \|= PERF_IP_FLAG_BRANCH \|
2309	PERF_IP_FLAG_TRACE_END;
2310	break;
2311	case CS_ETM_EXCEPTION:
2312	ret = cs_etm__get_magic(etmq, trace_chan_id: packet->trace_chan_id, magic: &magic);
2313	if (ret)
2314	return ret;
2315
2316	/ The exception is for system call. /
2317	if (cs_etm__is_syscall(etmq, tidq, magic))
2318	packet->flags = PERF_IP_FLAG_BRANCH \|
2319	PERF_IP_FLAG_CALL \|
2320	PERF_IP_FLAG_SYSCALLRET;
2321	/*
2322	* The exceptions are triggered by external signals from bus,
2323	* interrupt controller, debug module, PE reset or halt.
2324	*/
2325	else if (cs_etm__is_async_exception(tidq, magic))
2326	packet->flags = PERF_IP_FLAG_BRANCH \|
2327	PERF_IP_FLAG_CALL \|
2328	PERF_IP_FLAG_ASYNC \|
2329	PERF_IP_FLAG_INTERRUPT;
2330	/*
2331	* Otherwise, exception is caused by trap, instruction &
2332	* data fault, or alignment errors.
2333	*/
2334	else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2335	packet->flags = PERF_IP_FLAG_BRANCH \|
2336	PERF_IP_FLAG_CALL \|
2337	PERF_IP_FLAG_INTERRUPT;
2338
2339	/*
2340	* When the exception packet is inserted, since exception
2341	* packet is not used standalone for generating samples
2342	* and it's affiliation to the previous instruction range
2343	* packet; so set previous range packet flags to tell perf
2344	* it is an exception taken branch.
2345	*/
2346	if (prev_packet->sample_type == CS_ETM_RANGE)
2347	prev_packet->flags = packet->flags;
2348	break;
2349	case CS_ETM_EXCEPTION_RET:
2350	/*
2351	* When the exception return packet is inserted, since
2352	* exception return packet is not used standalone for
2353	* generating samples and it's affiliation to the previous
2354	* instruction range packet; so set previous range packet
2355	* flags to tell perf it is an exception return branch.
2356	*
2357	* The exception return can be for either system call or
2358	* other exception types; unfortunately the packet doesn't
2359	* contain exception type related info so we cannot decide
2360	* the exception type purely based on exception return packet.
2361	* If we record the exception number from exception packet and
2362	* reuse it for exception return packet, this is not reliable
2363	* due the trace can be discontinuity or the interrupt can
2364	* be nested, thus the recorded exception number cannot be
2365	* used for exception return packet for these two cases.
2366	*
2367	* For exception return packet, we only need to distinguish the
2368	* packet is for system call or for other types. Thus the
2369	* decision can be deferred when receive the next packet which
2370	* contains the return address, based on the return address we
2371	* can read out the previous instruction and check if it's a
2372	* system call instruction and then calibrate the sample flag
2373	* as needed.
2374	*/
2375	if (prev_packet->sample_type == CS_ETM_RANGE)
2376	prev_packet->flags = PERF_IP_FLAG_BRANCH \|
2377	PERF_IP_FLAG_RETURN \|
2378	PERF_IP_FLAG_INTERRUPT;
2379	break;
2380	case CS_ETM_EMPTY:
2381	default:
2382	break;
2383	}
2384
2385	return `0`;
2386	}
2387
2388	static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2389	{
2390	int ret = `0`;
2391	size_t processed = `0`;
2392
2393	/*
2394	* Packets are decoded and added to the decoder's packet queue
2395	* until the decoder packet processing callback has requested that
2396	* processing stops or there is nothing left in the buffer. Normal
2397	* operations that stop processing are a timestamp packet or a full
2398	* decoder buffer queue.
2399	*/
2400	ret = cs_etm_decoder__process_data_block(decoder: etmq->decoder,
2401	indx: etmq->offset,
2402	buf: &etmq->buf[etmq->buf_used],
2403	len: etmq->buf_len,
2404	consumed: &processed);
2405	if (ret)
2406	goto out;
2407
2408	etmq->offset += processed;
2409	etmq->buf_used += processed;
2410	etmq->buf_len -= processed;
2411
2412	out:
2413	return ret;
2414	}
2415
2416	static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2417	struct cs_etm_traceid_queue *tidq)
2418	{
2419	int ret;
2420	struct cs_etm_packet_queue *packet_queue;
2421
2422	packet_queue = &tidq->packet_queue;
2423
2424	/ Process each packet in this chunk /
2425	while (`1`) {
2426	ret = cs_etm_decoder__get_packet(packet_queue,
2427	packet: tidq->packet);
2428	if (ret <= `0`)
2429	/*
2430	* Stop processing this chunk on
2431	* end of data or error
2432	*/
2433	break;
2434
2435	/*
2436	* Since packet addresses are swapped in packet
2437	* handling within below switch() statements,
2438	* thus setting sample flags must be called
2439	* prior to switch() statement to use address
2440	* information before packets swapping.
2441	*/
2442	ret = cs_etm__set_sample_flags(etmq, tidq);
2443	if (ret < `0`)
2444	break;
2445
2446	switch (tidq->packet->sample_type) {
2447	case CS_ETM_RANGE:
2448	/*
2449	* If the packet contains an instruction
2450	* range, generate instruction sequence
2451	* events.
2452	*/
2453	cs_etm__sample(etmq, tidq);
2454	break;
2455	case CS_ETM_EXCEPTION:
2456	case CS_ETM_EXCEPTION_RET:
2457	/*
2458	* If the exception packet is coming,
2459	* make sure the previous instruction
2460	* range packet to be handled properly.
2461	*/
2462	cs_etm__exception(tidq);
2463	break;
2464	case CS_ETM_DISCONTINUITY:
2465	/*
2466	* Discontinuity in trace, flush
2467	* previous branch stack
2468	*/
2469	cs_etm__flush(etmq, tidq);
2470	break;
2471	case CS_ETM_EMPTY:
2472	/*
2473	* Should not receive empty packet,
2474	* report error.
2475	*/
2476	pr_err("CS ETM Trace: empty packet\n");
2477	return -EINVAL;
2478	default:
2479	break;
2480	}
2481	}
2482
2483	return ret;
2484	}
2485
2486	static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2487	{
2488	int idx;
2489	struct int_node *inode;
2490	struct cs_etm_traceid_queue *tidq;
2491	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2492
2493	intlist__for_each_entry(inode, traceid_queues_list) {
2494	idx = (int)(intptr_t)inode->priv;
2495	tidq = etmq->traceid_queues[idx];
2496
2497	/ Ignore return value /
2498	cs_etm__process_traceid_queue(etmq, tidq);
2499	}
2500	}
2501
2502	static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2503	{
2504	int err = `0`;
2505	struct cs_etm_traceid_queue *tidq;
2506
2507	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2508	if (!tidq)
2509	return -EINVAL;
2510
2511	/ Go through each buffer in the queue and decode them one by one /
2512	while (`1`) {
2513	err = cs_etm__get_data_block(etmq);
2514	if (err <= `0`)
2515	return err;
2516
2517	/ Run trace decoder until buffer consumed or end of trace /
2518	do {
2519	err = cs_etm__decode_data_block(etmq);
2520	if (err)
2521	return err;
2522
2523	/*
2524	* Process each packet in this chunk, nothing to do if
2525	* an error occurs other than hoping the next one will
2526	* be better.
2527	*/
2528	err = cs_etm__process_traceid_queue(etmq, tidq);
2529
2530	} while (etmq->buf_len);
2531
2532	if (err == `0`)
2533	/ Flush any remaining branch stack entries /
2534	err = cs_etm__end_block(etmq, tidq);
2535	}
2536
2537	return err;
2538	}
2539
2540	static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2541	{
2542	int idx, err = `0`;
2543	struct cs_etm_traceid_queue *tidq;
2544	struct int_node *inode;
2545
2546	/ Go through each buffer in the queue and decode them one by one /
2547	while (`1`) {
2548	err = cs_etm__get_data_block(etmq);
2549	if (err <= `0`)
2550	return err;
2551
2552	/ Run trace decoder until buffer consumed or end of trace /
2553	do {
2554	err = cs_etm__decode_data_block(etmq);
2555	if (err)
2556	return err;
2557
2558	/*
2559	* cs_etm__run_per_thread_timeless_decoder() runs on a
2560	* single traceID queue because each TID has a separate
2561	* buffer. But here in per-cpu mode we need to iterate
2562	* over each channel instead.
2563	*/
2564	intlist__for_each_entry(inode,
2565	etmq->traceid_queues_list) {
2566	idx = (int)(intptr_t)inode->priv;
2567	tidq = etmq->traceid_queues[idx];
2568	cs_etm__process_traceid_queue(etmq, tidq);
2569	}
2570	} while (etmq->buf_len);
2571
2572	intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2573	idx = (int)(intptr_t)inode->priv;
2574	tidq = etmq->traceid_queues[idx];
2575	/ Flush any remaining branch stack entries /
2576	err = cs_etm__end_block(etmq, tidq);
2577	if (err)
2578	return err;
2579	}
2580	}
2581
2582	return err;
2583	}
2584
2585	static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2586	pid_t tid)
2587	{
2588	unsigned int i;
2589	struct auxtrace_queues *queues = &etm->queues;
2590
2591	for (i = `0`; i < queues->nr_queues; i++) {
2592	struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2593	struct cs_etm_queue *etmq = queue->priv;
2594	struct cs_etm_traceid_queue *tidq;
2595
2596	if (!etmq)
2597	continue;
2598
2599	if (etm->per_thread_decoding) {
2600	tidq = cs_etm__etmq_get_traceid_queue(
2601	etmq, CS_ETM_PER_THREAD_TRACEID);
2602
2603	if (!tidq)
2604	continue;
2605
2606	if (tid == -`1` \|\| thread__tid(thread: tidq->thread) == tid)
2607	cs_etm__run_per_thread_timeless_decoder(etmq);
2608	} else
2609	cs_etm__run_per_cpu_timeless_decoder(etmq);
2610	}
2611
2612	return `0`;
2613	}
2614
2615	static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2616	{
2617	int ret = `0`;
2618	unsigned int cs_queue_nr, queue_nr, i;
2619	u8 trace_chan_id;
2620	u64 cs_timestamp;
2621	struct auxtrace_queue *queue;
2622	struct cs_etm_queue *etmq;
2623	struct cs_etm_traceid_queue *tidq;
2624
2625	/*
2626	* Pre-populate the heap with one entry from each queue so that we can
2627	* start processing in time order across all queues.
2628	*/
2629	for (i = `0`; i < etm->queues.nr_queues; i++) {
2630	etmq = etm->queues.queue_array[i].priv;
2631	if (!etmq)
2632	continue;
2633
2634	ret = cs_etm__queue_first_cs_timestamp(etm, etmq, queue_nr: i);
2635	if (ret)
2636	return ret;
2637	}
2638
2639	while (`1`) {
2640	if (!etm->heap.heap_cnt)
2641	break;
2642
2643	/ Take the entry at the top of the min heap /
2644	cs_queue_nr = etm->heap.heap_array[`0`].queue_nr;
2645	queue_nr = TO_QUEUE_NR(cs_queue_nr);
2646	trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2647	queue = &etm->queues.queue_array[queue_nr];
2648	etmq = queue->priv;
2649
2650	/*
2651	* Remove the top entry from the heap since we are about
2652	* to process it.
2653	*/
2654	auxtrace_heap__pop(heap: &etm->heap);
2655
2656	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2657	if (!tidq) {
2658	/*
2659	* No traceID queue has been allocated for this traceID,
2660	* which means something somewhere went very wrong. No
2661	* other choice than simply exit.
2662	*/
2663	ret = -EINVAL;
2664	goto out;
2665	}
2666
2667	/*
2668	* Packets associated with this timestamp are already in
2669	* the etmq's traceID queue, so process them.
2670	*/
2671	ret = cs_etm__process_traceid_queue(etmq, tidq);
2672	if (ret < `0`)
2673	goto out;
2674
2675	/*
2676	* Packets for this timestamp have been processed, time to
2677	* move on to the next timestamp, fetching a new auxtrace_buffer
2678	* if need be.
2679	*/
2680	refetch:
2681	ret = cs_etm__get_data_block(etmq);
2682	if (ret < `0`)
2683	goto out;
2684
2685	/*
2686	* No more auxtrace_buffers to process in this etmq, simply
2687	* move on to another entry in the auxtrace_heap.
2688	*/
2689	if (!ret)
2690	continue;
2691
2692	ret = cs_etm__decode_data_block(etmq);
2693	if (ret)
2694	goto out;
2695
2696	cs_timestamp = cs_etm__etmq_get_timestamp(etmq, trace_chan_id: &trace_chan_id);
2697
2698	if (!cs_timestamp) {
2699	/*
2700	* Function cs_etm__decode_data_block() returns when
2701	* there is no more traces to decode in the current
2702	* auxtrace_buffer OR when a timestamp has been
2703	* encountered on any of the traceID queues. Since we
2704	* did not get a timestamp, there is no more traces to
2705	* process in this auxtrace_buffer. As such empty and
2706	* flush all traceID queues.
2707	*/
2708	cs_etm__clear_all_traceid_queues(etmq);
2709
2710	/ Fetch another auxtrace_buffer for this etmq /
2711	goto refetch;
2712	}
2713
2714	/*
2715	* Add to the min heap the timestamp for packets that have
2716	* just been decoded. They will be processed and synthesized
2717	* during the next call to cs_etm__process_traceid_queue() for
2718	* this queue/traceID.
2719	*/
2720	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2721	ret = auxtrace_heap__add(heap: &etm->heap, queue_nr: cs_queue_nr, ordinal: cs_timestamp);
2722	}
2723
2724	for (i = `0`; i < etm->queues.nr_queues; i++) {
2725	struct int_node *inode;
2726
2727	etmq = etm->queues.queue_array[i].priv;
2728	if (!etmq)
2729	continue;
2730
2731	intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2732	int idx = (int)(intptr_t)inode->priv;
2733
2734	/ Flush any remaining branch stack entries /
2735	tidq = etmq->traceid_queues[idx];
2736	ret = cs_etm__end_block(etmq, tidq);
2737	if (ret)
2738	return ret;
2739	}
2740	}
2741	out:
2742	return ret;
2743	}
2744
2745	static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2746	union perf_event *event)
2747	{
2748	struct thread *th;
2749
2750	if (etm->timeless_decoding)
2751	return `0`;
2752
2753	/*
2754	* Add the tid/pid to the log so that we can get a match when we get a
2755	* contextID from the decoder. Only track for the host: only kernel
2756	* trace is supported for guests which wouldn't need pids so this should
2757	* be fine.
2758	*/
2759	th = machine__findnew_thread(machine: &etm->session->machines.host,
2760	pid: event->itrace_start.pid,
2761	tid: event->itrace_start.tid);
2762	if (!th)
2763	return -ENOMEM;
2764
2765	thread__put(thread: th);
2766
2767	return `0`;
2768	}
2769
2770	static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2771	union perf_event *event)
2772	{
2773	struct thread *th;
2774	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2775
2776	/*
2777	* Context switch in per-thread mode are irrelevant since perf
2778	* will start/stop tracing as the process is scheduled.
2779	*/
2780	if (etm->timeless_decoding)
2781	return `0`;
2782
2783	/*
2784	* SWITCH_IN events carry the next process to be switched out while
2785	* SWITCH_OUT events carry the process to be switched in. As such
2786	* we don't care about IN events.
2787	*/
2788	if (!out)
2789	return `0`;
2790
2791	/*
2792	* Add the tid/pid to the log so that we can get a match when we get a
2793	* contextID from the decoder. Only track for the host: only kernel
2794	* trace is supported for guests which wouldn't need pids so this should
2795	* be fine.
2796	*/
2797	th = machine__findnew_thread(machine: &etm->session->machines.host,
2798	pid: event->context_switch.next_prev_pid,
2799	tid: event->context_switch.next_prev_tid);
2800	if (!th)
2801	return -ENOMEM;
2802
2803	thread__put(thread: th);
2804
2805	return `0`;
2806	}
2807
2808	static int cs_etm__process_event(struct perf_session *session,
2809	union perf_event *event,
2810	struct perf_sample *sample,
2811	const struct perf_tool *tool)
2812	{
2813	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2814	struct cs_etm_auxtrace,
2815	auxtrace);
2816
2817	if (dump_trace)
2818	return `0`;
2819
2820	if (!tool->ordered_events) {
2821	pr_err("CoreSight ETM Trace requires ordered events\n");
2822	return -EINVAL;
2823	}
2824
2825	switch (event->header.type) {
2826	case PERF_RECORD_EXIT:
2827	/*
2828	* Don't need to wait for cs_etm__flush_events() in per-thread mode to
2829	* start the decode because we know there will be no more trace from
2830	* this thread. All this does is emit samples earlier than waiting for
2831	* the flush in other modes, but with timestamps it makes sense to wait
2832	* for flush so that events from different threads are interleaved
2833	* properly.
2834	*/
2835	if (etm->per_thread_decoding && etm->timeless_decoding)
2836	return cs_etm__process_timeless_queues(etm,
2837	tid: event->fork.tid);
2838	break;
2839
2840	case PERF_RECORD_ITRACE_START:
2841	return cs_etm__process_itrace_start(etm, event);
2842
2843	case PERF_RECORD_SWITCH_CPU_WIDE:
2844	return cs_etm__process_switch_cpu_wide(etm, event);
2845
2846	case PERF_RECORD_AUX:
2847	/*
2848	* Record the latest kernel timestamp available in the header
2849	* for samples so that synthesised samples occur from this point
2850	* onwards.
2851	*/
2852	if (sample->time && (sample->time != (u64)-`1`))
2853	etm->latest_kernel_timestamp = sample->time;
2854	break;
2855
2856	default:
2857	break;
2858	}
2859
2860	return `0`;
2861	}
2862
2863	static void dump_queued_data(struct cs_etm_auxtrace *etm,
2864	struct perf_record_auxtrace *event)
2865	{
2866	struct auxtrace_buffer *buf;
2867	unsigned int i;
2868	/*
2869	* Find all buffers with same reference in the queues and dump them.
2870	* This is because the queues can contain multiple entries of the same
2871	* buffer that were split on aux records.
2872	*/
2873	for (i = `0`; i < etm->queues.nr_queues; ++i)
2874	list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2875	if (buf->reference == event->reference)
2876	cs_etm__dump_event(etmq: etm->queues.queue_array[i].priv, buffer: buf);
2877	}
2878
2879	static int cs_etm__process_auxtrace_event(struct perf_session *session,
2880	union perf_event *event,
2881	const struct perf_tool *tool __maybe_unused)
2882	{
2883	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2884	struct cs_etm_auxtrace,
2885	auxtrace);
2886	if (!etm->data_queued) {
2887	struct auxtrace_buffer *buffer;
2888	off_t data_offset;
2889	int fd = perf_data__fd(data: session->data);
2890	bool is_pipe = perf_data__is_pipe(data: session->data);
2891	int err;
2892	int idx = event->auxtrace.idx;
2893
2894	if (is_pipe)
2895	data_offset = `0`;
2896	else {
2897	data_offset = lseek(fd, `0`, SEEK_CUR);
2898	if (data_offset == -`1`)
2899	return -errno;
2900	}
2901
2902	err = auxtrace_queues__add_event(queues: &etm->queues, session,
2903	event, data_offset, buffer_ptr: &buffer);
2904	if (err)
2905	return err;
2906
2907	if (dump_trace)
2908	if (auxtrace_buffer__get_data(buffer, fd)) {
2909	cs_etm__dump_event(etmq: etm->queues.queue_array[idx].priv, buffer);
2910	auxtrace_buffer__put_data(buffer);
2911	}
2912	} else if (dump_trace)
2913	dump_queued_data(etm, event: &event->auxtrace);
2914
2915	return `0`;
2916	}
2917
2918	static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2919	{
2920	struct evsel *evsel;
2921	struct evlist *evlist = etm->session->evlist;
2922
2923	/ Override timeless mode with user input from --itrace=Z /
2924	if (etm->synth_opts.timeless_decoding) {
2925	etm->timeless_decoding = true;
2926	return `0`;
2927	}
2928
2929	/*
2930	* Find the cs_etm evsel and look at what its timestamp setting was
2931	*/
2932	evlist__for_each_entry(evlist, evsel)
2933	if (cs_etm__evsel_is_auxtrace(session: etm->session, evsel)) {
2934	etm->timeless_decoding =
2935	!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2936	return `0`;
2937	}
2938
2939	pr_err("CS ETM: Couldn't find ETM evsel\n");
2940	return -EINVAL;
2941	}
2942
2943	/*
2944	* Read a single cpu parameter block from the auxtrace_info priv block.
2945	*
2946	* For version 1 there is a per cpu nr_params entry. If we are handling
2947	* version 1 file, then there may be less, the same, or more params
2948	* indicated by this value than the compile time number we understand.
2949	*
2950	* For a version 0 info block, there are a fixed number, and we need to
2951	* fill out the nr_param value in the metadata we create.
2952	*/
2953	static u64 cs_etm__create_meta_blk(u64 buff_in, int *buff_in_offset,
2954	int out_blk_size, int nr_params_v0)
2955	{
2956	u64 *metadata = NULL;
2957	int hdr_version;
2958	int nr_in_params, nr_out_params, nr_cmn_params;
2959	int i, k;
2960
2961	metadata = zalloc(sizeof(metadata) out_blk_size);
2962	if (!metadata)
2963	return NULL;
2964
2965	/ read block current index & version /
2966	i = *buff_in_offset;
2967	hdr_version = buff_in[CS_HEADER_VERSION];
2968
2969	if (!hdr_version) {
2970	/ read version 0 info block into a version 1 metadata block /
2971	nr_in_params = nr_params_v0;
2972	metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2973	metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2974	metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2975	/ remaining block params at offset +1 from source /
2976	for (k = CS_ETM_COMMON_BLK_MAX_V1 - `1`; k < nr_in_params; k++)
2977	metadata[k + `1`] = buff_in[i + k];
2978	/ version 0 has 2 common params /
2979	nr_cmn_params = `2`;
2980	} else {
2981	/ read version 1 info block - input and output nr_params may differ /
2982	/ version 1 has 3 common params /
2983	nr_cmn_params = `3`;
2984	nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2985
2986	/ if input has more params than output - skip excess /
2987	nr_out_params = nr_in_params + nr_cmn_params;
2988	if (nr_out_params > out_blk_size)
2989	nr_out_params = out_blk_size;
2990
2991	for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2992	metadata[k] = buff_in[i + k];
2993
2994	/ record the actual nr params we copied /
2995	metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2996	}
2997
2998	/ adjust in offset by number of in params used /
2999	i += nr_in_params + nr_cmn_params;
3000	*buff_in_offset = i;
3001	return metadata;
3002	}
3003
3004	/**
3005	* Puts a fragment of an auxtrace buffer into the auxtrace queues based
3006	* on the bounds of aux_event, if it matches with the buffer that's at
3007	* file_offset.
3008	*
3009	* Normally, whole auxtrace buffers would be added to the queue. But we
3010	* want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3011	* is reset across each buffer, so splitting the buffers up in advance has
3012	* the same effect.
3013	*/
3014	static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3015	struct perf_record_aux aux_event, struct* perf_sample *sample)
3016	{
3017	int err;
3018	char buf[PERF_SAMPLE_MAX_SIZE];
3019	union perf_event *auxtrace_event_union;
3020	struct perf_record_auxtrace *auxtrace_event;
3021	union perf_event auxtrace_fragment;
3022	__u64 aux_offset, aux_size;
3023	enum cs_etm_format format;
3024
3025	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3026	struct cs_etm_auxtrace,
3027	auxtrace);
3028
3029	/*
3030	* There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3031	* from looping through the auxtrace index.
3032	*/
3033	err = perf_session__peek_event(session, file_offset, buf,
3034	PERF_SAMPLE_MAX_SIZE, event_ptr: &auxtrace_event_union, NULL);
3035	if (err)
3036	return err;
3037	auxtrace_event = &auxtrace_event_union->auxtrace;
3038	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3039	return -EINVAL;
3040
3041	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) \|\|
3042	auxtrace_event->header.size != sz) {
3043	return -EINVAL;
3044	}
3045
3046	/*
3047	* In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3048	* auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3049	* CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3050	* So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3051	* Return 'not found' if mismatch.
3052	*/
3053	if (auxtrace_event->cpu == (__u32) -`1`) {
3054	etm->per_thread_decoding = true;
3055	if (auxtrace_event->tid != sample->tid)
3056	return `1`;
3057	} else if (auxtrace_event->cpu != sample->cpu) {
3058	if (etm->per_thread_decoding) {
3059	/*
3060	* Found a per-cpu buffer after a per-thread one was
3061	* already found
3062	*/
3063	pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3064	return -EINVAL;
3065	}
3066	return `1`;
3067	}
3068
3069	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3070	/*
3071	* Clamp size in snapshot mode. The buffer size is clamped in
3072	* __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3073	* the buffer size.
3074	*/
3075	aux_size = min(aux_event->aux_size, auxtrace_event->size);
3076
3077	/*
3078	* In this mode, the head also points to the end of the buffer so aux_offset
3079	* needs to have the size subtracted so it points to the beginning as in normal mode
3080	*/
3081	aux_offset = aux_event->aux_offset - aux_size;
3082	} else {
3083	aux_size = aux_event->aux_size;
3084	aux_offset = aux_event->aux_offset;
3085	}
3086
3087	if (aux_offset >= auxtrace_event->offset &&
3088	aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3089	struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
3090
3091	/*
3092	* If this AUX event was inside this buffer somewhere, create a new auxtrace event
3093	* based on the sizes of the aux event, and queue that fragment.
3094	*/
3095	auxtrace_fragment.auxtrace = *auxtrace_event;
3096	auxtrace_fragment.auxtrace.size = aux_size;
3097	auxtrace_fragment.auxtrace.offset = aux_offset;
3098	file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3099
3100	pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3101	" tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3102	err = auxtrace_queues__add_event(queues: &etm->queues, session, event: &auxtrace_fragment,
3103	data_offset: file_offset, NULL);
3104	if (err)
3105	return err;
3106
3107	format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3108	UNFORMATTED : FORMATTED;
3109	if (etmq->format != UNSET && format != etmq->format) {
3110	pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3111	return -EINVAL;
3112	}
3113	etmq->format = format;
3114	return `0`;
3115	}
3116
3117	/ Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' /
3118	return `1`;
3119	}
3120
3121	static int cs_etm__process_aux_hw_id_cb(struct perf_session session, union* perf_event *event,
3122	u64 offset __maybe_unused, void *data __maybe_unused)
3123	{
3124	/ look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up /
3125	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3126	((int* )data)++; /* increment found count /
3127	return cs_etm__process_aux_output_hw_id(session, event);
3128	}
3129	return `0`;
3130	}
3131
3132	static int cs_etm__queue_aux_records_cb(struct perf_session session, union* perf_event *event,
3133	u64 offset __maybe_unused, void *data __maybe_unused)
3134	{
3135	struct perf_sample sample;
3136	int ret;
3137	struct auxtrace_index_entry *ent;
3138	struct auxtrace_index *auxtrace_index;
3139	struct evsel *evsel;
3140	size_t i;
3141
3142	/ Don't care about any other events, we're only queuing buffers for AUX events /
3143	if (event->header.type != PERF_RECORD_AUX)
3144	return `0`;
3145
3146	if (event->header.size < sizeof(struct perf_record_aux))
3147	return -EINVAL;
3148
3149	/ Truncated Aux records can have 0 size and shouldn't result in anything being queued. /
3150	if (!event->aux.aux_size)
3151	return `0`;
3152
3153	/*
3154	* Parse the sample, we need the sample_id_all data that comes after the event so that the
3155	* CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3156	*/
3157	evsel = evlist__event2evsel(evlist: session->evlist, event);
3158	if (!evsel)
3159	return -EINVAL;
3160	perf_sample__init(&sample, /all=/false);
3161	ret = evsel__parse_sample(evsel, event, sample: &sample);
3162	if (ret)
3163	goto out;
3164
3165	/*
3166	* Loop through the auxtrace index to find the buffer that matches up with this aux event.
3167	*/
3168	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3169	for (i = `0`; i < auxtrace_index->nr; i++) {
3170	ent = &auxtrace_index->entries[i];
3171	ret = cs_etm__queue_aux_fragment(session, file_offset: ent->file_offset,
3172	sz: ent->sz, aux_event: &event->aux, sample: &sample);
3173	/*
3174	* Stop search on error or successful values. Continue search on
3175	* 1 ('not found')
3176	*/
3177	if (ret != `1`)
3178	goto out;
3179	}
3180	}
3181
3182	/*
3183	* Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3184	* don't exit with an error because it will still be possible to decode other aux records.
3185	*/
3186	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3187	" tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3188	ret = `0`;
3189	out:
3190	perf_sample__exit(&sample);
3191	return ret;
3192	}
3193
3194	static int cs_etm__queue_aux_records(struct perf_session *session)
3195	{
3196	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3197	struct auxtrace_index, list);
3198	if (index && index->nr > `0`)
3199	return perf_session__peek_events(session, offset: session->header.data_offset,
3200	size: session->header.data_size,
3201	cb: cs_etm__queue_aux_records_cb, NULL);
3202
3203	/*
3204	* We would get here if there are no entries in the index (either no auxtrace
3205	* buffers or no index at all). Fail silently as there is the possibility of
3206	* queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3207	* false.
3208	*
3209	* In that scenario, buffers will not be split by AUX records.
3210	*/
3211	return `0`;
3212	}
3213
3214	#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3215	(CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3216
3217	/*
3218	* Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3219	* timestamps).
3220	*/
3221	static bool cs_etm__has_virtual_ts(u64 *metadata, int* num_cpu)
3222	{
3223	int j;
3224
3225	for (j = `0`; j < num_cpu; j++) {
3226	switch (metadata[j][CS_ETM_MAGIC]) {
3227	case __perf_cs_etmv4_magic:
3228	if (HAS_PARAM(j, ETMV4, TS_SOURCE) \|\| metadata[j][CS_ETMV4_TS_SOURCE] != `1`)
3229	return false;
3230	break;
3231	case __perf_cs_ete_magic:
3232	if (HAS_PARAM(j, ETE, TS_SOURCE) \|\| metadata[j][CS_ETE_TS_SOURCE] != `1`)
3233	return false;
3234	break;
3235	default:
3236	/ Unknown / unsupported magic number. /
3237	return false;
3238	}
3239	}
3240	return true;
3241	}
3242
3243	/ map trace ids to correct metadata block, from information in metadata /
3244	static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace etm, int* num_cpu,
3245	u64 **metadata)
3246	{
3247	u64 cs_etm_magic;
3248	u8 trace_chan_id;
3249	int i, err;
3250
3251	for (i = `0`; i < num_cpu; i++) {
3252	cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3253	switch (cs_etm_magic) {
3254	case __perf_cs_etmv3_magic:
3255	metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3256	trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3257	break;
3258	case __perf_cs_etmv4_magic:
3259	case __perf_cs_ete_magic:
3260	metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3261	trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3262	break;
3263	default:
3264	/ unknown magic number /
3265	return -EINVAL;
3266	}
3267	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_metadata: metadata[i]);
3268	if (err)
3269	return err;
3270	}
3271	return `0`;
3272	}
3273
3274	/*
3275	* Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3276	* (formatted or not) packets to create the decoders.
3277	*/
3278	static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3279	{
3280	struct cs_etm_decoder_params d_params;
3281	struct cs_etm_trace_params *t_params;
3282	int decoders = intlist__nr_entries(ilist: etmq->traceid_list);
3283
3284	if (decoders == `0`)
3285	return `0`;
3286
3287	/*
3288	* Each queue can only contain data from one CPU when unformatted, so only one decoder is
3289	* needed.
3290	*/
3291	if (etmq->format == UNFORMATTED)
3292	assert(decoders == `1`);
3293
3294	/ Use metadata to fill in trace parameters for trace decoder /
3295	t_params = zalloc(sizeof(t_params) decoders);
3296
3297	if (!t_params)
3298	goto out_free;
3299
3300	if (cs_etm__init_trace_params(t_params, etmq))
3301	goto out_free;
3302
3303	/ Set decoder parameters to decode trace packets /
3304	if (cs_etm__init_decoder_params(d_params: &d_params, etmq,
3305	mode: dump_trace ? CS_ETM_OPERATION_PRINT :
3306	CS_ETM_OPERATION_DECODE))
3307	goto out_free;
3308
3309	etmq->decoder = cs_etm_decoder__new(num_cpu: decoders, d_params: &d_params,
3310	t_params);
3311
3312	if (!etmq->decoder)
3313	goto out_free;
3314
3315	/*
3316	* Register a function to handle all memory accesses required by
3317	* the trace decoder library.
3318	*/
3319	if (cs_etm_decoder__add_mem_access_cb(decoder: etmq->decoder,
3320	start: `0x0L`, end: ((u64) -`1L`),
3321	cb_func: cs_etm__mem_access))
3322	goto out_free_decoder;
3323
3324	zfree(&t_params);
3325	return `0`;
3326
3327	out_free_decoder:
3328	cs_etm_decoder__free(decoder: etmq->decoder);
3329	out_free:
3330	zfree(&t_params);
3331	return -EINVAL;
3332	}
3333
3334	static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3335	{
3336	struct auxtrace_queues *queues = &etm->queues;
3337
3338	for (unsigned int i = `0`; i < queues->nr_queues; i++) {
3339	bool empty = list_empty(head: &queues->queue_array[i].head);
3340	struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3341	int ret;
3342
3343	/*
3344	* Don't create decoders for empty queues, mainly because
3345	* etmq->format is unknown for empty queues.
3346	*/
3347	assert(empty \|\| etmq->format != UNSET);
3348	if (empty)
3349	continue;
3350
3351	ret = cs_etm__create_queue_decoders(etmq);
3352	if (ret)
3353	return ret;
3354	}
3355	return `0`;
3356	}
3357
3358	int cs_etm__process_auxtrace_info_full(union perf_event *event,
3359	struct perf_session *session)
3360	{
3361	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3362	struct cs_etm_auxtrace *etm = NULL;
3363	struct perf_record_time_conv *tc = &session->time_conv;
3364	int event_header_size = sizeof(struct perf_event_header);
3365	int total_size = auxtrace_info->header.size;
3366	int priv_size = `0`;
3367	int num_cpu, max_cpu = `0`;
3368	int err = `0`;
3369	int aux_hw_id_found;
3370	int i;
3371	u64 *ptr = NULL;
3372	u64 **metadata = NULL;
3373
3374	/ First the global part /
3375	ptr = (u64 *) auxtrace_info->priv;
3376	num_cpu = ptr[CS_PMU_TYPE_CPUS] & `0xffffffff`;
3377	metadata = zalloc(sizeof(metadata) num_cpu);
3378	if (!metadata)
3379	return -ENOMEM;
3380
3381	/ Start parsing after the common part of the header /
3382	i = CS_HEADER_VERSION_MAX;
3383
3384	/*
3385	* The metadata is stored in the auxtrace_info section and encodes
3386	* the configuration of the ARM embedded trace macrocell which is
3387	* required by the trace decoder to properly decode the trace due
3388	* to its highly compressed nature.
3389	*/
3390	for (int j = `0`; j < num_cpu; j++) {
3391	if (ptr[i] == __perf_cs_etmv3_magic) {
3392	metadata[j] =
3393	cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i,
3394	out_blk_size: CS_ETM_PRIV_MAX,
3395	CS_ETM_NR_TRC_PARAMS_V0);
3396	} else if (ptr[i] == __perf_cs_etmv4_magic) {
3397	metadata[j] =
3398	cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i,
3399	out_blk_size: CS_ETMV4_PRIV_MAX,
3400	CS_ETMV4_NR_TRC_PARAMS_V0);
3401	} else if (ptr[i] == __perf_cs_ete_magic) {
3402	metadata[j] = cs_etm__create_meta_blk(buff_in: ptr, buff_in_offset: &i, out_blk_size: CS_ETE_PRIV_MAX, nr_params_v0: -`1`);
3403	} else {
3404	ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3405	ptr[i]);
3406	err = -EINVAL;
3407	goto err_free_metadata;
3408	}
3409
3410	if (!metadata[j]) {
3411	err = -ENOMEM;
3412	goto err_free_metadata;
3413	}
3414
3415	if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3416	max_cpu = metadata[j][CS_ETM_CPU];
3417	}
3418
3419	/*
3420	* Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3421	* CS_ETMV4_PRIV_MAX mark how many double words are in the
3422	* global metadata, and each cpu's metadata respectively.
3423	* The following tests if the correct number of double words was
3424	* present in the auxtrace info section.
3425	*/
3426	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3427	if (i * `8` != priv_size) {
3428	err = -EINVAL;
3429	goto err_free_metadata;
3430	}
3431
3432	etm = zalloc(sizeof(*etm));
3433
3434	if (!etm) {
3435	err = -ENOMEM;
3436	goto err_free_metadata;
3437	}
3438
3439	/*
3440	* As all the ETMs run at the same exception level, the system should
3441	* have the same PID format crossing CPUs. So cache the PID format
3442	* and reuse it for sequential decoding.
3443	*/
3444	etm->pid_fmt = cs_etm__init_pid_fmt(metadata: metadata[`0`]);
3445
3446	err = auxtrace_queues__init_nr(queues: &etm->queues, nr_queues: max_cpu + `1`);
3447	if (err)
3448	goto err_free_etm;
3449
3450	for (unsigned int j = `0`; j < etm->queues.nr_queues; ++j) {
3451	err = cs_etm__setup_queue(etm, queue: &etm->queues.queue_array[j], queue_nr: j);
3452	if (err)
3453	goto err_free_queues;
3454	}
3455
3456	if (session->itrace_synth_opts->set) {
3457	etm->synth_opts = *session->itrace_synth_opts;
3458	} else {
3459	itrace_synth_opts__set_default(synth_opts: &etm->synth_opts,
3460	no_sample: session->itrace_synth_opts->default_no_sample);
3461	etm->synth_opts.callchain = false;
3462	}
3463
3464	etm->session = session;
3465
3466	etm->num_cpu = num_cpu;
3467	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> `32`) & `0xffffffff`);
3468	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != `0`);
3469	etm->metadata = metadata;
3470	etm->auxtrace_type = auxtrace_info->type;
3471
3472	if (etm->synth_opts.use_timestamp)
3473	/*
3474	* Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3475	* therefore the decoder cannot know if the timestamp trace is
3476	* same with the kernel time.
3477	*
3478	* If a user has knowledge for the working platform and can
3479	* specify itrace option 'T' to tell decoder to forcely use the
3480	* traced timestamp as the kernel time.
3481	*/
3482	etm->has_virtual_ts = true;
3483	else
3484	/ Use virtual timestamps if all ETMs report ts_source = 1 /
3485	etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3486
3487	if (!etm->has_virtual_ts)
3488	ui__warning(format: "Virtual timestamps are not enabled, or not supported by the traced system.\n"
3489	"The time field of the samples will not be set accurately.\n"
3490	"For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3491	"you can specify the itrace option 'T' for timestamp decoding\n"
3492	"if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3493
3494	etm->auxtrace.process_event = cs_etm__process_event;
3495	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3496	etm->auxtrace.flush_events = cs_etm__flush_events;
3497	etm->auxtrace.free_events = cs_etm__free_events;
3498	etm->auxtrace.free = cs_etm__free;
3499	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3500	session->auxtrace = &etm->auxtrace;
3501
3502	err = cs_etm__setup_timeless_decoding(etm);
3503	if (err)
3504	return err;
3505
3506	etm->tc.time_shift = tc->time_shift;
3507	etm->tc.time_mult = tc->time_mult;
3508	etm->tc.time_zero = tc->time_zero;
3509	if (event_contains(*tc, time_cycles)) {
3510	etm->tc.time_cycles = tc->time_cycles;
3511	etm->tc.time_mask = tc->time_mask;
3512	etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3513	etm->tc.cap_user_time_short = tc->cap_user_time_short;
3514	}
3515	err = cs_etm__synth_events(etm, session);
3516	if (err)
3517	goto err_free_queues;
3518
3519	err = cs_etm__queue_aux_records(session);
3520	if (err)
3521	goto err_free_queues;
3522
3523	/*
3524	* Map Trace ID values to CPU metadata.
3525	*
3526	* Trace metadata will always contain Trace ID values from the legacy algorithm
3527	* in case it's read by a version of Perf that doesn't know about HW_ID packets
3528	* or the kernel doesn't emit them.
3529	*
3530	* The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3531	* the same IDs as the old algorithm as far as is possible, unless there are clashes
3532	* in which case a different value will be used. This means an older perf may still
3533	* be able to record and read files generate on a newer system.
3534	*
3535	* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3536	* those packets. If they are there then the values will be mapped and plugged into
3537	* the metadata and decoders are only created for each mapping received.
3538	*
3539	* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3540	* then we map Trace ID values to CPU directly from the metadata and create decoders
3541	* for all mappings.
3542	*/
3543
3544	/ Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata /
3545	aux_hw_id_found = `0`;
3546	err = perf_session__peek_events(session, offset: session->header.data_offset,
3547	size: session->header.data_size,
3548	cb: cs_etm__process_aux_hw_id_cb, data: &aux_hw_id_found);
3549	if (err)
3550	goto err_free_queues;
3551
3552	/ if no HW ID found this is a file with metadata values only, map from metadata /
3553	if (!aux_hw_id_found) {
3554	err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
3555	if (err)
3556	goto err_free_queues;
3557	}
3558
3559	err = cs_etm__create_decoders(etm);
3560	if (err)
3561	goto err_free_queues;
3562
3563	etm->data_queued = etm->queues.populated;
3564	return `0`;
3565
3566	err_free_queues:
3567	auxtrace_queues__free(queues: &etm->queues);
3568	session->auxtrace = NULL;
3569	err_free_etm:
3570	zfree(&etm);
3571	err_free_metadata:
3572	/ No need to check @metadata[j], free(NULL) is supported /
3573	for (int j = `0`; j < num_cpu; j++)
3574	zfree(&metadata[j]);
3575	zfree(&metadata);
3576	return err;
3577	}
3578

source code of linux/tools/perf/util/cs-etm.c