| 1 | //===-- dfsan_interface.h -------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file is a part of DataFlowSanitizer. |
| 10 | // |
| 11 | // Public interface header. |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | #ifndef DFSAN_INTERFACE_H |
| 14 | #define DFSAN_INTERFACE_H |
| 15 | |
| 16 | #include <sanitizer/common_interface_defs.h> |
| 17 | #include <stddef.h> |
| 18 | #include <stdint.h> |
| 19 | |
| 20 | #ifdef __cplusplus |
| 21 | extern "C" { |
| 22 | #endif |
| 23 | |
| 24 | typedef uint8_t dfsan_label; /**< 8-bit taint label. */ |
| 25 | typedef uint32_t dfsan_origin; /**< 32-bit origin id. */ |
| 26 | |
| 27 | /// Callback type accepted by dfsan_set_write_callback(); see that function. |
| 28 | typedef void(SANITIZER_CDECL *dfsan_write_callback_t)(int fd, const void *buf, |
| 29 | size_t count); |
| 30 | |
| 31 | /// Callback type accepted by dfsan_set_conditional_callback(): (label, origin). |
| 32 | typedef void(SANITIZER_CDECL *dfsan_conditional_callback_t)( |
| 33 | dfsan_label label, dfsan_origin origin); |
| 34 | |
| 35 | /// Callback type accepted by dfsan_set_reaches_function_callback(). |
| 36 | /// NOTE(review): a 'description' holding the variable name was documented here, but no such parameter exists. |
| 37 | typedef void(SANITIZER_CDECL *dfsan_reaches_function_callback_t)( |
| 38 | dfsan_label label, dfsan_origin origin, const char *file, unsigned int line, |
| 39 | const char *function); |
| 40 | |
| 41 | /// Returns the union label obtained by combining \c l1 and \c l2. |
| 42 | dfsan_label SANITIZER_CDECL dfsan_union(dfsan_label l1, dfsan_label l2); |
| 43 | |
| 44 | /// Assigns \c label to every address in the range [addr,addr+size). |
| 45 | void SANITIZER_CDECL dfsan_set_label(dfsan_label label, void *addr, |
| 46 | size_t size); |
| 47 | |
| 48 | /// For each address in [addr,addr+size), sets its label to the union of the |
| 49 | /// label currently held by that address and \c label. |
| 50 | void SANITIZER_CDECL dfsan_add_label(dfsan_label label, void *addr, |
| 51 | size_t size); |
| 52 | |
| 53 | /// Retrieves the label associated with the value \p data. |
| 54 | /// |
| 55 | /// The type of \p data is arbitrary: a value of any type may be passed, and it |
| 56 | /// can be truncated or extended (implicitly or explicitly) as necessary. |
| 57 | /// Those truncation/extension operations preserve the label of the original |
| 58 | /// value. |
| 59 | dfsan_label SANITIZER_CDECL dfsan_get_label(long data); |
| 60 | |
| 61 | /// Retrieves the immediate origin associated with the value \p data. The |
| 62 | /// returned origin may itself point to another origin. |
| 63 | /// |
| 64 | /// The type of \p data is arbitrary. |
| 65 | dfsan_origin SANITIZER_CDECL dfsan_get_origin(long data); |
| 66 | |
| 67 | /// Retrieves the label associated with the data at address \p addr. |
| 68 | dfsan_label SANITIZER_CDECL dfsan_read_label(const void *addr, size_t size); |
| 69 | |
| 70 | /// Returns the origin associated with the first tainted byte among the \p size |
| 71 | /// bytes starting at address \p addr. |
| 72 | dfsan_origin SANITIZER_CDECL dfsan_read_origin_of_first_taint(const void *addr, |
| 73 | size_t size); |
| 74 | |
| 75 | /// Returns whether the label \p label contains the label \p elem. |
| 76 | int SANITIZER_CDECL dfsan_has_label(dfsan_label label, dfsan_label elem); |
| 77 | |
| 78 | /// Flushes the DFSan shadow, i.e. forgets all labels currently associated with |
| 79 | /// the application memory. Call this to restart taint tracking from scratch |
| 80 | /// within the same process. |
| 81 | /// |
| 82 | /// Note: if another thread is working with tainted data during the flush, that |
| 83 | /// taint could still be written to the shadow after the flush. |
| 84 | void SANITIZER_CDECL dfsan_flush(void); |
| 85 | |
| 86 | /// Registers a callback to be invoked on calls to write(). It is invoked |
| 87 | /// before the write is done, so the write is not guaranteed to succeed when |
| 88 | /// the callback executes. Pass in NULL to remove any installed callback. |
| 89 | void SANITIZER_CDECL |
| 90 | dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); |
| 91 | |
| 92 | /// Registers a callback to be invoked on any conditional expression that has |
| 93 | /// a taint label set. This can be used to find where tainted data influences |
| 94 | /// the behavior of the program. |
| 95 | /// The callbacks are only added when -dfsan-conditional-callbacks=true. |
| 96 | void SANITIZER_CDECL |
| 97 | dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); |
| 98 | |
| 99 | /// Conditional expressions can also occur inside signal handlers. |
| 100 | /// Making callbacks that handle signals well is tricky, so when |
| 101 | /// -dfsan-conditional-callbacks=true, conditional expressions used in signal |
| 102 | /// handlers add the labels they see into a global (bitwise-or'ed together). |
| 103 | /// This function returns all label bits seen in signal handler conditions. |
| 104 | dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_conditional(void); |
| 105 | |
| 106 | /// Registers a callback to be invoked when tainted data reaches a function. |
| 107 | /// This can occur at function entry or at a load instruction. |
| 108 | /// The callbacks are only added if -dfsan-reaches-function-callbacks=1. |
| 109 | void SANITIZER_CDECL |
| 110 | dfsan_set_reaches_function_callback(dfsan_reaches_function_callback_t callback); |
| 111 | |
| 112 | /// Making callbacks that handle signals well is tricky, so when |
| 113 | /// -dfsan-reaches-function-callbacks=true, functions reached in signal |
| 114 | /// handlers add the labels they see into a global (bitwise-or'ed together). |
| 115 | /// This function returns all label bits seen during signal handlers. |
| 116 | dfsan_label SANITIZER_CDECL dfsan_get_labels_in_signal_reaches_function(void); |
| 117 | |
| 118 | /// Interceptor hooks. |
| 119 | /// Whenever one of dfsan's custom functions is called, the corresponding |
| 120 | /// hook is called if it is non-zero. The hooks should be defined by the user. |
| 121 | /// The primary use case is taint-guided fuzzing, where the fuzzer |
| 122 | /// needs to see the parameters of the function and the labels. |
| 123 | /// FIXME: implement more hooks. |
| 124 | void SANITIZER_CDECL dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, |
| 125 | const void *s2, size_t n, |
| 126 | dfsan_label s1_label, |
| 127 | dfsan_label s2_label, |
| 128 | dfsan_label n_label); |
| 129 | void SANITIZER_CDECL dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, |
| 130 | const char *s2, size_t n, |
| 131 | dfsan_label s1_label, |
| 132 | dfsan_label s2_label, |
| 133 | dfsan_label n_label); |
| 134 | |
| 135 | /// Prints the origin trace of the label at address \p addr to stderr. It also |
| 136 | /// prints \p description at the beginning of the trace. If origin tracking is |
| 137 | /// not on, or the address is not labeled, it prints nothing. |
| 138 | void SANITIZER_CDECL dfsan_print_origin_trace(const void *addr, |
| 139 | const char *description); |
| 140 | /// Like dfsan_print_origin_trace(), but takes an origin id (see dfsan_get_origin()) |
| 141 | /// instead of an address; omits the header line with taint label and address. |
| 142 | void SANITIZER_CDECL dfsan_print_origin_id_trace(dfsan_origin origin); |
| 143 | |
| 144 | /// Prints the origin trace of the label at the address \p addr to a |
| 145 | /// pre-allocated output buffer. If origin tracking is not on, or the address is |
| 146 | /// not labeled, it prints nothing. |
| 147 | /// |
| 148 | /// Typical usage: |
| 149 | /// \code |
| 150 | ///   char kDescription[] = "..."; |
| 151 | ///   char buf[1024]; |
| 152 | ///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf)); |
| 153 | /// \endcode |
| 154 | /// |
| 155 | /// Typical usage that handles truncation: |
| 156 | /// \code |
| 157 | ///   char buf[1024]; |
| 158 | ///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf)); |
| 159 | /// |
| 160 | ///   if (len < sizeof(buf)) { |
| 161 | ///     ProcessOriginTrace(buf); |
| 162 | ///   } else { |
| 163 | ///     char *tmpbuf = new char[len + 1]; |
| 164 | ///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1); |
| 165 | ///     ProcessOriginTrace(tmpbuf); |
| 166 | ///     delete[] tmpbuf; |
| 167 | ///   } |
| 168 | /// \endcode |
| 169 | /// |
| 170 | /// \param addr The tainted memory address whose origin we are printing. |
| 171 | /// \param description A description printed at the beginning of the trace. |
| 172 | /// \param [out] out_buf The output buffer to write the results to. |
| 173 | /// \param out_buf_size The size of \p out_buf. |
| 174 | /// |
| 175 | /// \returns The number of characters that should have been written to \p out_buf |
| 176 | /// (not including the trailing null byte '\0'). Thus, the string is truncated iff |
| 177 | /// the return value is not less than \p out_buf_size. |
| 178 | size_t SANITIZER_CDECL dfsan_sprint_origin_trace(const void *addr, |
| 179 | const char *description, |
| 180 | char *out_buf, |
| 181 | size_t out_buf_size); |
| 182 | /// Like dfsan_sprint_origin_trace(), but takes an origin id (see dfsan_get_origin()) |
| 183 | /// instead of an address; omits the header line with taint label and address. |
| 184 | size_t SANITIZER_CDECL dfsan_sprint_origin_id_trace(dfsan_origin origin, |
| 185 | char *out_buf, |
| 186 | size_t out_buf_size); |
| 187 | |
| 188 | /// Prints the stack trace leading to this call into a pre-allocated output |
| 189 | /// buffer. |
| 190 | /// |
| 191 | /// For usage examples, see dfsan_sprint_origin_trace(). |
| 192 | /// |
| 193 | /// \param [out] out_buf The output buffer to write the results to. |
| 194 | /// \param out_buf_size The size of \p out_buf. |
| 195 | /// |
| 196 | /// \returns The number of characters that should have been written to \p out_buf |
| 197 | /// (not including the trailing null byte '\0'). Thus, the string is truncated iff |
| 198 | /// the return value is not less than \p out_buf_size. |
| 199 | size_t SANITIZER_CDECL dfsan_sprint_stack_trace(char *out_buf, |
| 200 | size_t out_buf_size); |
| 201 | |
| 202 | /// Retrieves the very first origin associated with the data at address |
| 203 | /// \p addr. |
| 204 | dfsan_origin SANITIZER_CDECL dfsan_get_init_origin(const void *addr); |
| 205 | |
| 206 | /// Returns the value of the -dfsan-track-origins option: |
| 207 | /// * 0: do not track origins. |
| 208 | /// * 1: track origins at memory store operations. |
| 209 | /// * 2: track origins at memory load and store operations. |
| 210 | int SANITIZER_CDECL dfsan_get_track_origins(void); |
| 211 | #ifdef __cplusplus |
| 212 | } // extern "C" |
| 213 | /// C++ convenience overload: applies \c label to all sizeof(T) bytes of the object \p data. |
| 214 | template <typename T> void dfsan_set_label(dfsan_label label, T &data) { |
| 215 | dfsan_set_label(label, (void *)&data, sizeof(T)); |
| 216 | } |
| 217 | |
| 218 | #endif |
| 219 | |
| 220 | #endif // DFSAN_INTERFACE_H |
| 221 | |