cprover
osx_fat_reader.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Read Mach-O
4
5Author:
6
7\*******************************************************************/
8
11
12#include "osx_fat_reader.h"
13
15#include <util/invariant.h>
16
17// we define file-type magic values for all platforms to detect when we find a
18// file that we might not be able to process
19#define CPROVER_FAT_MAGIC 0xcafebabe
20#define CPROVER_FAT_CIGAM 0xbebafeca
21#define CPROVER_MH_MAGIC 0xfeedface
22#define CPROVER_MH_CIGAM 0xcefaedfe
23#define CPROVER_MH_MAGIC_64 0xfeedfacf
24#define CPROVER_MH_CIGAM_64 0xcffaedfe
25
26#ifdef __APPLE__
27# include <architecture/byte_order.h>
28# include <mach-o/fat.h>
29# include <mach-o/loader.h>
30# include <mach-o/swap.h>
31
32# if(CPROVER_FAT_MAGIC != FAT_MAGIC) || (CPROVER_FAT_CIGAM != FAT_CIGAM) || \
33 (CPROVER_MH_MAGIC != MH_MAGIC) || (CPROVER_MH_CIGAM != MH_CIGAM) || \
34 (CPROVER_MH_MAGIC_64 != MH_MAGIC_64) || \
35 (CPROVER_MH_CIGAM_64 != MH_CIGAM_64)
36# error "Mach-O magic has inconsistent value"
37# endif
38#endif
39
40#include <util/run.h>
41
/// The first two 32-bit fields at the start of a Mach-O fat (universal)
/// binary. Both fields need converting from big-endian byte order before they
/// are compared against native values.
struct fat_header_prefixt
{
  /// File-type magic; equals CPROVER_FAT_MAGIC once converted
  uint32_t magic;
  /// Number of architecture entries that follow the header
  uint32_t n_architectures;
};
47
/// Interpret the in-memory bytes of \p input as a big-endian 32-bit integer.
/// \param input: value whose object representation holds big-endian data
/// \return the same quantity expressed in the host's byte order
static uint32_t u32_to_native_endian(uint32_t input)
{
  const uint8_t *bytes = reinterpret_cast<uint8_t *>(&input);

  // fold the bytes in memory order: the first byte is the most significant
  uint32_t native = 0;
  for(unsigned index = 0; index < 4; ++index)
    native = (native << 8) | static_cast<uint32_t>(bytes[index]);

  return native;
}
56
57bool is_osx_fat_header(char header_bytes[8])
58{
59 struct fat_header_prefixt *header =
60 reinterpret_cast<struct fat_header_prefixt *>(header_bytes);
61
62 // Unfortunately for us, both Java class files and Mach fat binaries use the
63 // magic number 0xCAFEBABE. Therefore we must also check the second field,
64 // number of architectures, is in a sensible range (I use at 1 <= archs < 20,
65 // the same criterion used by `GNU file`).
66 // Luckily the class file format stores the file version here, which cannot
67 // fall in this range.
68 uint32_t n_architectures_native =
70 return u32_to_native_endian(header->magic) == CPROVER_FAT_MAGIC &&
71 n_architectures_native >= 1 && n_architectures_native < 20;
72}
73
75 std::ifstream &in,
76 message_handlert &message_handler)
77 : log(message_handler), has_gb_arch(false)
78{
79#ifdef __APPLE__
80 // NOLINTNEXTLINE(readability/identifiers)
81 struct fat_header fh;
82 // NOLINTNEXTLINE(readability/identifiers)
83 in.read(reinterpret_cast<char*>(&fh), sizeof(struct fat_header));
84
85 if(!in)
86 throw system_exceptiont("failed to read OSX fat header");
87
88 if(!is_osx_fat_header(reinterpret_cast<char *>(&(fh.magic))))
89 throw deserialization_exceptiont("OSX fat header malformed");
90
91 static_assert(
92 sizeof(fh.nfat_arch) == 4, "fat_header::nfat_arch is of type uint32_t");
93 unsigned narch = u32_to_native_endian(fh.nfat_arch);
94
95 for(unsigned i=0; !has_gb_arch && i<narch; ++i)
96 {
97 // NOLINTNEXTLINE(readability/identifiers)
98 struct fat_arch fa;
99 // NOLINTNEXTLINE(readability/identifiers)
100 in.read(reinterpret_cast<char*>(&fa), sizeof(struct fat_arch));
101
102 static_assert(
103 sizeof(fa.cputype) == 4 && sizeof(fa.cpusubtype) == 4 &&
104 sizeof(fa.size) == 4,
105 "This requires a specific fat architecture");
106 int cputype = u32_to_native_endian(fa.cputype);
107 int cpusubtype = u32_to_native_endian(fa.cpusubtype);
108 unsigned size = u32_to_native_endian(fa.size);
109
110 has_gb_arch=cputype==CPU_TYPE_HPPA &&
111 cpusubtype==CPU_SUBTYPE_HPPA_7100LC &&
112 size > 0;
113 }
114#else
115 (void)in; // unused parameter
116
117 log.warning() << "Cannot read OSX fat archive on this platform"
118 << messaget::eom;
119#endif
120}
121
123 const std::string &source,
124 const std::string &dest) const
125{
127
128 return run(
129 "lipo", {"lipo", "-thin", "hppa7100LC", "-output", dest, source}) !=
130 0;
131}
132
133// guided by https://lowlevelbits.org/parsing-mach-o-files/
134bool is_osx_mach_object(char hdr[4])
135{
136 uint32_t *magic = reinterpret_cast<uint32_t *>(hdr);
137
138 switch(*magic)
139 {
140 case CPROVER_MH_MAGIC:
141 case CPROVER_MH_CIGAM:
144 return true;
145 }
146
147 return false;
148}
149
150void osx_mach_o_readert::process_sections_32(uint32_t nsects, bool need_swap)
151{
152#ifdef __APPLE__
153 for(uint32_t i = 0; i < nsects; ++i)
154 {
155 // NOLINTNEXTLINE(readability/identifiers)
156 struct section s;
157 in.read(reinterpret_cast<char *>(&s), sizeof(s));
158
159 if(!in)
160 throw deserialization_exceptiont("failed to read Mach-O section");
161
162 if(need_swap)
163 swap_section(&s, 1, NXHostByteOrder());
164
165 sections.emplace(s.sectname, sectiont(s.sectname, s.offset, s.size));
166 }
167#else
168 // unused parameters
169 (void)nsects;
170 (void)need_swap;
171#endif
172}
173
174void osx_mach_o_readert::process_sections_64(uint32_t nsects, bool need_swap)
175{
176#ifdef __APPLE__
177 for(uint32_t i = 0; i < nsects; ++i)
178 {
179 // NOLINTNEXTLINE(readability/identifiers)
180 struct section_64 s;
181 in.read(reinterpret_cast<char *>(&s), sizeof(s));
182
183 if(!in)
184 throw deserialization_exceptiont("failed to read 64-bit Mach-O section");
185
186 if(need_swap)
187 swap_section_64(&s, 1, NXHostByteOrder());
188
189 sections.emplace(s.sectname, sectiont(s.sectname, s.offset, s.size));
190 }
191#else
192 // unused parameters
193 (void)nsects;
194 (void)need_swap;
195#endif
196}
197
199 uint32_t ncmds,
200 std::size_t offset,
201 bool need_swap)
202{
203#ifdef __APPLE__
204 for(uint32_t i = 0; i < ncmds; ++i)
205 {
206 in.seekg(offset);
207
208 // NOLINTNEXTLINE(readability/identifiers)
209 struct load_command lc;
210 in.read(reinterpret_cast<char *>(&lc), sizeof(lc));
211
212 if(!in)
213 throw deserialization_exceptiont("failed to read Mach-O command");
214
215 if(need_swap)
216 swap_load_command(&lc, NXHostByteOrder());
217
218 // we may need to re-read the command once we have figured out its type; in
219 // particular, segment commands contain additional information that we have
220 // now just read a prefix of
221 in.seekg(offset);
222
223 switch(lc.cmd)
224 {
225 case LC_SEGMENT:
226 {
227 // NOLINTNEXTLINE(readability/identifiers)
228 struct segment_command seg;
229 in.read(reinterpret_cast<char *>(&seg), sizeof(seg));
230
231 if(!in)
232 throw deserialization_exceptiont("failed to read Mach-O segment");
233
234 if(need_swap)
235 swap_segment_command(&seg, NXHostByteOrder());
236
237 process_sections_32(seg.nsects, need_swap);
238 break;
239 }
240 case LC_SEGMENT_64:
241 {
242 // NOLINTNEXTLINE(readability/identifiers)
243 struct segment_command_64 seg;
244 in.read(reinterpret_cast<char *>(&seg), sizeof(seg));
245
246 if(!in)
247 throw deserialization_exceptiont("failed to read Mach-O segment");
248
249 if(need_swap)
250 swap_segment_command_64(&seg, NXHostByteOrder());
251
252 process_sections_64(seg.nsects, need_swap);
253 break;
254 }
255 default:
256 break;
257 }
258
259 offset += lc.cmdsize;
260 }
261#else
262 // unused parameters
263 (void)ncmds;
264 (void)offset;
265 (void)need_swap;
266#endif
267}
268
270 std::istream &_in,
271 message_handlert &message_handler)
272 : log(message_handler), in(_in)
273{
274 // read magic
275 uint32_t magic;
276 in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
277
278 if(!in)
279 throw deserialization_exceptiont("failed to read Mach-O magic");
280
281#ifdef __APPLE__
282 bool is_64 = false, need_swap = false;
283 switch(magic)
284 {
285 case CPROVER_MH_CIGAM:
286 need_swap = true;
287 break;
288 case CPROVER_MH_MAGIC:
289 break;
291 need_swap = true;
292 is_64 = true;
293 break;
295 is_64 = true;
296 break;
297 default:
298 throw deserialization_exceptiont("no Mach-O magic");
299 }
300
301 uint32_t ncmds = 0;
302 std::size_t offset = 0;
303
304 // re-read from the beginning, now reading the full header
305 in.seekg(0);
306
307 if(!is_64)
308 {
309 // NOLINTNEXTLINE(readability/identifiers)
310 struct mach_header mh;
311 in.read(reinterpret_cast<char *>(&mh), sizeof(mh));
312
313 if(!in)
314 throw deserialization_exceptiont("failed to read 32-bit Mach-O header");
315
316 if(need_swap)
317 swap_mach_header(&mh, NXHostByteOrder());
318
319 ncmds = mh.ncmds;
320 offset = sizeof(mh);
321 }
322 else
323 {
324 // NOLINTNEXTLINE(readability/identifiers)
325 struct mach_header_64 mh;
326 in.read(reinterpret_cast<char *>(&mh), sizeof(mh));
327
328 if(!in)
329 throw deserialization_exceptiont("failed to read 64-bit Mach-O header");
330
331 if(need_swap)
332 swap_mach_header_64(&mh, NXHostByteOrder());
333
334 ncmds = mh.ncmds;
335 offset = sizeof(mh);
336 }
337
338 process_commands(ncmds, offset, need_swap);
339#else
340 log.warning() << "Cannot read OSX Mach-O on this platform" << messaget::eom;
341#endif
342}
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
mstreamt & warning() const
Definition: message.h:404
static eomt eom
Definition: message.h:297
osx_fat_readert(std::ifstream &, message_handlert &)
bool extract_gb(const std::string &source, const std::string &dest) const
std::istream & in
void process_commands(uint32_t ncmds, std::size_t offset, bool need_swap)
osx_mach_o_readert(std::istream &, message_handlert &)
void process_sections_32(uint32_t nsects, bool need_swap)
void process_sections_64(uint32_t nsects, bool need_swap)
Thrown when some external system fails unexpectedly.
bool is_osx_fat_header(char header_bytes[8])
#define CPROVER_FAT_MAGIC
#define CPROVER_MH_CIGAM
#define CPROVER_MH_MAGIC
static uint32_t u32_to_native_endian(uint32_t input)
#define CPROVER_MH_MAGIC_64
bool is_osx_mach_object(char hdr[4])
#define CPROVER_MH_CIGAM_64
Read OS X Fat Binaries.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
#define PRECONDITION(CONDITION)
Definition: invariant.h:463