2 /*--------------------------------------------------------------------*/
3 /*--- Wrappers for generic Unix system calls ---*/
4 /*--- syswrap-generic.c ---*/
5 /*--------------------------------------------------------------------*/
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
32 #if defined(VGO_linux) || defined(VGO_darwin)
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h" // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h" // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h" // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_tool_gdbserver.h" // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h" // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
64 #include "priv_types_n_macros.h"
65 #include "priv_syswrap-generic.h"
70 /* Returns True iff address range is something the client can
71 plausibly mess with: all of it is either already belongs to the
72 client or is free or a reservation. */
74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
75 const Char *syscallname)
82 ret = VG_(am_is_valid_for_client_or_free_or_resvn)
83 (start,size,VKI_PROT_NONE);
86 VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
87 syscallname, start, start+size-1, (Int)ret);
89 if (!ret && syscallname != NULL) {
90 VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
91 "to modify addresses %#lx-%#lx\n",
92 syscallname, start, start+size-1);
93 if (VG_(clo_verbosity) > 1) {
94 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
102 Bool ML_(client_signal_OK)(Int sigNo)
104 /* signal 0 is OK for kill */
105 Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
107 //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
113 /* Handy small function to help stop wrappers from segfaulting when
114 presented with bogus client addresses. Is not used for generating
115 user-visible errors. */
117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
119 return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
123 /* ---------------------------------------------------------------------
125 ------------------------------------------------------------------ */
127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128 munmap, mprotect (and mremap??) work at the page level. So addresses
129 and lengths must be adjusted for this. */
131 /* Mash around start and length so that the area exactly covers
132 an integral number of pages. If we don't do that, memcheck's
133 idea of addressible memory diverges from that of the
134 kernel's, which causes the leak detector to crash. */
136 void page_align_addr_and_len( Addr* a, SizeT* len)
140 ra = VG_PGROUNDDN(*a);
141 *len = VG_PGROUNDUP(*a + *len) - ra;
145 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
146 UInt flags, Int fd, Off64T offset)
150 /* 'a' is the return value from a real kernel mmap, hence: */
151 vg_assert(VG_IS_PAGE_ALIGNED(a));
152 /* whereas len is whatever the syscall supplied. So: */
153 len = VG_PGROUNDUP(len);
155 d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
158 VG_(discard_translations)( (Addr64)a, (ULong)len,
159 "notify_core_of_mmap" );
162 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
166 /* 'a' is the return value from a real kernel mmap, hence: */
167 vg_assert(VG_IS_PAGE_ALIGNED(a));
168 /* whereas len is whatever the syscall supplied. So: */
169 len = VG_PGROUNDUP(len);
171 rr = toBool(prot & VKI_PROT_READ);
172 ww = toBool(prot & VKI_PROT_WRITE);
173 xx = toBool(prot & VKI_PROT_EXEC);
175 VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
179 /* When a client mmap has been successfully done, this function must
180 be called. It notifies both aspacem and the tool of the new
183 JRS 2008-Aug-14: But notice this is *very* obscure. The only place
184 it is called from is POST(sys_io_setup). In particular,
185 ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
186 client mmap. But it doesn't call this function; instead it does the
187 relevant notifications itself. Here, we just pass di_handle=0 to
188 notify_tool_of_mmap as we have no better information. But really this
189 function should be done away with; problem is I don't understand what
190 POST(sys_io_setup) does or how it works.
192 [However, this function is used lots for Darwin, because
193 ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
196 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
197 UInt flags, Int fd, Off64T offset )
199 // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
200 // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
202 notify_core_of_mmap(a, len, prot, flags, fd, offset);
203 notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
207 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
211 page_align_addr_and_len(&a, &len);
212 d = VG_(am_notify_munmap)(a, len);
213 VG_TRACK( die_mem_munmap, a, len );
214 VG_(di_notify_munmap)( a, len );
216 VG_(discard_translations)( (Addr64)a, (ULong)len,
217 "ML_(notify_core_and_tool_of_munmap)" );
221 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
223 Bool rr = toBool(prot & VKI_PROT_READ);
224 Bool ww = toBool(prot & VKI_PROT_WRITE);
225 Bool xx = toBool(prot & VKI_PROT_EXEC);
228 page_align_addr_and_len(&a, &len);
229 d = VG_(am_notify_mprotect)(a, len, prot);
230 VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
231 VG_(di_notify_mprotect)( a, len, prot );
233 VG_(discard_translations)( (Addr64)a, (ULong)len,
234 "ML_(notify_core_and_tool_of_mprotect)" );
240 /* Expand (or shrink) an existing mapping, potentially moving it at
241 the same time (controlled by the MREMAP_MAYMOVE flag). Nightmare.
244 SysRes do_mremap( Addr old_addr, SizeT old_len,
245 Addr new_addr, SizeT new_len,
246 UWord flags, ThreadId tid )
248 # define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
251 NSegment const* old_seg;
253 Bool f_fixed = toBool(flags & VKI_MREMAP_FIXED);
254 Bool f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
257 VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
258 old_addr,old_len,new_addr,new_len,
259 flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
260 flags & VKI_MREMAP_FIXED ? "FIXED" : "");
262 VG_(am_show_nsegments)(0, "do_remap: before");
264 if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
267 if (!VG_IS_PAGE_ALIGNED(old_addr))
270 old_len = VG_PGROUNDUP(old_len);
271 new_len = VG_PGROUNDUP(new_len);
276 /* kernel doesn't reject this, but we do. */
280 /* reject wraparounds */
281 if (old_addr + old_len < old_addr)
283 if (f_fixed == True && new_addr + new_len < new_len)
286 /* kernel rejects all fixed, no-move requests (which are
288 if (f_fixed == True && f_maymove == False)
291 /* Stay away from non-client areas. */
292 if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
295 /* In all remaining cases, if the old range does not fall within a
296 single segment, fail. */
297 old_seg = VG_(am_find_nsegment)( old_addr );
298 if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
300 if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
303 vg_assert(old_len > 0);
304 vg_assert(new_len > 0);
305 vg_assert(VG_IS_PAGE_ALIGNED(old_len));
306 vg_assert(VG_IS_PAGE_ALIGNED(new_len));
307 vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
309 /* There are 3 remaining cases:
313 new space has to be at old address, so:
314 - shrink -> unmap end
315 - same size -> do nothing
316 - grow -> if can grow in-place, do so, else fail
318 * maymove == True, fixed == False
320 new space can be anywhere, so:
321 - shrink -> unmap end
322 - same size -> do nothing
323 - grow -> if can grow in-place, do so, else
324 move to anywhere large enough, else fail
326 * maymove == True, fixed == True
328 new space must be at new address, so:
330 - if new address is not page aligned, fail
331 - if new address range overlaps old one, fail
332 - if new address range cannot be allocated, fail
333 - else move to new address range with new size
337 if (f_maymove == False) {
338 /* new space has to be at old address */
339 if (new_len < old_len)
340 goto shrink_in_place;
341 if (new_len > old_len)
342 goto grow_in_place_or_fail;
346 if (f_maymove == True && f_fixed == False) {
347 /* new space can be anywhere */
348 if (new_len < old_len)
349 goto shrink_in_place;
350 if (new_len > old_len)
351 goto grow_in_place_or_move_anywhere_or_fail;
355 if (f_maymove == True && f_fixed == True) {
356 /* new space can only be at the new address */
357 if (!VG_IS_PAGE_ALIGNED(new_addr))
359 if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
366 /* VG_(am_get_advisory_client_simple) interprets zero to mean
367 non-fixed, which is not what we want */
368 advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
369 if (!ok || advised != new_addr)
371 ok = VG_(am_relocate_nooverlap_client)
372 ( &d, old_addr, old_len, new_addr, new_len );
374 VG_TRACK( copy_mem_remap, old_addr, new_addr,
375 MIN_SIZET(old_len,new_len) );
376 if (new_len > old_len)
377 VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
378 old_seg->hasR, old_seg->hasW, old_seg->hasX,
380 VG_TRACK(die_mem_munmap, old_addr, old_len);
382 VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
383 VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
385 return VG_(mk_SysRes_Success)( new_addr );
390 /* end of the 3 cases */
391 /*NOTREACHED*/ vg_assert(0);
393 grow_in_place_or_move_anywhere_or_fail:
395 /* try growing it in-place */
396 Addr needA = old_addr + old_len;
397 SSizeT needL = new_len - old_len;
399 vg_assert(needL > 0);
402 /* VG_(am_get_advisory_client_simple) interprets zero to mean
403 non-fixed, which is not what we want */
404 advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
406 /* Fixes bug #129866. */
407 ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
409 if (ok && advised == needA) {
410 ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
412 VG_TRACK( new_mem_mmap, needA, needL,
414 old_seg->hasW, old_seg->hasX,
417 VG_(discard_translations)( needA, needL, "do_remap(3)" );
418 return VG_(mk_SysRes_Success)( old_addr );
422 /* that failed. Look elsewhere. */
423 advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
425 Bool oldR = old_seg->hasR;
426 Bool oldW = old_seg->hasW;
427 Bool oldX = old_seg->hasX;
428 /* assert new area does not overlap old */
429 vg_assert(advised+new_len-1 < old_addr
430 || advised > old_addr+old_len-1);
431 ok = VG_(am_relocate_nooverlap_client)
432 ( &d, old_addr, old_len, advised, new_len );
434 VG_TRACK( copy_mem_remap, old_addr, advised,
435 MIN_SIZET(old_len,new_len) );
436 if (new_len > old_len)
437 VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
438 oldR, oldW, oldX, 0/*di_handle*/ );
439 VG_TRACK(die_mem_munmap, old_addr, old_len);
441 VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
442 VG_(discard_translations)( advised, new_len, "do_remap(5)" );
444 return VG_(mk_SysRes_Success)( advised );
449 /*NOTREACHED*/ vg_assert(0);
451 grow_in_place_or_fail:
453 Addr needA = old_addr + old_len;
454 SizeT needL = new_len - old_len;
457 /* VG_(am_get_advisory_client_simple) interprets zero to mean
458 non-fixed, which is not what we want */
459 advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
461 /* Fixes bug #129866. */
462 ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
464 if (!ok || advised != needA)
466 ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
469 VG_TRACK( new_mem_mmap, needA, needL,
470 old_seg->hasR, old_seg->hasW, old_seg->hasX,
473 VG_(discard_translations)( needA, needL, "do_remap(6)" );
474 return VG_(mk_SysRes_Success)( old_addr );
476 /*NOTREACHED*/ vg_assert(0);
480 SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
481 if (sr_isError(sres))
483 VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
485 VG_(discard_translations)( old_addr+new_len, old_len-new_len,
487 return VG_(mk_SysRes_Success)( old_addr );
489 /*NOTREACHED*/ vg_assert(0);
492 return VG_(mk_SysRes_Success)( old_addr );
493 /*NOTREACHED*/ vg_assert(0);
496 return VG_(mk_SysRes_Error)( VKI_EINVAL );
498 return VG_(mk_SysRes_Error)( VKI_ENOMEM );
502 #endif /* HAVE_MREMAP */
505 /* ---------------------------------------------------------------------
506 File-descriptor tracking
507 ------------------------------------------------------------------ */
509 /* One of these is allocated for each open file descriptor. */
510 typedef struct OpenFd
512 Int fd; /* The file descriptor */
513 Char *pathname; /* NULL if not a regular file or unknown */
514 ExeContext *where; /* NULL if inherited from parent */
515 struct OpenFd *next, *prev;
518 /* List of allocated file descriptors. */
519 static OpenFd *allocated_fds = NULL;
521 /* Count of open file descriptors. */
522 static Int fd_count = 0;
525 /* Note the fact that a file descriptor was just closed. */
527 void record_fd_close(Int fd)
529 OpenFd *i = allocated_fds;
531 if (fd >= VG_(fd_hard_limit))
532 return; /* Valgrind internal */
537 i->prev->next = i->next;
539 allocated_fds = i->next;
541 i->next->prev = i->prev;
543 VG_(arena_free) (VG_AR_CORE, i->pathname);
544 VG_(arena_free) (VG_AR_CORE, i);
552 /* Note the fact that a file descriptor was just opened. If the
553 tid is -1, this indicates an inherited fd. If the pathname is NULL,
554 this either indicates a non-standard file (i.e. a pipe or socket or
555 some such thing) or that we don't know the filename. If the fd is
556 already open, then we're probably doing a dup2() to an existing fd,
557 so just overwrite the existing one. */
558 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
562 if (fd >= VG_(fd_hard_limit))
563 return; /* Valgrind internal */
565 /* Check to see if this fd is already open. */
569 if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
575 /* Not already one: allocate an OpenFd */
577 i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
580 i->next = allocated_fds;
581 if(allocated_fds) allocated_fds->prev = i;
587 i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
588 i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
591 // Record opening of an fd, and find its name.
592 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
594 static HChar buf[VKI_PATH_MAX];
596 if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
601 ML_(record_fd_open_with_given_name)(tid, fd, name);
604 // Record opening of a nameless fd.
605 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
607 ML_(record_fd_open_with_given_name)(tid, fd, NULL);
611 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
613 if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
614 VG_(sprintf)(name, "<unknown>");
616 VG_(sprintf)(name, "%s", sa->sun_path);
623 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
625 if (sa == NULL || len == 0) {
626 VG_(sprintf)(name, "<unknown>");
628 UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
630 VG_(sprintf)(name, "<unbound>");
632 VG_(sprintf)(name, "%u.%u.%u.%u:%u",
633 (addr>>24) & 0xFF, (addr>>16) & 0xFF,
634 (addr>>8) & 0xFF, addr & 0xFF,
635 VG_(ntohs)(sa->sin_port));
643 * Try get some details about a socket.
646 getsockdetails(Int fd)
649 struct vki_sockaddr a;
650 struct vki_sockaddr_in in;
651 struct vki_sockaddr_un un;
655 llen = sizeof(laddr);
656 VG_(memset)(&laddr, 0, llen);
658 if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
659 switch(laddr.a.sa_family) {
661 static char lname[32];
662 static char pname[32];
663 struct vki_sockaddr_in paddr;
664 UInt plen = sizeof(struct vki_sockaddr_in);
666 if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
667 VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
668 inet2name(&(laddr.in), llen, lname),
669 inet2name(&paddr, plen, pname));
671 VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
672 fd, inet2name(&(laddr.in), llen, lname));
677 static char lname[256];
678 VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
679 unix2name(&(laddr.un), llen, lname));
683 VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
684 laddr.a.sa_family, fd);
689 VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
693 /* Dump out a summary, and a more detailed list, of open file descriptors. */
694 void VG_(show_open_fds) (HChar* when)
696 OpenFd *i = allocated_fds;
698 VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
702 VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
706 UInt len = sizeof(val);
708 if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
710 VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
712 getsockdetails(i->fd);
717 VG_(pp_ExeContext)(i->where);
718 VG_(message)(Vg_UserMsg, "\n");
720 VG_(message)(Vg_UserMsg, " <inherited from parent>\n");
721 VG_(message)(Vg_UserMsg, "\n");
727 VG_(message)(Vg_UserMsg, "\n");
730 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
731 have /proc support compiled in, or a non-Linux kernel), then we need to
732 find out what file descriptors we inherited from our parent process the
733 hard way - by checking each fd in turn. */
735 void init_preopened_fds_without_proc_self_fd(void)
737 struct vki_rlimit lim;
741 if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
742 /* Hmm. getrlimit() failed. Now we're screwed, so just choose
743 an arbitrarily high number. 1024 happens to be the limit in
744 the 2.4 Linux kernels. */
747 count = lim.rlim_cur;
750 for (i = 0; i < count; i++)
751 if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
752 ML_(record_fd_open_named)(-1, i);
755 /* Initialize the list of open file descriptors with the file descriptors
756 we inherited from out parent process. */
758 void VG_(init_preopened_fds)(void)
760 // DDD: should probably use HAVE_PROC here or similar, instead.
761 #if defined(VGO_linux)
766 f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
768 init_preopened_fds_without_proc_self_fd();
772 while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
776 if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
778 Int fno = VG_(strtoll10)(d.d_name, &s);
780 if (fno != sr_Res(f))
781 if (VG_(clo_track_fds))
782 ML_(record_fd_open_named)(-1, fno);
784 VG_(message)(Vg_DebugMsg,
785 "Warning: invalid file name in /proc/self/fd: %s\n",
790 VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
794 VG_(close)(sr_Res(f));
796 #elif defined(VGO_darwin)
797 init_preopened_fds_without_proc_self_fd();
805 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
807 UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
808 Char *result = VG_(arena_malloc) ( aid, cc, len );
809 VG_(strcpy) ( result, s1 );
810 VG_(strcat) ( result, s2 );
815 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
816 Char *msg, Addr base, SizeT size )
818 Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
819 "sendmsg", msg, VG_AR_CORE );
820 PRE_MEM_READ( outmsg, base, size );
821 VG_(arena_free) ( VG_AR_CORE, outmsg );
825 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
826 Char *msg, Addr base, SizeT size )
828 Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
829 "recvmsg", msg, VG_AR_CORE );
831 PRE_MEM_READ( outmsg, base, size );
833 PRE_MEM_WRITE( outmsg, base, size );
834 VG_(arena_free) ( VG_AR_CORE, outmsg );
838 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
839 Char *fieldName, Addr base, SizeT size )
842 POST_MEM_WRITE( base, size );
846 void msghdr_foreachfield (
849 struct vki_msghdr *msg,
851 void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
859 fieldName = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.mfef", VG_(strlen)(name) + 32 );
861 VG_(sprintf) ( fieldName, "(%s)", name );
863 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
864 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
865 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
866 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
867 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
868 foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
869 foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
871 if ( msg->msg_name ) {
872 VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
873 foreach_func ( tid, False, fieldName,
874 (Addr)msg->msg_name, msg->msg_namelen );
877 if ( msg->msg_iov ) {
878 struct vki_iovec *iov = msg->msg_iov;
881 VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
883 foreach_func ( tid, True, fieldName,
884 (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
886 for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
887 UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
888 VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
889 foreach_func ( tid, False, fieldName,
890 (Addr)iov->iov_base, iov_len );
891 length = length - iov_len;
895 if ( msg->msg_control )
897 VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
898 foreach_func ( tid, False, fieldName,
899 (Addr)msg->msg_control, msg->msg_controllen );
902 VG_(arena_free) ( VG_AR_CORE, fieldName );
905 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
907 struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
910 if (cm->cmsg_level == VKI_SOL_SOCKET &&
911 cm->cmsg_type == VKI_SCM_RIGHTS ) {
912 Int *fds = (Int *) VKI_CMSG_DATA(cm);
913 Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
917 for (i = 0; i < fdc; i++)
918 if(VG_(clo_track_fds))
919 // XXX: must we check the range on these fds with
920 // ML_(fd_allowed)()?
921 ML_(record_fd_open_named)(tid, fds[i]);
924 cm = VKI_CMSG_NXTHDR(msg, cm);
928 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
930 void pre_mem_read_sockaddr ( ThreadId tid,
932 struct vki_sockaddr *sa, UInt salen )
935 struct vki_sockaddr_un* sun = (struct vki_sockaddr_un *)sa;
936 struct vki_sockaddr_in* sin = (struct vki_sockaddr_in *)sa;
937 struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
939 /* NULL/zero-length sockaddrs are legal */
940 if ( sa == NULL || salen == 0 ) return;
942 outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
943 VG_(strlen)( description ) + 30 );
945 VG_(sprintf) ( outmsg, description, "sa_family" );
946 PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
948 switch (sa->sa_family) {
951 VG_(sprintf) ( outmsg, description, "sun_path" );
952 PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
953 // GrP fixme max of sun_len-2? what about nul char?
957 VG_(sprintf) ( outmsg, description, "sin_port" );
958 PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
959 VG_(sprintf) ( outmsg, description, "sin_addr" );
960 PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
964 VG_(sprintf) ( outmsg, description, "sin6_port" );
965 PRE_MEM_READ( outmsg,
966 (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
967 VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
968 PRE_MEM_READ( outmsg,
969 (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
970 VG_(sprintf) ( outmsg, description, "sin6_addr" );
971 PRE_MEM_READ( outmsg,
972 (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
973 VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
974 PRE_MEM_READ( outmsg,
975 (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
979 VG_(sprintf) ( outmsg, description, "" );
980 PRE_MEM_READ( outmsg, (Addr) sa, salen );
984 VG_(arena_free) ( VG_AR_CORE, outmsg );
987 /* Dereference a pointer to a UInt. */
988 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
990 UInt* a_p = (UInt*)a;
991 PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
998 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
999 Char* buf_s, Char* buflen_s )
1001 if (VG_(tdict).track_pre_mem_write) {
1002 UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1003 if (buflen_in > 0) {
1004 VG_(tdict).track_pre_mem_write(
1005 Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1010 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1011 Addr buf_p, Addr buflen_p, Char* s )
1013 if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1014 UInt buflen_out = deref_UInt( tid, buflen_p, s);
1015 if (buflen_out > 0 && buf_p != (Addr)NULL) {
1016 VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1021 /* ---------------------------------------------------------------------
1022 Data seg end, for brk()
1023 ------------------------------------------------------------------ */
1025 /* +--------+------------+
1027 +--------+------------+
1030 | | boundary is page aligned
1031 | VG_(brk_limit) -- no alignment constraint
1032 VG_(brk_base) -- page aligned -- does not move
1034 Both the anon part and the reservation part are always at least
1038 /* Set the new data segment end to NEWBRK. If this succeeds, return
1039 NEWBRK, else return the current data segment end. */
1041 static Addr do_brk ( Addr newbrk )
1043 NSegment const* aseg;
1044 NSegment const* rseg;
1051 VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1052 VG_(brk_base), VG_(brk_limit), newbrk);
1055 if (0) show_segments("in_brk");
1058 if (newbrk < VG_(brk_base))
1059 /* Clearly impossible. */
1062 if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
1063 /* shrinking the data segment. Be lazy and don't munmap the
1065 NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1066 if (seg && seg->hasT)
1067 VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1069 /* Since we're being lazy and not unmapping pages, we have to
1070 zero out the area, so that if the area later comes back into
1071 circulation, it will be filled with zeroes, as if it really
1072 had been unmapped and later remapped. Be a bit paranoid and
1073 try hard to ensure we're not going to segfault by doing the
1074 write - check both ends of the range are in the same segment
1075 and that segment is writable. */
1077 /* pre: newbrk < VG_(brk_limit)
1078 => newbrk <= VG_(brk_limit)-1 */
1079 NSegment const * seg2;
1080 vg_assert(newbrk < VG_(brk_limit));
1081 seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1082 if (seg2 && seg == seg2 && seg->hasW)
1083 VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1086 VG_(brk_limit) = newbrk;
1090 /* otherwise we're expanding the brk segment. */
1091 if (VG_(brk_limit) > VG_(brk_base))
1092 aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1094 aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1095 rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
1097 /* These should be assured by setup_client_dataseg in m_main. */
1100 vg_assert(aseg->kind == SkAnonC);
1101 vg_assert(rseg->kind == SkResvn);
1102 vg_assert(aseg->end+1 == rseg->start);
1104 vg_assert(newbrk >= VG_(brk_base));
1105 if (newbrk <= rseg->start) {
1106 /* still fits within the anon segment. */
1107 VG_(brk_limit) = newbrk;
1111 if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
1112 /* request is too large -- the resvn would fall below 1 page,
1113 which isn't allowed. */
1117 newbrkP = VG_PGROUNDUP(newbrk);
1118 vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
1119 delta = newbrkP - rseg->start;
1120 vg_assert(delta > 0);
1121 vg_assert(VG_IS_PAGE_ALIGNED(delta));
1123 ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
1126 VG_(brk_limit) = newbrk;
1130 return VG_(brk_limit);
/* ---------------------------------------------------------------------
   Vet file descriptors for sanity
   ------------------------------------------------------------------ */
/*
> - what does the "Bool soft" parameter mean?

(Tom Hughes, 3 Oct 05):

Whether or not to consider a file descriptor invalid if it is above
the current soft limit.

Basically if we are testing whether a newly created file descriptor is
valid (in a post handler) then we set soft to true, and if we are
testing whether a file descriptor that is about to be used (in a pre
handler) is valid [viz, an already-existing fd] then we set it to false.

The point is that if the (virtual) soft limit is lowered then any
existing descriptors can still be read/written/closed etc (so long as
they are below the valgrind reserved descriptors) but no new
descriptors can be created above the new soft limit.

(jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
*/
/* Policy check for file descriptors used or created by client syscalls.
   Rejects: out-of-range fds (hard limit), fds that would clobber
   Valgrind's own log/XML output sinks, and — for newly-created fds
   only — fds above the (virtual) soft limit.  On rejection, emits a
   user-visible warning (plus hints for the log/xml fd cases) and,
   at -v -v, a stack trace.
   NOTE(review): this listing is elided; returns/braces between the
   numbered lines are not shown. */
1158 /* Return true if we're allowed to use or create this fd */
1159 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
1161    Bool allowed = True;
1163    /* hard limits always apply */
1164    if (fd < 0 || fd >= VG_(fd_hard_limit))
1167    /* hijacking the output fds is never allowed */
1168    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1171    /* if creating a new fd (rather than using an existing one), the
1172       soft limit must also be observed */
1173    if (isNewFd && fd >= VG_(fd_soft_limit))
1176    /* this looks like it ought to be included, but causes problems: */
/* (disabled check: would reject fd 2 whenever -d debug logging is active) */
1178    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1181    /* The difficulty is as follows: consider a program P which expects
1182       to be able to mess with (redirect) its own stderr (fd 2).
1183       Usually to deal with P we would issue command line flags to send
1184       logging somewhere other than stderr, so as not to disrupt P.
1185       The problem is that -d unilaterally hijacks stderr with no
1186       consultation with P.  And so, if this check is enabled, P will
1187       work OK normally but fail if -d is issued.
1189       Basically -d is a hack and you take your chances when using it.
1190       It's very useful for low level debugging -- particularly at
1191       startup -- and having its presence change the behaviour of the
1192       client is exactly what we don't want. */
/* Warn the user about the rejected fd (only when core errors are shown). */
1195    if ((!allowed) && VG_(showing_core_errors)() ) {
1196       VG_(message)(Vg_UserMsg,
1197          "Warning: invalid file descriptor %d in syscall %s()\n",
1199       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1200 	 VG_(message)(Vg_UserMsg,
1201             "   Use --log-fd=<number> to select an alternative log fd.\n");
1202       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1203 	 VG_(message)(Vg_UserMsg,
1204             "   Use --xml-fd=<number> to select an alternative XML "
1206       // DDD: consider always printing this stack trace, it's useful.
1207       // Also consider also making this a proper core error, ie.
1208       // suppressible and all that.
1209       if (VG_(clo_verbosity) > 1) {
1210          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1218 /* ---------------------------------------------------------------------
1219 Deal with a bunch of socket-related syscalls
1220 ------------------------------------------------------------------ */
/* PRE: socketpair — the kernel will write two fds into sv[] (arg3),
   so mark that 2*sizeof(int) region as written-to. */
1225 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1226                                   UWord arg0, UWord arg1,
1227                                   UWord arg2, UWord arg3 )
1229    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1230    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1231                   arg3, 2*sizeof(int) );
/* POST: socketpair — validate both returned fds against the fd policy;
   if either is disallowed, the result is rewritten to EMFILE (and,
   per the elided lines, the fds are presumably closed — confirm in
   the full source).  Otherwise optionally record them for
   --track-fds. */
1235 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1237                                    UWord arg0, UWord arg1,
1238                                    UWord arg2, UWord arg3 )
1241    Int fd1 = ((Int*)arg3)[0];
1242    Int fd2 = ((Int*)arg3)[1];
1243    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1244    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1245    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1246        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1249       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1251       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1252       if (VG_(clo_track_fds)) {
1253          ML_(record_fd_open_nameless)(tid, fd1);
1254          ML_(record_fd_open_nameless)(tid, fd2);
/* POST: socket — if the new fd fails the fd policy, close it and
   force the syscall result to EMFILE; else record it for
   --track-fds. */
1263 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1266    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1267    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1268       VG_(close)(sr_Res(res));
1269       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1271       if (VG_(clo_track_fds))
1272          ML_(record_fd_open_nameless)(tid, sr_Res(res));
/* PRE: bind — the sockaddr at arg1 (length arg2) is read by the
   kernel; check it via the sockaddr-aware reader. */
1280 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1281                             UWord arg0, UWord arg1, UWord arg2 )
1283    /* int bind(int sockfd, struct sockaddr *my_addr,
1285    pre_mem_read_sockaddr(
1286       tid, "socketcall.bind(my_addr.%s)",
1287       (struct vki_sockaddr *) arg1, arg2
/* PRE: accept — addr/addrlen form the usual buf-and-len out-parameter
   pair; both may be NULL, in which case nothing is checked. */
1294 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1295                               UWord arg0, UWord arg1, UWord arg2 )
1297    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1299    Addr addrlen_p  = arg2;
1300    if (addr_p != (Addr)NULL)
1301       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1302                                    "socketcall.accept(addr)",
1303                                    "socketcall.accept(addrlen_in)" );
/* POST: accept — reject disallowed fds (close + EMFILE), mark the
   returned address buffer as written, and track the fd if asked. */
1307 ML_(generic_POST_sys_accept) ( ThreadId tid,
1309                                UWord arg0, UWord arg1, UWord arg2 )
1312    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1313    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1314       VG_(close)(sr_Res(res));
1315       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1318       Addr addrlen_p = arg2;
1319       if (addr_p != (Addr)NULL)
1320          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1321                                        "socketcall.accept(addrlen_out)" );
1322       if (VG_(clo_track_fds))
1323          ML_(record_fd_open_nameless)(tid, sr_Res(res));
/* PRE: sendto — the message buffer is read, and the destination
   sockaddr (arg4, length arg5) is checked with the sockaddr reader. */
1331 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1332                               UWord arg0, UWord arg1, UWord arg2,
1333                               UWord arg3, UWord arg4, UWord arg5 )
1335    /* int sendto(int s, const void *msg, int len,
1337                  const struct sockaddr *to, int tolen); */
1338    PRE_MEM_READ( "socketcall.sendto(msg)",
1341    pre_mem_read_sockaddr(
1342       tid, "socketcall.sendto(to.%s)",
1343       (struct vki_sockaddr *) arg4, arg5
/* PRE: send — only the outgoing message buffer needs checking. */
1350 ML_(generic_PRE_sys_send) ( ThreadId tid,
1351                             UWord arg0, UWord arg1, UWord arg2 )
1353    /* int send(int s, const void *msg, size_t len, int flags); */
1354    PRE_MEM_READ( "socketcall.send(msg)",
/* PRE: recvfrom — buf will be written; from/fromlen are an optional
   buf-and-len out-parameter pair (skipped when from is NULL). */
1363 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1364                                 UWord arg0, UWord arg1, UWord arg2,
1365                                 UWord arg3, UWord arg4, UWord arg5 )
1367    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1368                    struct sockaddr *from, int *fromlen); */
1372    Addr fromlen_p = arg5;
1373    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1374    if (from_p != (Addr)NULL)
1375       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1376                                    "socketcall.recvfrom(from)",
1377                                    "socketcall.recvfrom(fromlen_in)" );
/* POST: recvfrom — mark the from/fromlen pair (if present) and the
   data buffer as written. */
1381 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1383                                  UWord arg0, UWord arg1, UWord arg2,
1384                                  UWord arg3, UWord arg4, UWord arg5 )
1389    Addr fromlen_p = arg5;
1391    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1392    if (from_p != (Addr)NULL)
1393       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1394                                     "socketcall.recvfrom(fromlen_out)" );
1395    POST_MEM_WRITE( buf_p, len );
/* PRE: recv — buffer will be written by the kernel. */
1401 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1402                             UWord arg0, UWord arg1, UWord arg2 )
1404    /* int recv(int s, void *buf, int len, unsigned int flags); */
1406       The  recv call is normally used only on a  connected  socket
1407       (see  connect(2)) and is identical to recvfrom with a  NULL
1410    PRE_MEM_WRITE( "socketcall.recv(buf)",
/* POST: recv — only mark the buffer written on success with a
   non-NULL buffer. */
1416 ML_(generic_POST_sys_recv) ( ThreadId tid,
1418                              UWord arg0, UWord arg1, UWord arg2 )
1420    if (res >= 0 && arg1 != 0) {
1421       POST_MEM_WRITE( arg1, /* buf */
/* PRE: connect — the server sockaddr is read by the kernel. */
1429 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1430                                UWord arg0, UWord arg1, UWord arg2 )
1432    /* int connect(int sockfd,
1433                   struct sockaddr *serv_addr, int addrlen ); */
1434    pre_mem_read_sockaddr( tid,
1435                           "socketcall.connect(serv_addr.%s)",
1436                           (struct vki_sockaddr *) arg1, arg2);
/* PRE: setsockopt — the option value buffer (arg3, length arg4) is
   read by the kernel. */
1442 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1443                                   UWord arg0, UWord arg1, UWord arg2,
1444                                   UWord arg3, UWord arg4 )
1446    /* int setsockopt(int s, int level, int optname,
1447                      const void *optval, int optlen); */
1448    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1450                  arg4 /* optlen */ );
/* PRE/POST: getsockname — standard buf-and-len out-parameter pair;
   unlike accept, name may not be NULL here. */
1456 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1457                                    UWord arg0, UWord arg1, UWord arg2 )
1459    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1461    Addr namelen_p = arg2;
1462    /* Nb: name_p cannot be NULL */
1463    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1464                                 "socketcall.getsockname(name)",
1465                                 "socketcall.getsockname(namelen_in)" );
1469 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1471                                     UWord arg0, UWord arg1, UWord arg2 )
1474    Addr namelen_p = arg2;
1475    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1476    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1477                                  "socketcall.getsockname(namelen_out)" );
/* PRE/POST: getpeername — mirrors getsockname exactly. */
1483 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1484                                    UWord arg0, UWord arg1, UWord arg2 )
1486    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1488    Addr namelen_p = arg2;
1489    /* Nb: name_p cannot be NULL */
1490    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1491                                 "socketcall.getpeername(name)",
1492                                 "socketcall.getpeername(namelen_in)" );
1496 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1498                                     UWord arg0, UWord arg1, UWord arg2 )
1501    Addr namelen_p = arg2;
1502    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1503    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1504                                  "socketcall.getpeername(namelen_out)" );
/* sendmsg/recvmsg — delegate the per-field checking to
   msghdr_foreachfield; recvmsg POST also scans ancillary data for
   fds passed over the socket (check_cmsg_for_fds). */
1510 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
1512    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg );
1518 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
1520    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg );
1524 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg, UInt length )
1526    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg );
1527    check_cmsg_for_fds( tid, msg );
1531 /* ---------------------------------------------------------------------
1532 Deal with a bunch of IPC related syscalls
1533 ------------------------------------------------------------------ */
/* PRE: semop — the sops array (arg2 entries) is read by the kernel. */
1538 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1539                              UWord arg0, UWord arg1, UWord arg2 )
1541    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1542    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
/* PRE: semtimedop — as semop, plus the optional timeout struct. */
1548 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1549                                   UWord arg0, UWord arg1,
1550                                   UWord arg2, UWord arg3 )
1552    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1553                      struct timespec *timeout); */
1554    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1556       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
/* Query the kernel for the number of semaphores in a set, via
   semctl(IPC_STAT) — issued directly (not through the wrapper
   machinery) with either __NR_semctl or the multiplexed __NR_ipc,
   depending on platform (the #if lines are elided here).
   Returns buf.sem_nsems; error handling between lines 1581-1584 is
   not visible in this listing. */
1562 UInt get_sem_count( Int semid )
1564    struct vki_semid_ds buf;
1565    union vki_semun arg;
1568    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1569       (experimental) otherwise complains that the use in the return
1570       statement below is uninitialised. */
1576    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1578    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1579                           VKI_IPC_STAT, (UWord)&arg);
1581    if (sr_isError(res))
1584    return buf.sem_nsems;
/* PRE: semctl — dispatch on cmd to decide which part of the semun
   union the kernel reads or writes.  IPC_64 variants use the 64-bit
   semid_ds layout.  GETALL/SETALL sizes depend on the live semaphore
   count obtained via get_sem_count(). */
1588 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1589                               UWord arg0, UWord arg1,
1590                               UWord arg2, UWord arg3 )
1592    /* int semctl(int semid, int semnum, int cmd, ...); */
1593    union vki_semun arg = *(union vki_semun *)&arg3;
1595    switch (arg2 /* cmd */) {
1596 #if defined(VKI_IPC_INFO)
1599    case VKI_IPC_INFO|VKI_IPC_64:
1600    case VKI_SEM_INFO|VKI_IPC_64:
1601       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1602                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1607 #if defined(VKI_SEM_STAT)
1610       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1611                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1614 #if defined(VKI_IPC_64)
1615    case VKI_IPC_STAT|VKI_IPC_64:
1616 #if defined(VKI_SEM_STAT)
1617    case VKI_SEM_STAT|VKI_IPC_64:
1619       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1620                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1625       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1626                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1629 #if defined(VKI_IPC_64)
1630    case VKI_IPC_SET|VKI_IPC_64:
1631       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1632                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1637 #if defined(VKI_IPC_64)
1638    case VKI_GETALL|VKI_IPC_64:
1640       nsems = get_sem_count( arg0 );
1641       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1642                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1646 #if defined(VKI_IPC_64)
1647    case VKI_SETALL|VKI_IPC_64:
1649       nsems = get_sem_count( arg0 );
1650       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1651                     (Addr)arg.array, sizeof(unsigned short) * nsems );
/* POST: semctl — mirror of the PRE handler: mark as written exactly
   the regions the kernel filled in for each cmd. */
1657 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1659                                UWord arg0, UWord arg1,
1660                                UWord arg2, UWord arg3 )
1662    union vki_semun arg = *(union vki_semun *)&arg3;
1664    switch (arg2 /* cmd */) {
1665 #if defined(VKI_IPC_INFO)
1668    case VKI_IPC_INFO|VKI_IPC_64:
1669    case VKI_SEM_INFO|VKI_IPC_64:
1670       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1675 #if defined(VKI_SEM_STAT)
1678       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1681 #if defined(VKI_IPC_64)
1682    case VKI_IPC_STAT|VKI_IPC_64:
1683    case VKI_SEM_STAT|VKI_IPC_64:
1684       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1689 #if defined(VKI_IPC_64)
1690    case VKI_GETALL|VKI_IPC_64:
1692       nsems = get_sem_count( arg0 );
1693       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
/* Query the kernel for a shared-memory segment's size via
   shmctl(IPC_STAT), choosing between 64-bit and legacy shmid_ds
   layouts and between __NR_shmctl and the multiplexed __NR_ipc
   (the #if/#else structure is partially elided in this listing).
   Returns buf.shm_segsz, or an elided fallback on error. */
1703 SizeT get_shm_size ( Int shmid )
1707    struct vki_shmid64_ds buf;
1708 #  ifdef VGP_amd64_linux
1709      /* See bug 222545 comment 7 */
1710      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1711                                      VKI_IPC_STAT, (UWord)&buf);
1713      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1714                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1716 # else /* !def VKI_IPC_64 */
1717    struct vki_shmid_ds buf;
1718    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1719 # endif /* def VKI_IPC_64 */
1721    struct vki_shmid_ds buf;
1722    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1723                                    VKI_IPC_STAT, 0, (UWord)&buf);
1725    if (sr_isError(__res))
1728    return (SizeT) buf.shm_segsz;
/* PRE: shmat — pick an attach address.  If the client passed no
   address, ask aspacem for advice (with an arm-linux SHMLBA-alignment
   workaround); otherwise validate the client-supplied address range. */
1732 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1733                              UWord arg0, UWord arg1, UWord arg2 )
1735    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1736    SizeT  segmentSize = get_shm_size ( arg0 );
1740       /* arm-linux only: work around the fact that
1741          VG_(am_get_advisory_client_simple) produces something that is
1742          VKI_PAGE_SIZE aligned, whereas what we want is something
1743          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1744          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1745          then round the result up to the next VKI_SHMLBA boundary.
1746          See bug 222545 comment 15.  So far, arm-linux is the only
1747          platform where this is known to be necessary. */
1748       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1749       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1750          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1752       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1754          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1755             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1761    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
/* POST: shmat — notify aspacem of the new client mapping, tell the
   tool about the new memory (RW unless SHM_RDONLY), and discard any
   stale translations covering the attached range. */
1767 ML_(generic_POST_sys_shmat) ( ThreadId tid,
1769                               UWord arg0, UWord arg1, UWord arg2 )
1771    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
1772    if ( segmentSize > 0 ) {
1773       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
1776       if (arg2 & VKI_SHM_RDONLY)
1777          prot &= ~VKI_PROT_WRITE;
1778       /* It isn't exactly correct to pass 0 for the fd and offset
1779          here.  The kernel seems to think the corresponding section
1780          does have dev/ino numbers:
1782          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
1784          However there is no obvious way to find them.  In order to
1785          cope with the discrepancy, aspacem's sync checker omits the
1786          dev/ino correspondence check in cases where V does not know
1788       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
1790       /* we don't distinguish whether it's read-only or
1791        * read-write -- it doesn't matter really. */
1792       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
1795          VG_(discard_translations)( (Addr64)res,
1796                                     (ULong)VG_PGROUNDUP(segmentSize),
1797                                     "ML_(generic_POST_sys_shmat)" );
/* PRE: shmdt — only sanity-check that the address is in client space. */
1804 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1806    /* int shmdt(const void *shmaddr); */
1807    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
/* POST: shmdt — find the SkShmC segment at arg0, unmap it in
   aspacem, tell the tool the memory died, and discard translations. */
1811 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
1813    NSegment const* s = VG_(am_find_nsegment)(arg0);
1816       Addr  s_start = s->start;
1817       SizeT s_len   = s->end+1 - s->start;
1820       vg_assert(s->kind == SkShmC);
1821       vg_assert(s->start == arg0);
1823       d = VG_(am_notify_munmap)(s_start, s_len);
1824       s = NULL; /* s is now invalid */
1825       VG_TRACK( die_mem_munmap, s_start, s_len );
1827          VG_(discard_translations)( (Addr64)s_start,
1829                                     "ML_(generic_POST_sys_shmdt)" );
/* PRE: shmctl — dispatch on cmd to size the buf argument correctly:
   shminfo/shminfo64 for IPC_INFO, shm_info for SHM_INFO,
   shmid_ds/shmid64_ds for the STAT/SET variants. */
1835 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1836                               UWord arg0, UWord arg1, UWord arg2 )
1838    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1839    switch (arg1 /* cmd */) {
1840 #if defined(VKI_IPC_INFO)
1842       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1843                      arg2, sizeof(struct vki_shminfo) );
1845 #if defined(VKI_IPC_64)
1846    case VKI_IPC_INFO|VKI_IPC_64:
1847       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1848                      arg2, sizeof(struct vki_shminfo64) );
1853 #if defined(VKI_SHM_INFO)
1855 #if defined(VKI_IPC_64)
1856    case VKI_SHM_INFO|VKI_IPC_64:
1858       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
1859                      arg2, sizeof(struct vki_shm_info) );
1864 #if defined(VKI_SHM_STAT)
1867       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
1868                      arg2, sizeof(struct vki_shmid_ds) );
1871 #if defined(VKI_IPC_64)
1872    case VKI_IPC_STAT|VKI_IPC_64:
1873    case VKI_SHM_STAT|VKI_IPC_64:
1874       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
1875                      arg2, sizeof(struct vki_shmid64_ds) );
1880       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1881                     arg2, sizeof(struct vki_shmid_ds) );
1884 #if defined(VKI_IPC_64)
1885    case VKI_IPC_SET|VKI_IPC_64:
1886       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1887                     arg2, sizeof(struct vki_shmid64_ds) );
/* POST: shmctl — mirror of the PRE handler: mark the buf region the
   kernel wrote, with the same per-cmd sizing. */
1894 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1896                                UWord arg0, UWord arg1, UWord arg2 )
1898    switch (arg1 /* cmd */) {
1899 #if defined(VKI_IPC_INFO)
1901       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1903    case VKI_IPC_INFO|VKI_IPC_64:
1904       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1908 #if defined(VKI_SHM_INFO)
1910    case VKI_SHM_INFO|VKI_IPC_64:
1911       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1916 #if defined(VKI_SHM_STAT)
1919       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1922 #if defined(VKI_IPC_64)
1923    case VKI_IPC_STAT|VKI_IPC_64:
1924    case VKI_SHM_STAT|VKI_IPC_64:
1925       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1934 /* ---------------------------------------------------------------------
1935 Generic handler for mmap
1936 ------------------------------------------------------------------ */
1939 * Although mmap is specified by POSIX and the argument are generally
1940 * consistent across platforms the precise details of the low level
1941 * argument passing conventions differ. For example:
1943 * - On x86-linux there is mmap (aka old_mmap) which takes the
1944 * arguments in a memory block and the offset in bytes; and
1945 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
1946 * way and the offset in pages.
1948 * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1949 * arguments in the normal way and the offset in bytes; and
1950 * mmap2 (aka sys_mmap2) which takes the arguments in the normal
1951 * way and the offset in pages.
1953 * - On amd64-linux everything is simple and there is just the one
1954 * call, mmap (aka sys_mmap) which takes the arguments in the
1955 * normal way and the offset in bytes.
1957 * - On s390x-linux there is mmap (aka old_mmap) which takes the
1958 * arguments in a memory block and the offset in bytes. mmap2
1959 * is also available (but not exported via unistd.h) with
1960 * arguments in a memory block and the offset in pages.
1962 * To cope with all this we provide a generic handler function here
1963 * and then each platform implements one or more system call handlers
1964 * which call this generic routine after extracting and normalising
/* Generic mmap handler (Linux only; panics on Darwin).  Validates the
   arguments per SuSV3 (non-zero length, page-aligned addr and offset),
   asks aspacem where the mapping should go, performs the mmap itself
   with MAP_FIXED at aspacem's chosen address, retries once at an
   arbitrary address if a hinted request was refused by the kernel,
   and on success notifies aspacem, debuginfo, and the tool. */
1969 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
1970                             UWord arg1, UWord arg2, UWord arg3,
1971                             UWord arg4, UWord arg5, Off64T arg6 )
1978 #  if defined(VGO_darwin)
1979    // Nb: we can't use this on Darwin, it has races:
1980    // * needs to RETRY if advisory succeeds but map fails
1981    //   (could have been some other thread in a nonblocking call)
1982    // * needs to not use fixed-position mmap() on Darwin
1983    //   (mmap will cheerfully smash whatever's already there, which might
1984    //   be a new mapping from some other thread in a nonblocking call)
1985    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
1989       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
1990          shall be established. */
1991       return VG_(mk_SysRes_Error)( VKI_EINVAL );
1994    if (!VG_IS_PAGE_ALIGNED(arg1)) {
1995       /* zap any misaligned addresses. */
1996       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
1997          to fail.   Here, we catch them all. */
1998       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2001    if (!VG_IS_PAGE_ALIGNED(arg6)) {
2002       /* zap any misaligned offsets. */
2003       /* SuSV3 says: The off argument is constrained to be aligned and
2004          sized according to the value returned by sysconf() when
2005          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
2006       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2009    /* Figure out what kind of allocation constraints there are
2010       (fixed/hint/any), and ask aspacem what we should do. */
2013    if (arg4 & VKI_MAP_FIXED) {
2014       mreq.rkind = MFixed;
2023    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2025       /* Our request was bounced, so we'd better fail. */
2026       return VG_(mk_SysRes_Error)( VKI_EINVAL );
2029    /* Otherwise we're OK (so far).  Install aspacem's choice of
2030       address, and let the mmap go through.  */
2031    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2032                                     arg4 | VKI_MAP_FIXED,
2035    /* A refinement: it may be that the kernel refused aspacem's choice
2036       of address.  If we were originally asked for a hinted mapping,
2037       there is still a last chance: try again at any address.
2039    if (mreq.rkind == MHint && sr_isError(sres)) {
2043       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
2045          /* Our request was bounced, so we'd better fail. */
2046          return VG_(mk_SysRes_Error)( VKI_EINVAL );
2048       /* and try again with the kernel */
2049       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
2050                                        arg4 | VKI_MAP_FIXED,
2054    if (!sr_isError(sres)) {
2056       /* Notify aspacem. */
2057       notify_core_of_mmap(
2058          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2061          arg4, /* the original flags value */
2066       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
2067                                        False/*allow_SkFileV*/, (Int)arg5 );
2068       /* Notify the tool. */
2069       notify_tool_of_mmap(
2070          (Addr)sr_Res(sres), /* addr kernel actually assigned */
2073          di_handle /* so the tool can refer to the read debuginfo later,
/* Sanity: a MAP_FIXED request that succeeded must be at the asked-for
   address. */
2079    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
2080       vg_assert(sr_Res(sres) == arg1);
2086 /* ---------------------------------------------------------------------
2087 The Main Entertainment ... syscall wrappers
2088 ------------------------------------------------------------------ */
2090 /* Note: the PRE() and POST() wrappers are for the actual functions
2091 implementing the system calls in the OS kernel. These mostly have
2092 names like sys_write(); a few have names like old_mmap(). See the
2093 comment for ML_(syscall_table)[] for important info about the __NR_foo
2094 constants and their relationship to the sys_foo() functions.
2096 Some notes about names used for syscalls and args:
2097 - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2100 - For error messages, we generally use a somewhat generic name
2101 for the syscall (eg. "write" rather than "sys_write"). This should be
2102 good enough for the average user to understand what is happening,
2103 without confusing them with names like "sys_write".
2105 - Also, for error messages the arg names are mostly taken from the man
2106 pages (even though many of those man pages are really for glibc
2107 functions of the same name), rather than from the OS kernel source,
2108 for the same reason -- a user presented with a "bogus foo(bar)" arg
2109 will most likely look at the "foo" man page to see which is the "bar"
2112 Note that we use our own vki_* types. The one exception is in
2113 PRE_REG_READn calls, where pointer types haven't been changed, because
2114 they don't need to be -- eg. for "foo*" to be used, the type foo need not
2117 XXX: some of these are arch-specific, and should be factored out.
/* Wrapper-definition shorthands for this (generic) module. */
2120 #define PRE(name)       DEFN_PRE_TEMPLATE(generic, name)
2121 #define POST(name)      DEFN_POST_TEMPLATE(generic, name)
2123 // Macros to support 64-bit syscall args split into two 32 bit values
/* MERGE64 reassembles a 64-bit value from the two 32-bit registers;
   FIRST/SECOND name which half arrives in which argument slot, so
   PRE_REG_READ* lines can label them correctly per endianness. */
2124 #if defined(VG_LITTLEENDIAN)
2125 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2126 #define MERGE64_FIRST(name) name##_low
2127 #define MERGE64_SECOND(name) name##_high
2128 #elif defined(VG_BIGENDIAN)
2129 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
2130 #define MERGE64_FIRST(name) name##_high
2131 #define MERGE64_SECOND(name) name##_low
2133 #error Unknown endianness
/* PRE: exit — mark this thread as exiting and fake success; the
   actual teardown is driven by the scheduler via exitreason. */
2139    /* simple; just make this thread exit */
2140    PRINT("exit( %ld )", ARG1);
2141    PRE_REG_READ1(void, "exit", int, status);
2142    tst = VG_(get_ThreadState)(tid);
2143    /* Set the thread's status to be exiting, then claim that the
2144       syscall succeeded. */
2145    tst->exitreason = VgSrc_ExitThread;
2146    tst->os_state.exitcode = ARG1;
2147    SET_STATUS_Success(0);
/* PRE: ni_syscall — catch-all for syscalls the kernel doesn't
   implement; always fails with ENOSYS. */
2152    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2153       VG_SYSNUM_STRING(SYSNO));
2154    PRE_REG_READ0(long, "ni_syscall");
2155    SET_STATUS_Failure( VKI_ENOSYS );
/* PRE: iopl — argument logging only; no memory to check. */
2160    PRINT("sys_iopl ( %ld )", ARG1);
2161    PRE_REG_READ1(long, "iopl", unsigned long, level);
/* PRE: fsync / fdatasync — may block; fd-only arguments. */
2166    *flags |= SfMayBlock;
2167    PRINT("sys_fsync ( %ld )", ARG1);
2168    PRE_REG_READ1(long, "fsync", unsigned int, fd);
2173    *flags |= SfMayBlock;
2174    PRINT("sys_fdatasync ( %ld )", ARG1);
2175    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
/* PRE: msync — the kernel reads the region being synced. */
2180    *flags |= SfMayBlock;
2181    PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2182    PRE_REG_READ3(long, "msync",
2183                  unsigned long, start, vki_size_t, length, int, flags);
2184    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2187 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2188 // versions of LiS (Linux Streams).  They are not part of the kernel.
2189 // Therefore, we have to provide this type ourself, rather than getting it
2190 // from the kernel sources.
2191 struct vki_pmsg_strbuf {
2192    int     maxlen;         /* no. of bytes in buffer */
2193    int     len;            /* no. of bytes returned */
2194    vki_caddr_t buf;        /* pointer to data */
/* PRE: getpmsg — ctrl/data buffers are written up to maxlen; bandp
   and flagsp are int out-parameters. */
2198    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2199    struct vki_pmsg_strbuf *ctrl;
2200    struct vki_pmsg_strbuf *data;
2201    *flags |= SfMayBlock;
2202    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
2203    PRE_REG_READ5(int, "getpmsg",
2204                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2205                  int *, bandp, int *, flagsp);
2206    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2207    data = (struct vki_pmsg_strbuf *)ARG3;
2208    if (ctrl && ctrl->maxlen > 0)
2209       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2210    if (data && data->maxlen > 0)
2211       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2213       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2215       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
/* POST: getpmsg — on success (RES == 0), ctrl->len / data->len tell
   how much the kernel actually wrote. */
2219    struct vki_pmsg_strbuf *ctrl;
2220    struct vki_pmsg_strbuf *data;
2222    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2223    data = (struct vki_pmsg_strbuf *)ARG3;
2224    if (RES == 0 && ctrl && ctrl->len > 0) {
2225       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2227    if (RES == 0 && data && data->len > 0) {
2228       POST_MEM_WRITE( (Addr)data->buf, data->len);
/* PRE: putpmsg — ctrl/data buffers are read up to len. */
2234    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2235    struct vki_pmsg_strbuf *ctrl;
2236    struct vki_pmsg_strbuf *data;
2237    *flags |= SfMayBlock;
2238    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
2239    PRE_REG_READ5(int, "putpmsg",
2240                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2241                  int, band, int, flags);
2242    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2243    data = (struct vki_pmsg_strbuf *)ARG3;
2244    if (ctrl && ctrl->len > 0)
2245       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2246    if (data && data->len > 0)
2247       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
/* PRE/POST: getitimer — both timeval members of *value are written
   by the kernel; POST only marks them on a non-NULL value. */
2252    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2253    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
2254    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2256    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2257    PRE_timeval_WRITE( "getitimer(&value->it_value)", &(value->it_value));
2262    if (ARG2 != (Addr)NULL) {
2263       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2264       POST_timeval_WRITE( &(value->it_interval) );
2265       POST_timeval_WRITE( &(value->it_value) );
/* PRE/POST: setitimer — *value (if non-NULL) is read; *ovalue (if
   non-NULL) receives the previous timer and is marked written. */
2271    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
2272    PRE_REG_READ3(long, "setitimer",
2274                  struct itimerval *, value, struct itimerval *, ovalue);
2275    if (ARG2 != (Addr)NULL) {
2276       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2277       PRE_timeval_READ( "setitimer(&value->it_interval)",
2278                          &(value->it_interval));
2279       PRE_timeval_READ( "setitimer(&value->it_value)",
2280                          &(value->it_value));
2282    if (ARG3 != (Addr)NULL) {
2283       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2284       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2285                          &(ovalue->it_interval));
2286       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2287                          &(ovalue->it_value));
2293    if (ARG3 != (Addr)NULL) {
2294       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2295       POST_timeval_WRITE( &(ovalue->it_interval) );
2296       POST_timeval_WRITE( &(ovalue->it_value) );
/* PRE: chroot — path string is read. */
2302    PRINT("sys_chroot ( %#lx )", ARG1);
2303    PRE_REG_READ1(long, "chroot", const char *, path);
2304    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
/* PRE: madvise — may block; register-only args, no memory checks. */
2309    *flags |= SfMayBlock;
2310    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
2311    PRE_REG_READ3(long, "madvise",
2312                  unsigned long, start, vki_size_t, length, int, advice);
/* PRE: mremap — the 5-arg form (with new_address) is only read when
   MREMAP_FIXED is set; the real work is delegated to do_mremap(). */
2318    // Nb: this is different to the glibc version described in the man pages,
2319    // which lacks the fifth 'new_address' argument.
2320    if (ARG4 & VKI_MREMAP_FIXED) {
2321       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
2322             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
2323       PRE_REG_READ5(unsigned long, "mremap",
2324                     unsigned long, old_addr, unsigned long, old_size,
2325                     unsigned long, new_size, unsigned long, flags,
2326                     unsigned long, new_addr);
2328       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
2329             ARG1, (ULong)ARG2, ARG3, ARG4);
2330       PRE_REG_READ4(unsigned long, "mremap",
2331                     unsigned long, old_addr, unsigned long, old_size,
2332                     unsigned long, new_size, unsigned long, flags);
2334    SET_STATUS_from_SysRes(
2335       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2338 #endif /* HAVE_MREMAP */
/* PRE: nice — register-only argument. */
2342    PRINT("sys_nice ( %ld )", ARG1);
2343    PRE_REG_READ1(long, "nice", int, inc);
/* PRE: mlock / munlock / mlockall — may block; register-only args. */
2348    *flags |= SfMayBlock;
2349    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2350    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2355    *flags |= SfMayBlock;
2356    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
2357    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2362    *flags |= SfMayBlock;
2363    PRINT("sys_mlockall ( %lx )", ARG1);
2364    PRE_REG_READ1(long, "mlockall", int, flags);
/* PRE: setpriority / getpriority — register-only args. */
2367 PRE(sys_setpriority)
2369    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
2370    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2373 PRE(sys_getpriority)
2375    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
2376    PRE_REG_READ2(long, "getpriority", int, which, int, who);
/* PRE: pwrite64 — on 32-bit platforms the 64-bit offset arrives as
   two registers (MERGE64); the write buffer is read. */
2381    *flags |= SfMayBlock;
2382 #if VG_WORDSIZE == 4
2383    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2384          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2385    PRE_REG_READ5(ssize_t, "pwrite64",
2386                  unsigned int, fd, const char *, buf, vki_size_t, count,
2387                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2388 #elif VG_WORDSIZE == 8
2389    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
2390          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2391    PRE_REG_READ4(ssize_t, "pwrite64",
2392                  unsigned int, fd, const char *, buf, vki_size_t, count,
2395 #  error Unexpected word size
2397    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
/* PRE: sync — may block; no arguments. */
2402    *flags |= SfMayBlock;
2403    PRINT("sys_sync ( )");
2404    PRE_REG_READ0(long, "sync");
/* PRE/POST: fstatfs — buf receives a struct statfs. */
2409    FUSE_COMPATIBLE_MAY_BLOCK();
2410    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
2411    PRE_REG_READ2(long, "fstatfs",
2412                  unsigned int, fd, struct statfs *, buf);
2413    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2418    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
/* PRE/POST: fstatfs64 — caller supplies the buffer size (ARG2). */
2423    FUSE_COMPATIBLE_MAY_BLOCK();
2424    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
2425    PRE_REG_READ3(long, "fstatfs64",
2426                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2427    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2431    POST_MEM_WRITE( ARG3, ARG2 );
/* PRE: getsid — register-only argument. */
2436    PRINT("sys_getsid ( %ld )", ARG1);
2437    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
/* PRE/POST: pread64 — same MERGE64 offset handling as pwrite64; the
   buffer is written, and POST marks only the RES bytes actually read. */
2442    *flags |= SfMayBlock;
2443 #if VG_WORDSIZE == 4
2444    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2445          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
2446    PRE_REG_READ5(ssize_t, "pread64",
2447                  unsigned int, fd, char *, buf, vki_size_t, count,
2448                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2449 #elif VG_WORDSIZE == 8
2450    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
2451          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
2452    PRE_REG_READ4(ssize_t, "pread64",
2453                  unsigned int, fd, char *, buf, vki_size_t, count,
2456 #  error Unexpected word size
2458    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2464       POST_MEM_WRITE( ARG2, RES );
/* PRE: mknod — pathname string is read. */
2470    FUSE_COMPATIBLE_MAY_BLOCK();
2471    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
2472    PRE_REG_READ3(long, "mknod",
2473                  const char *, pathname, int, mode, unsigned, dev);
2474    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
/* PRE: flock — may block; register-only args. */
2479    *flags |= SfMayBlock;
2480    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
2481    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
/* pre_argv_envp: walk a NULL-terminated char** vector at address 'a',
   marking each pointer slot (s1) and each pointed-to string (s2) as
   must-be-readable.  NOTE(review): loop structure elided in this
   listing — only representative body lines are visible. */
2484 // Pre_read a char** argument.
2485 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2489 Addr* a_p = (Addr*)a;
2490 PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2494 PRE_MEM_RASCIIZ( s2, a_deref );
/* i_am_the_only_thread: true iff exactly one living thread remains
   (presumably returns c == 1; the return line is elided here). */
2499 static Bool i_am_the_only_thread ( void )
2501 Int c = VG_(count_living_threads)();
2502 vg_assert(c >= 1); /* stay sane */
/* VG_(reap_threads): spin, yielding and polling signals, until every
   other thread has exited; used on the execve path below. */
2506 /* Wait until all other threads disappear. */
2507 void VG_(reap_threads)(ThreadId self)
2509 while (!i_am_the_only_thread()) {
2510 /* Let other thread(s) run */
2512 VG_(poll_signals)(self);
2514 vg_assert(i_am_the_only_thread());
/* Fragment of PRE(sys_execve).  Overall flow visible below:
   (1) validate filename/argv/envp; (2) decide whether to trace the
   child; (3) pre-flight checks (past the "can't recover" point an exec
   failure is fatal); (4) kill all other threads; (5) build the child's
   path/argv/envp (inserting the Valgrind launcher + args when tracing);
   (6) restore rlimits and signal state; (7) do the real execve. */
2517 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2518 // but it seems to work nonetheless...
2521 Char* path = NULL; /* path to executable */
2525 Char* launcher_basename = NULL;
2529 Bool setuid_allowed, trace_this_child;
2531 PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2532 PRE_REG_READ3(vki_off_t, "execve",
2533 char *, filename, char **, argv, char **, envp);
2534 PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2536 pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2538 pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2540 vg_assert(VG_(is_valid_tid)(tid));
2541 tst = VG_(get_ThreadState)(tid);
2543 /* Erk. If the exec fails, then the following will have made a
2544 mess of things which makes it hard for us to continue. The
2545 right thing to do is piece everything together again in
2546 POST(execve), but that's close to impossible. Instead, we make
2547 an effort to check that the execve will work before actually
2550 /* Check that the name at least begins in client-accessible storage. */
2551 if (ARG1 == 0 /* obviously bogus */
2552 || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2553 SET_STATUS_Failure( VKI_EFAULT );
2557 // debug-only printing
2559 VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2561 VG_(printf)("ARG2 = ");
2563 HChar** vec = (HChar**)ARG2;
2564 for (q = 0; vec[q]; q++)
2565 VG_(printf)("%p(%s) ", vec[q], vec[q]);
2568 VG_(printf)("ARG2 = null\n");
2572 // Decide whether or not we want to follow along
2573 { // Make 'child_argv' be a pointer to the child's arg vector
2574 // (skipping the exe name)
2575 HChar** child_argv = (HChar**)ARG2;
2576 if (child_argv && child_argv[0] == NULL)
2578 trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2581 // Do the important checks: it is a file, is executable, permissions are
2582 // ok, etc. We allow setuid executables to run only in the case when
2583 // we are not simulating them, that is, they to be run natively.
2584 setuid_allowed = trace_this_child ? False : True;
2585 res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
2586 if (sr_isError(res)) {
2587 SET_STATUS_Failure( sr_Err(res) );
2591 /* If we're tracing the child, and the launcher name looks bogus
2592 (possibly because launcher.c couldn't figure it out, see
2593 comments therein) then we have no option but to fail. */
2594 if (trace_this_child
2595 && (VG_(name_of_launcher) == NULL
2596 || VG_(name_of_launcher)[0] != '/')) {
2597 SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2601 /* After this point, we can't recover if the execve fails. */
2602 VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
2605 // Terminate gdbserver if it is active.
2606 if (VG_(clo_vgdb) != Vg_VgdbNo) {
2607 // If the child will not be traced, we need to terminate gdbserver
2608 // to cleanup the gdbserver resources (e.g. the FIFO files).
2609 // If child will be traced, we also terminate gdbserver: the new
2610 // Valgrind will start a fresh gdbserver after exec.
2614 /* Resistance is futile. Nuke all other threads. POSIX mandates
2615 this. (Really, nuke them all, since the new process will make
2616 its own new thread.) */
2617 VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2618 VG_(reap_threads)(tid);
2620 // Set up the child's exe path.
2622 if (trace_this_child) {
2624 // We want to exec the launcher. Get its pre-remembered path.
2625 path = VG_(name_of_launcher);
2626 // VG_(name_of_launcher) should have been acquired by m_main at
2630 launcher_basename = VG_(strrchr)(path, '/');
2631 if (launcher_basename == NULL || launcher_basename[1] == 0) {
2632 launcher_basename = path; // hmm, tres dubious
2634 launcher_basename++;
2641 // Set up the child's environment.
2643 // Remove the valgrind-specific stuff from the environment so the
2644 // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2645 // This is done unconditionally, since if we are tracing the child,
2646 // the child valgrind will set up the appropriate client environment.
2647 // Nb: we make a copy of the environment before trying to mangle it
2648 // as it might be in read-only memory (this was bug #101881).
2650 // Then, if tracing the child, set VALGRIND_LIB for it.
2655 envp = VG_(env_clone)( (Char**)ARG3 );
2656 if (envp == NULL) goto hosed;
2657 VG_(env_remove_valgrind_env_stuff)( envp );
2660 if (trace_this_child) {
2661 // Set VALGRIND_LIB in ARG3 (the environment)
2662 VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2665 // Set up the child's args. If not tracing it, they are
2666 // simply ARG2. Otherwise, they are
2668 // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2670 // except that the first VG_(args_for_valgrind_noexecpass) args
2673 if (!trace_this_child) {
2674 argv = (Char**)ARG2;
2676 vg_assert( VG_(args_for_valgrind) );
2677 vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2678 vg_assert( VG_(args_for_valgrind_noexecpass)
2679 <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2680 /* how many args in total will there be? */
2681 // launcher basename
2684 tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2685 tot_args -= VG_(args_for_valgrind_noexecpass);
2686 // name of client exe
2688 // args for client exe, skipping [0]
2689 arg2copy = (Char**)ARG2;
2690 if (arg2copy && arg2copy[0]) {
2691 for (i = 1; arg2copy[i]; i++)
2695 argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2696 (tot_args+1) * sizeof(HChar*) );
2697 if (argv == 0) goto hosed;
/* Assemble: [launcher] ++ valgrind args (minus noexecpass) ++ exe ++ rest. */
2700 argv[j++] = launcher_basename;
2701 for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2702 if (i < VG_(args_for_valgrind_noexecpass))
2704 argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2706 argv[j++] = (Char*)ARG1;
2707 if (arg2copy && arg2copy[0])
2708 for (i = 1; arg2copy[i]; i++)
2709 argv[j++] = arg2copy[i];
/* +1 for the terminating NULL slot. */
2712 vg_assert(j == tot_args+1);
2715 /* restore the DATA rlimit for the child */
2716 VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2719 Set the signal state up for exec.
2721 We need to set the real signal state to make sure the exec'd
2722 process gets SIG_IGN properly.
2724 Also set our real sigmask to match the client's sigmask so that
2725 the exec'd child will get the right mask. First we need to
2726 clear out any pending signals so they they don't get delivered,
2727 which would confuse things.
2729 XXX This is a bug - the signals should remain pending, and be
2730 delivered to the new process after exec. There's also a
2731 race-condition, since if someone delivers us a signal between
2732 the sigprocmask and the execve, we'll still get the signal. Oh
2736 vki_sigset_t allsigs;
2739 /* What this loop does: it queries SCSS (the signal state that
2740 the client _thinks_ the kernel is in) by calling
2741 VG_(do_sys_sigaction), and modifies the real kernel signal
2742 state accordingly. */
2743 for (i = 1; i < VG_(max_signal); i++) {
2744 vki_sigaction_fromK_t sa_f;
2745 vki_sigaction_toK_t sa_t;
2746 VG_(do_sys_sigaction)(i, NULL, &sa_f);
2747 VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2748 if (sa_t.ksa_handler == VKI_SIG_IGN)
2749 VG_(sigaction)(i, &sa_t, NULL);
/* Any non-ignored handler is reset to default for the exec'd image. */
2751 sa_t.ksa_handler = VKI_SIG_DFL;
2752 VG_(sigaction)(i, &sa_t, NULL);
/* Drain pending signals, then install the client's own mask. */
2756 VG_(sigfillset)(&allsigs);
2757 while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2760 VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
/* Debug dump of the final exec parameters. */
2765 VG_(printf)("exec: %s\n", path);
2766 for (cpp = argv; cpp && *cpp; cpp++)
2767 VG_(printf)("argv: %s\n", *cpp);
2769 for (cpp = envp; cpp && *cpp; cpp++)
2770 VG_(printf)("env: %s\n", *cpp);
2773 SET_STATUS_from_SysRes(
2774 VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2777 /* If we got here, then the execve failed. We've already made way
2778 too much of a mess to continue, so we have to abort. */
2781 VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2782 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2783 VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2784 "execve() failing, so I'm dying.\n");
2785 VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2786 "or work out how to recover.\n");
/* Fragment of PRE(sys_access): pathname must be a readable string. */
2792 PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2793 PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
2794 PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
/* Fragment of PRE(sys_alarm): scalar argument only. */
2799 PRINT("sys_alarm ( %ld )", ARG1);
2800 PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
/* Fragment of PRE(sys_brk).  The comment block below records the
   libc-vs-kernel semantics this wrapper emulates; do_brk() returns the
   new segment end, which equals ARG1 exactly when the request succeeded. */
2805 Addr brk_limit = VG_(brk_limit);
2808 /* libc says: int brk(void *end_data_segment);
2809 kernel says: void* brk(void* end_data_segment); (more or less)
2811 libc returns 0 on success, and -1 (and sets errno) on failure.
2812 Nb: if you ask to shrink the dataseg end below what it
2813 currently is, that always succeeds, even if the dataseg end
2814 doesn't actually change (eg. brk(0)). Unless it seg faults.
2816 Kernel returns the new dataseg end. If the brk() failed, this
2817 will be unchanged from the old one. That's why calling (kernel)
2818 brk(0) gives the current dataseg end (libc brk() just returns
2821 Both will seg fault if you shrink it back into a text segment.
2823 PRINT("sys_brk ( %#lx )", ARG1);
2824 PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
2826 brk_new = do_brk(ARG1);
2827 SET_STATUS_Success( brk_new );
2829 if (brk_new == ARG1) {
2830 /* brk() succeeded */
2831 if (brk_new < brk_limit) {
2832 /* successfully shrunk the data segment. */
/* Tell the tool the vacated range is now dead memory. */
2833 VG_TRACK( die_mem_brk, (Addr)ARG1,
2836 if (brk_new > brk_limit) {
2837 /* successfully grew the data segment */
2838 VG_TRACK( new_mem_brk, brk_limit,
2839 ARG1-brk_limit, tid );
/* Failure path: the segment end must be unchanged. */
2843 vg_assert(brk_limit == brk_new);
/* Fragment of PRE(sys_chdir). */
2849 FUSE_COMPATIBLE_MAY_BLOCK();
2850 PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
2851 PRE_REG_READ1(long, "chdir", const char *, path);
2852 PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
/* Fragment of PRE(sys_chmod). */
2857 FUSE_COMPATIBLE_MAY_BLOCK();
2858 PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
2859 PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
2860 PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
/* Fragment of PRE(sys_chown). */
2865 FUSE_COMPATIBLE_MAY_BLOCK();
2866 PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2867 PRE_REG_READ3(long, "chown",
2868 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2869 PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
/* Fragment of PRE(sys_lchown): same shape as chown, no symlink deref. */
2874 FUSE_COMPATIBLE_MAY_BLOCK();
2875 PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
2876 PRE_REG_READ3(long, "lchown",
2877 const char *, path, vki_uid_t, owner, vki_gid_t, group);
2878 PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
/* Fragment of PRE(sys_close): refuse to let the client close fds that
   Valgrind itself owns (log fd; stderr when -d logging is active). */
2883 FUSE_COMPATIBLE_MAY_BLOCK();
2884 PRINT("sys_close ( %ld )", ARG1);
2885 PRE_REG_READ1(long, "close", unsigned int, fd);
2887 /* Detect and negate attempts by the client to close Valgrind's log fd */
2888 if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
2889 /* If doing -d style logging (which is to fd=2), don't
2890 allow that to be closed either. */
2891 || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
2892 SET_STATUS_Failure( VKI_EBADF );
/* Fragment of POST(sys_close): keep the fd-tracking table in sync. */
2897 if (VG_(clo_track_fds)) record_fd_close(ARG1);
/* Fragment of PRE/POST(sys_dup): if the kernel handed back an fd in
   Valgrind's reserved range, close it and fail with EMFILE. */
2902 PRINT("sys_dup ( %ld )", ARG1);
2903 PRE_REG_READ1(long, "dup", unsigned int, oldfd);
2909 if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
2911 SET_STATUS_Failure( VKI_EMFILE );
2913 if (VG_(clo_track_fds))
2914 ML_(record_fd_open_named)(tid, RES);
/* Fragment of PRE/POST(sys_dup2): newfd is checked up front since the
   client chooses it explicitly. */
2920 PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
2921 PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
2922 if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
2923 SET_STATUS_Failure( VKI_EBADF );
2929 if (VG_(clo_track_fds))
2930 ML_(record_fd_open_named)(tid, RES);
/* Fragment of PRE(sys_fchdir). */
2935 FUSE_COMPATIBLE_MAY_BLOCK();
2936 PRINT("sys_fchdir ( %ld )", ARG1);
2937 PRE_REG_READ1(long, "fchdir", unsigned int, fd);
/* Fragment of PRE(sys_fchown). */
2942 FUSE_COMPATIBLE_MAY_BLOCK();
2943 PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
2944 PRE_REG_READ3(long, "fchown",
2945 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
/* Fragment of PRE(sys_fchmod). */
2950 FUSE_COMPATIBLE_MAY_BLOCK();
2951 PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
2952 PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
/* Fragment of PRE/POST(sys_newfstat): buf receives a struct vki_stat. */
2957 FUSE_COMPATIBLE_MAY_BLOCK();
2958 PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
2959 PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
2960 PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
2965 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
/* Saved signal mask across fork: blocked before the syscall, restored
   separately in the child and parent paths below. */
2968 static vki_sigset_t fork_saved_mask;
2970 // In Linux, the sys_fork() function varies across architectures, but we
2971 // ignore the various args it gets, and so it looks arch-neutral. Hmm.
/* Fragment of PRE(sys_fork). */
2978 PRINT("sys_fork ( )");
2979 PRE_REG_READ0(long, "fork");
2981 /* Block all signals during fork, so that we can fix things up in
2982 the child without being interrupted. */
2983 VG_(sigfillset)(&mask);
2984 VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
2986 SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
2988 if (!SUCCESS) return;
/* Work out which side of the fork we are on — the convention differs
   between Linux (RES==0 in child) and Darwin (RESHI flag). */
2990 #if defined(VGO_linux)
2991 // RES is 0 for child, non-0 (the child's PID) for parent.
2992 is_child = ( RES == 0 ? True : False );
2993 child_pid = ( is_child ? -1 : RES );
2994 #elif defined(VGO_darwin)
2995 // RES is the child's pid. RESHI is 1 for child, 0 for parent.
/* NOTE(review): atfork_pre appears after the syscall in this listing;
   intervening lines are elided, so ordering here is not authoritative. */
3002 VG_(do_atfork_pre)(tid);
/* Child path: run atfork-child handlers, restore mask, and optionally
   silence output fds (--child-silent-after-fork=yes). */
3005 VG_(do_atfork_child)(tid);
3007 /* restore signal mask */
3008 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3010 /* If --child-silent-after-fork=yes was specified, set the
3011 output file descriptors to 'impossible' values. This is
3012 noticed by send_bytes_to_logging_sink in m_libcprint.c, which
3013 duly stops writing any further output. */
3014 if (VG_(clo_child_silent_after_fork)) {
3015 if (!VG_(log_output_sink).is_socket)
3016 VG_(log_output_sink).fd = -1;
3017 if (!VG_(xml_output_sink).is_socket)
3018 VG_(xml_output_sink).fd = -1;
/* Parent path: atfork-parent handlers, log, restore mask. */
3022 VG_(do_atfork_parent)(tid);
3024 PRINT(" fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3026 /* restore signal mask */
3027 VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
/* Fragment of PRE(sys_ftruncate). */
3033 *flags |= SfMayBlock;
3034 PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
3035 PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
/* Fragment of PRE(sys_truncate). */
3040 *flags |= SfMayBlock;
3041 PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3042 PRE_REG_READ2(long, "truncate",
3043 const char *, path, unsigned long, length);
3044 PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
/* PRE(sys_ftruncate64): 64-bit length split across two registers on
   32-bit targets, single register otherwise. */
3047 PRE(sys_ftruncate64)
3049 *flags |= SfMayBlock;
3050 #if VG_WORDSIZE == 4
3051 PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
3052 PRE_REG_READ3(long, "ftruncate64",
3054 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3056 PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
3057 PRE_REG_READ2(long, "ftruncate64",
3058 unsigned int,fd, UWord,length);
/* Fragment of PRE(sys_truncate64): same split-length pattern, plus the
   pathname string check. */
3064 *flags |= SfMayBlock;
3065 #if VG_WORDSIZE == 4
3066 PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3067 PRE_REG_READ3(long, "truncate64",
3069 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3071 PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3072 PRE_REG_READ2(long, "truncate64",
3073 const char *,path, UWord,length);
3075 PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
/* Fragment of PRE(sys_getdents): dirp buffer may be written up to
   'count' bytes. */
3080 *flags |= SfMayBlock;
3081 PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
3082 PRE_REG_READ3(long, "getdents",
3083 unsigned int, fd, struct linux_dirent *, dirp,
3084 unsigned int, count);
3085 PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
/* Fragment of POST(sys_getdents): only RES bytes were filled. */
3092 POST_MEM_WRITE( ARG2, RES );
/* Fragment of PRE(sys_getdents64): identical shape for the 64-bit
   dirent layout. */
3097 *flags |= SfMayBlock;
3098 PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
3099 PRE_REG_READ3(long, "getdents64",
3100 unsigned int, fd, struct linux_dirent64 *, dirp,
3101 unsigned int, count);
3102 PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3105 POST(sys_getdents64)
3109 POST_MEM_WRITE( ARG2, RES );
/* Fragment of PRE/POST(sys_getgroups): list receives RES gid_t items;
   a size of 0 is a pure query, hence the ARG1 > 0 guards. */
3114 PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
3115 PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3117 PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3123 if (ARG1 > 0 && RES > 0)
3124 POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
/* Fragment of PRE/POST(sys_getcwd): kernel returns the number of bytes
   written (including the NUL), not a pointer. */
3129 // Comment from linux/fs/dcache.c:
3130 // NOTE! The user-level library version returns a character pointer.
3131 // The kernel system call just returns the length of the buffer filled
3132 // (which includes the ending '\0' character), or a negative error
3134 // Is this Linux-specific? If so it should be moved to syswrap-linux.c.
3135 PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3136 PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3137 PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3143 if (RES != (Addr)NULL)
3144 POST_MEM_WRITE( ARG1, RES );
/* Trivial no-argument id getters: each PRE just logs and registers a
   zero-arg register read; no memory checking needed. */
3149 PRINT("sys_geteuid ( )");
3150 PRE_REG_READ0(long, "geteuid");
3155 PRINT("sys_getegid ( )");
3156 PRE_REG_READ0(long, "getegid");
3161 PRINT("sys_getgid ( )");
3162 PRE_REG_READ0(long, "getgid");
3167 PRINT("sys_getpid ()");
3168 PRE_REG_READ0(long, "getpid");
/* getpgid takes a single pid argument. */
3173 PRINT("sys_getpgid ( %ld )", ARG1);
3174 PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3179 PRINT("sys_getpgrp ()");
3180 PRE_REG_READ0(long, "getpgrp");
3185 PRINT("sys_getppid ()");
3186 PRE_REG_READ0(long, "getppid");
/* common_post_getrlimit: shared POST for getrlimit variants.  Marks the
   output struct defined, then overrides the values for resources that
   Valgrind virtualises (fd limits, data, stack) so the client sees the
   limits Valgrind enforces rather than the process's real ones. */
3189 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
3191 POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
3193 #ifdef _RLIMIT_POSIX_FLAG
3194 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
3195 // Unset it here to make the switch case below work correctly.
3196 a1 &= ~_RLIMIT_POSIX_FLAG;
3200 case VKI_RLIMIT_NOFILE:
3201 ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
3202 ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
3205 case VKI_RLIMIT_DATA:
3206 *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
3209 case VKI_RLIMIT_STACK:
3210 *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
/* PRE/POST(sys_old_getrlimit): legacy variant, same checks. */
3215 PRE(sys_old_getrlimit)
3217 PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3218 PRE_REG_READ2(long, "old_getrlimit",
3219 unsigned int, resource, struct rlimit *, rlim);
3220 PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3223 POST(sys_old_getrlimit)
3225 common_post_getrlimit(tid, ARG1, ARG2);
/* Fragment of PRE/POST(sys_getrlimit). */
3230 PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
3231 PRE_REG_READ2(long, "getrlimit",
3232 unsigned int, resource, struct rlimit *, rlim);
3233 PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3238 common_post_getrlimit(tid, ARG1, ARG2);
/* Fragment of PRE/POST(sys_getrusage). */
3243 PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
3244 PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3245 PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3252 POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
/* PRE/POST(sys_gettimeofday): both pointer args are optional — the
   elided guards presumably test for non-NULL before the checks. */
3255 PRE(sys_gettimeofday)
3257 PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3258 PRE_REG_READ2(long, "gettimeofday",
3259 struct timeval *, tv, struct timezone *, tz);
3260 // GrP fixme does darwin write to *tz anymore?
3262 PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3264 PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3267 POST(sys_gettimeofday)
3272 POST_timeval_WRITE( ARG1 );
3274 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
/* PRE(sys_settimeofday): mirror image — both structs are inputs. */
3278 PRE(sys_settimeofday)
3280 PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3281 PRE_REG_READ2(long, "settimeofday",
3282 struct timeval *, tv, struct timezone *, tz);
3284 PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3286 PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3287 /* maybe should warn if tz->tz_dsttime is non-zero? */
/* Fragment of PRE(sys_getuid). */
3293 PRINT("sys_getuid ( )");
3294 PRE_REG_READ0(long, "getuid");
/* ML_(PRE_unknown_ioctl): fallback for ioctls with no dedicated wrapper.
   Decodes the direction and size bits packed into the request number and
   applies generic read/write checks to the argument buffer.  With the
   "lax-ioctls" sim-hint, checking is skipped entirely. */
3297 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3299 /* We don't have any specific information on it, so
3300 try to do something reasonable based on direction and
3301 size bits. The encoding scheme is described in
3302 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3304 According to Simon Hausmann, _IOC_READ means the kernel
3305 writes a value to the ioctl value passed from the user
3306 space and the other way around with _IOC_WRITE. */
3308 UInt dir = _VKI_IOC_DIR(request);
3309 UInt size = _VKI_IOC_SIZE(request);
3310 if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
3312 * Be very lax about ioctl handling; the only
3313 * assumption is that the size is correct. Doesn't
3314 * require the full buffer to be initialized when
3315 * writing. Without this, using some device
3316 * drivers with a large number of strange ioctl
3317 * commands becomes very tiresome.
3319 } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3320 //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3321 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
/* Rate-limited warning: no size/direction hints means we cannot check
   the argument at all. */
3322 static Int moans = 3;
3323 if (moans > 0 && !VG_(clo_xml)) {
3325 VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3326 " with no size/direction hints\n", request);
3327 VG_(umsg)(" This could cause spurious value errors to appear.\n");
3328 VG_(umsg)(" See README_MISSING_SYSCALL_OR_IOCTL for "
3329 "guidance on writing a proper wrapper.\n" );
3332 //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3333 //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
/* _IOC_WRITE = userspace->kernel (so buffer must be readable);
   _IOC_READ = kernel->userspace (so buffer must be writable). */
3334 if ((dir & _VKI_IOC_WRITE) && size > 0)
3335 PRE_MEM_READ( "ioctl(generic)", arg, size);
3336 if ((dir & _VKI_IOC_READ) && size > 0)
3337 PRE_MEM_WRITE( "ioctl(generic)", arg, size);
/* ML_(POST_unknown_ioctl): after the syscall, mark the kernel-written
   buffer defined for _IOC_READ requests. */
3341 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3343 /* We don't have any specific information on it, so
3344 try to do something reasonable based on direction and
3345 size bits. The encoding scheme is described in
3346 /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3348 According to Simon Hausmann, _IOC_READ means the kernel
3349 writes a value to the ioctl value passed from the user
3350 space and the other way around with _IOC_WRITE. */
3352 UInt dir = _VKI_IOC_DIR(request);
3353 UInt size = _VKI_IOC_SIZE(request);
3354 if (size > 0 && (dir & _VKI_IOC_READ)
3356 && arg != (Addr)NULL)
3358 POST_MEM_WRITE(arg, size);
/* ML_(do_sigkill): intercept SIGKILL aimed at one of our own threads so
   it can exit cleanly instead of being destroyed by the kernel.  The
   header comment below documents the pid/tgid contract. */
3363 If we're sending a SIGKILL to one of our own threads, then simulate
3364 it rather than really sending the signal, so that the target thread
3365 gets a chance to clean up. Returns True if we did the killing (or
3366 no killing is necessary), and False if the caller should use the
3367 normal kill syscall.
3369 "pid" is any pid argument which can be passed to kill; group kills
3370 (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3371 they'll most likely hit all the threads and we won't need to worry
3372 about cleanup. In truth, we can't fully emulate these multicast
3375 "tgid" is a thread group id. If it is not -1, then the target
3376 thread must be in that thread group.
3378 Bool ML_(do_sigkill)(Int pid, Int tgid)
3386 tid = VG_(lwpid_to_vgtid)(pid);
3387 if (tid == VG_INVALID_THREADID)
3388 return False; /* none of our threads */
3390 tst = VG_(get_ThreadState)(tid);
3391 if (tst == NULL || tst->status == VgTs_Empty)
3392 return False; /* hm, shouldn't happen */
3394 if (tgid != -1 && tst->os_state.threadgroup != tgid)
3395 return False; /* not the right thread group */
3397 /* Check to see that the target isn't already exiting. */
3398 if (!VG_(is_exiting)(tid)) {
3399 if (VG_(clo_trace_signals))
3400 VG_(message)(Vg_DebugMsg,
3401 "Thread %d being killed with SIGKILL\n",
/* Mark the thread as dying of a fatal signal and yank it out of any
   syscall it is blocked in. */
3404 tst->exitreason = VgSrc_FatalSig;
3405 tst->os_state.fatalsig = VKI_SIGKILL;
3407 if (!VG_(is_running_thread)(tid))
3408 VG_(get_thread_out_of_syscall)(tid);
/* Fragment of PRE(sys_kill): validate the signal number, divert
   SIGKILL-to-self-thread through ML_(do_sigkill), otherwise forward to
   the kernel and request a signal poll afterwards. */
3416 PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
3417 PRE_REG_READ2(long, "kill", int, pid, int, sig);
3418 if (!ML_(client_signal_OK)(ARG2)) {
3419 SET_STATUS_Failure( VKI_EINVAL );
3423 /* If we're sending SIGKILL, check to see if the target is one of
3424 our threads and handle it specially. */
3425 if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3426 SET_STATUS_Success(0);
3428 /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3429 affecting how posix-compliant the call is. I guess it is
3430 harmless to pass the 3rd arg on other platforms; hence pass
3432 SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3434 if (VG_(clo_trace_signals))
3435 VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3438 /* This kill might have given us a pending signal. Ask for a check once
3439 the syscall is done. */
3440 *flags |= SfPollAfter;
/* Fragment of PRE(sys_link): both paths are NUL-terminated inputs. */
3445 *flags |= SfMayBlock;
3446 PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3447 PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3448 PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3449 PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
/* Fragment of PRE/POST(sys_newlstat). */
3454 PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3455 PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3456 PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3457 PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3463 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
/* Fragment of PRE(sys_mkdir). */
3468 *flags |= SfMayBlock;
3469 PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3470 PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3471 PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
/* Fragment of PRE(sys_mprotect).  Besides the usual address-range
   validation, this emulates the kernel's PROT_GROWSDOWN/PROT_GROWSUP
   handling: glibc uses those flags to change the protection of a whole
   growable stack vma by mprotecting just its edge page, so the wrapper
   widens ARG1/ARG2 to cover the adjacent reservation boundary and
   strips the flag before forwarding. */
3476 PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
3477 PRE_REG_READ3(long, "mprotect",
3478 unsigned long, addr, vki_size_t, len, unsigned long, prot);
3480 if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3481 SET_STATUS_Failure( VKI_ENOMEM );
3483 #if defined(VKI_PROT_GROWSDOWN)
3485 if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3486 /* Deal with mprotects on growable stack areas.
3488 The critical files to understand all this are mm/mprotect.c
3489 in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3492 The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3493 round the start/end address of mprotect to the start/end of
3494 the underlying vma and glibc uses that as an easy way to
3495 change the protection of the stack by calling mprotect on the
3496 last page of the stack with PROT_GROWSDOWN set.
3498 The sanity check provided by the kernel is that the vma must
3499 have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate. */
3500 UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3501 NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3502 NSegment const *rseg;
/* GROWSDOWN: the reservation must sit immediately below the segment. */
3506 if (grows == VKI_PROT_GROWSDOWN) {
3507 rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
3509 rseg->kind == SkResvn &&
3510 rseg->smode == SmUpper &&
3511 rseg->end+1 == aseg->start) {
3512 Addr end = ARG1 + ARG2;
3514 ARG2 = end - aseg->start;
3515 ARG3 &= ~VKI_PROT_GROWSDOWN;
3517 SET_STATUS_Failure( VKI_EINVAL );
/* GROWSUP: symmetric case, reservation immediately above. */
3519 } else if (grows == VKI_PROT_GROWSUP) {
3520 rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
3522 rseg->kind == SkResvn &&
3523 rseg->smode == SmLower &&
3524 aseg->end+1 == rseg->start) {
3525 ARG2 = aseg->end - ARG1 + 1;
3526 ARG3 &= ~VKI_PROT_GROWSUP;
3528 SET_STATUS_Failure( VKI_EINVAL );
3531 /* both GROWSUP and GROWSDOWN */
3532 SET_STATUS_Failure( VKI_EINVAL );
3535 #endif // defined(VKI_PROT_GROWSDOWN)
/* Fragment of POST(sys_mprotect): inform core+tool of the change. */
3544 ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
/* Fragment of PRE/POST(sys_munmap). */
3549 if (0) VG_(printf)(" munmap( %#lx )\n", ARG1);
3550 PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3551 PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3553 if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3554 SET_STATUS_Failure( VKI_EINVAL );
3562 ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
/* Fragment of PRE/POST(sys_mincore): the vec output holds one byte per
   page of the rounded-up range. */
3567 PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3568 PRE_REG_READ3(long, "mincore",
3569 unsigned long, start, vki_size_t, length,
3570 unsigned char *, vec);
3571 PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3575 POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
/* Fragment of PRE/POST(sys_nanosleep): SfPostOnFail is needed because
   'rem' is written precisely on the EINTR failure path. */
3580 *flags |= SfMayBlock|SfPostOnFail;
3581 PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3582 PRE_REG_READ2(long, "nanosleep",
3583 struct timespec *, req, struct timespec *, rem);
3584 PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3586 PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3591 vg_assert(SUCCESS || FAILURE);
3592 if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3593 POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
/* Fragment of PRE(sys_open): the mode argument only exists (and is only
   register-checked) when O_CREAT is set. */
3598 if (ARG2 & VKI_O_CREAT) {
3600 PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
3601 PRE_REG_READ3(long, "open",
3602 const char *, filename, int, flags, int, mode);
3605 PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
3606 PRE_REG_READ2(long, "open",
3607 const char *, filename, int, flags);
3609 PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3611 #if defined(VGO_linux)
3612 /* Handle the case where the open is of /proc/self/cmdline or
3613 /proc/<pid>/cmdline, and just give it a copy of the fd for the
3614 fake file we cooked up at startup (in m_main). Also, seek the
3615 cloned fd back to the start. */
3618 Char* arg1s = (Char*) ARG1;
3621 VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3622 if (ML_(safe_to_deref)( arg1s, 1 ) &&
3623 (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
/* Serve a dup of the pre-cooked cmdline fd, rewound to offset 0. */
3626 sres = VG_(dup)( VG_(cl_cmdline_fd) );
3627 SET_STATUS_from_SysRes( sres );
3628 if (!sr_isError(sres)) {
3629 OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3631 SET_STATUS_Failure( VKI_EMFILE );
3637 /* Handle the case where the open is of /proc/self/auxv or
3638 /proc/<pid>/auxv, and just give it a copy of the fd for the
3639 fake file we cooked up at startup (in m_main). Also, seek the
3640 cloned fd back to the start. */
3643 Char* arg1s = (Char*) ARG1;
3646 VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
3647 if (ML_(safe_to_deref)( arg1s, 1 ) &&
3648 (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/auxv"))
3651 sres = VG_(dup)( VG_(cl_auxv_fd) );
3652 SET_STATUS_from_SysRes( sres );
3653 if (!sr_isError(sres)) {
3654 OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3656 SET_STATUS_Failure( VKI_EMFILE );
3661 #endif // defined(VGO_linux)
3663 /* Otherwise handle normally */
3664 *flags |= SfMayBlock;
/* Fragment of POST(sys_open): reject fds landing in Valgrind's reserved
   range; record the fd with its filename when tracking is on. */
3670 if (!ML_(fd_allowed)(RES, "open", tid, True)) {
3672 SET_STATUS_Failure( VKI_EMFILE );
3674 if (VG_(clo_track_fds))
3675 ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
3681 *flags |= SfMayBlock;
3682 PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3683 PRE_REG_READ3(ssize_t, "read",
3684 unsigned int, fd, char *, buf, vki_size_t, count);
3686 if (!ML_(fd_allowed)(ARG1, "read", tid, False))
3687 SET_STATUS_Failure( VKI_EBADF );
3689 PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
3695 POST_MEM_WRITE( ARG2, RES );
/* NOTE(review): PRE(sys_write) fragment; header and closing lines elided. */
3701 *flags |= SfMayBlock;
3702 PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3703 PRE_REG_READ3(ssize_t, "write",
3704 unsigned int, fd, const char *, buf, vki_size_t, count);
3705 /* check to see if it is allowed. If not, try for an exemption from
3706 --sim-hints=enable-outer (used for self hosting). */
3707 ok = ML_(fd_allowed)(ARG1, "write", tid, False);
/* Special case: writes to stderr are permitted under enable-outer even
   when fd_allowed says no, so an inner Valgrind can report errors. */
3708 if (!ok && ARG1 == 2/*stderr*/
3709 && VG_(strstr)(VG_(clo_sim_hints),"enable-outer")
3712 SET_STATUS_Failure( VKI_EBADF );
3714 PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
/* NOTE(review): PRE(sys_creat)/POST(sys_creat) fragment; headers elided.
   Mirrors the open() wrapper: validate path, then police the new fd. */
3719 *flags |= SfMayBlock;
3720 PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
3721 PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
3722 PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
/* POST: disallowed fd -> EMFILE; otherwise record for --track-fds. */
3728 if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
3730 SET_STATUS_Failure( VKI_EMFILE );
3732 if (VG_(clo_track_fds))
3733 ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
/* NOTE(review): PRE(sys_poll)/POST(sys_poll) fragment; headers elided.
   The comment block below documents struct pollfd field roles. */
3740 int fd; -- file descriptor
3741 short events; -- requested events
3742 short revents; -- returned events
3744 int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3747 struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3748 *flags |= SfMayBlock;
3749 PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3750 PRE_REG_READ3(long, "poll",
3751 struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
/* Check each pollfd entry field-by-field: fd/events are inputs,
   revents is kernel-written output. */
3753 for (i = 0; i < ARG2; i++) {
3754 PRE_MEM_READ( "poll(ufds.fd)",
3755 (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3756 PRE_MEM_READ( "poll(ufds.events)",
3757 (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3758 PRE_MEM_WRITE( "poll(ufds.reventss)",
3759 (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
/* POST: mark every revents field as initialised by the kernel. */
3767 struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3768 for (i = 0; i < ARG2; i++)
3769 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
/* NOTE(review): PRE(sys_readlink) fragment; header, braces and parts of
   the /proc/self/exe interception are elided in this dump. */
3775 FUSE_COMPATIBLE_MAY_BLOCK();
3778 PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
3779 PRE_REG_READ3(long, "readlink",
3780 const char *, path, char *, buf, int, bufsiz);
3781 PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
3782 PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
3785 #if defined(VGO_linux)
3787 * Handle the case where readlink is looking at /proc/self/exe or
3791 Char* arg1s = (Char*) ARG1;
3792 VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
3793 if (ML_(safe_to_deref)(arg1s, 1) &&
3794 (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
/* Redirect to the saved client-exe fd so the client sees its own
   executable path rather than the Valgrind tool's. */
3797 VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
3798 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
3801 #endif // defined(VGO_linux)
/* Non-/proc paths: just forward the original arguments. */
3804 SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
/* readlink does not NUL-terminate; only RES bytes are defined. */
3808 if (SUCCESS && RES > 0)
3809 POST_MEM_WRITE( ARG2, RES );
/* NOTE(review): PRE(sys_readv)/POST(sys_readv) fragment; headers and
   some declarations (e.g. 'remains' init) are elided in this dump. */
3815 struct vki_iovec * vec;
3816 *flags |= SfMayBlock;
3817 PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
3818 PRE_REG_READ3(ssize_t, "readv",
3819 unsigned long, fd, const struct iovec *, vector,
3820 unsigned long, count);
3821 if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
3822 SET_STATUS_Failure( VKI_EBADF );
/* The iovec array itself is an input ... */
3824 PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
3827 /* ToDo: don't do any of the following if the vector is invalid */
/* ... and each iov_base buffer is an output. */
3828 vec = (struct vki_iovec *)ARG2;
3829 for (i = 0; i < (Int)ARG3; i++)
3830 PRE_MEM_WRITE( "readv(vector[...])",
3831 (Addr)vec[i].iov_base, vec[i].iov_len );
/* POST: distribute the RES bytes actually read across the buffers in
   order, marking only the filled prefix of each as defined. */
3841 struct vki_iovec * vec = (struct vki_iovec *)ARG2;
3844 /* RES holds the number of bytes read. */
3845 for (i = 0; i < (Int)ARG3; i++) {
3846 Int nReadThisBuf = vec[i].iov_len;
3847 if (nReadThisBuf > remains) nReadThisBuf = remains;
3848 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
3849 remains -= nReadThisBuf;
3850 if (remains < 0) VG_(core_panic)("readv: remains < 0");
/* NOTE(review): fragments of PRE(sys_rename), PRE(sys_rmdir) and
   PRE(sys_select); wrapper headers are elided in this dump. */
3857 FUSE_COMPATIBLE_MAY_BLOCK();
3858 PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3859 PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
3860 PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
3861 PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
3866 *flags |= SfMayBlock;
3867 PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3868 PRE_REG_READ1(long, "rmdir", const char *, pathname);
3869 PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
3874 *flags |= SfMayBlock;
3875 PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
3876 PRE_REG_READ5(long, "select",
3877 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
3878 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
3879 // XXX: this possibly understates how much memory is read.
/* Each fd_set is checked for ceil(n/8) bytes — n fds at one bit each.
   (Elided guards presumably test the pointers for NULL first.) */
3881 PRE_MEM_READ( "select(readfds)",
3882 ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
3884 PRE_MEM_READ( "select(writefds)",
3885 ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
3887 PRE_MEM_READ( "select(exceptfds)",
3888 ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
3890 PRE_timeval_READ( "select(timeout)", ARG5 );
/* Simple credential/session-setter wrappers: only register-argument
   checks (plus one memory check for setgroups); no POST needed.
   NOTE(review): individual PRE(...) headers are elided in this dump. */
3895 PRINT("sys_setgid ( %ld )", ARG1);
3896 PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
3901 PRINT("sys_setsid ( )");
3902 PRE_REG_READ0(long, "setsid");
3907 PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
3908 PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
/* The gid list is an input array of ARG1 entries. */
3910 PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3915 PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
3916 PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
3921 PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
3922 PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
3927 PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
3928 PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
/* NOTE(review): PRE(sys_setrlimit) fragment; header, 'arg1' declaration
   and several braces are elided.  Valgrind intercepts NOFILE/DATA/STACK
   limits because it manages those resources on the client's behalf. */
3934 PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
3935 PRE_REG_READ2(long, "setrlimit",
3936 unsigned int, resource, struct rlimit *, rlim);
3937 PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3939 #ifdef _RLIMIT_POSIX_FLAG
3940 // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
3941 // Unset it here to make the if statements below work correctly.
3942 arg1 &= ~_RLIMIT_POSIX_FLAG;
/* cur > max is always invalid, regardless of resource. */
3946 ((struct vki_rlimit *)ARG2)->rlim_cur > ((struct vki_rlimit *)ARG2)->rlim_max) {
3947 SET_STATUS_Failure( VKI_EINVAL );
/* RLIMIT_NOFILE: Valgrind reserves fds above fd_hard_limit for itself,
   so the client may not raise its limit past that ceiling. */
3949 else if (arg1 == VKI_RLIMIT_NOFILE) {
3950 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
3951 ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
3952 SET_STATUS_Failure( VKI_EPERM );
3955 VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
3956 SET_STATUS_Success( 0 );
/* RLIMIT_DATA: tracked internally; never forwarded to the kernel. */
3959 else if (arg1 == VKI_RLIMIT_DATA) {
3960 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
3961 ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
3962 SET_STATUS_Failure( VKI_EPERM );
3965 VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
3966 SET_STATUS_Success( 0 );
/* RLIMIT_STACK: only honoured for the main thread (tid == 1), whose
   stack Valgrind itself allocated. */
3969 else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
3970 if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
3971 ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
3972 SET_STATUS_Failure( VKI_EPERM );
3975 VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
3976 VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
3977 SET_STATUS_Success( 0 );
/* Straightforward wrappers: setuid, newstat, statfs, statfs64, symlink,
   time, times, umask, unlink, newuname.  Pattern is uniform: PRE checks
   the inputs, POST marks kernel-written buffers defined.
   NOTE(review): PRE(...)/POST(...) headers are elided in this dump. */
3984 PRINT("sys_setuid ( %ld )", ARG1);
3985 PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
3990 PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3991 PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
3992 PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
3993 PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
3998 POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4003 PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
4004 PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4005 PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4006 PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4010 POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
/* statfs64 carries an explicit buffer size (ARG2) for buf (ARG3). */
4015 PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4016 PRE_REG_READ3(long, "statfs64",
4017 const char *, path, vki_size_t, size, struct statfs64 *, buf);
4018 PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4019 PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4023 POST_MEM_WRITE( ARG3, ARG2 );
4028 *flags |= SfMayBlock;
4029 PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4030 PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4031 PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4032 PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4037 /* time_t time(time_t *t); */
4038 PRINT("sys_time ( %#lx )",ARG1);
4039 PRE_REG_READ1(long, "time", int *, t);
/* The t out-parameter is optional; elided guard presumably checks NULL. */
4041 PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4048 POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4054 PRINT("sys_times ( %#lx )", ARG1);
4055 PRE_REG_READ1(long, "times", struct tms *, buf);
4057 PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4064 POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4070 PRINT("sys_umask ( %ld )", ARG1);
4071 PRE_REG_READ1(long, "umask", int, mask);
4076 *flags |= SfMayBlock;
4077 PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4078 PRE_REG_READ1(long, "unlink", const char *, pathname);
4079 PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4084 PRINT("sys_newuname ( %#lx )", ARG1);
4085 PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4086 PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4092 POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
/* NOTE(review): PRE/POST fragments for sys_waitpid and sys_wait4;
   wrapper headers elided.  Both status and rusage out-pointers are
   optional, hence the NULL guards around each check. */
4098 *flags |= SfMayBlock;
4099 PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
4100 PRE_REG_READ3(long, "waitpid",
4101 vki_pid_t, pid, unsigned int *, status, int, options);
4103 if (ARG2 != (Addr)NULL)
4104 PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4109 if (ARG2 != (Addr)NULL)
4110 POST_MEM_WRITE( ARG2, sizeof(int) );
4115 *flags |= SfMayBlock;
4116 PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
4118 PRE_REG_READ4(long, "wait4",
4119 vki_pid_t, pid, unsigned int *, status, int, options,
4120 struct rusage *, rusage);
4121 if (ARG2 != (Addr)NULL)
4122 PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4123 if (ARG4 != (Addr)NULL)
4124 PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4129 if (ARG2 != (Addr)NULL)
4130 POST_MEM_WRITE( ARG2, sizeof(int) );
4131 if (ARG4 != (Addr)NULL)
4132 POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
/* NOTE(review): PRE(sys_writev) fragment; header/braces elided.
   Mirror image of readv: the iovec array and every buffer are inputs. */
4138 struct vki_iovec * vec;
4139 *flags |= SfMayBlock;
4140 PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
4141 PRE_REG_READ3(ssize_t, "writev",
4142 unsigned long, fd, const struct iovec *, vector,
4143 unsigned long, count);
4144 if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4145 SET_STATUS_Failure( VKI_EBADF );
4147 PRE_MEM_READ( "writev(vector)",
4148 ARG2, ARG3 * sizeof(struct vki_iovec) );
4150 /* ToDo: don't do any of the following if the vector is invalid */
4151 vec = (struct vki_iovec *)ARG2;
4152 for (i = 0; i < (Int)ARG3; i++)
4153 PRE_MEM_READ( "writev(vector[...])",
4154 (Addr)vec[i].iov_base, vec[i].iov_len );
/* NOTE(review): PRE fragments for sys_utimes, sys_acct and sys_pause;
   wrapper headers elided in this dump. */
4161 FUSE_COMPATIBLE_MAY_BLOCK();
4162 PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4163 PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4164 PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
/* tvp points at a two-element timeval array: access time, mod time. */
4166 PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4167 PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4173 PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4174 PRE_REG_READ1(long, "acct", const char *, filename);
4175 PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4180 *flags |= SfMayBlock;
4181 PRINT("sys_pause ( )");
4182 PRE_REG_READ0(long, "pause");
/* NOTE(review): PRE/POST for sys_sigaltstack; braces and some guard
   lines are elided in this dump.  Valgrind handles the alt-stack itself
   via VG_(do_sys_sigaltstack) rather than forwarding to the kernel. */
4185 PRE(sys_sigaltstack)
4187 PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4188 PRE_REG_READ2(int, "sigaltstack",
4189 const vki_stack_t *, ss, vki_stack_t *, oss);
/* The ss input is checked field-by-field (sp, flags, size), not as one
   blob, so padding bytes do not trigger false positives. */
4191 const vki_stack_t *ss = (vki_stack_t *)ARG1;
4192 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4193 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4194 PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4197 PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4200 SET_STATUS_from_SysRes(
4201 VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4205 POST(sys_sigaltstack)
/* oss is only written on success and only if the caller supplied it. */
4208 if (RES == 0 && ARG2 != 0)
4209 POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4215 #endif // defined(VGO_linux) || defined(VGO_darwin)
4217 /*--------------------------------------------------------------------*/
4219 /*--------------------------------------------------------------------*/