Friday, October 25, 2013

NULL Pointer in __up() in Custom Driver

My client calls and says he has a panic; I request stack traces and one comes to me as cute as lemon pie (see below).

This is Linux 2.6.29 with RTAI and has been running well for three years in production systems.

So the bug cannot be in __up(). I disassembled the code at the crash point and it looked like
*(%ecx + 4) := %eax
This looks like "NULL->offset4". The kernel code looked like "waiter_list->prev". Hmm.

This must be something sprinkling memory. I reviewed custommodule and indeed some debug code looked like
array[index++] = debug_info;
No checking on array bounds, see?

Customer reworked the code and lived happy until the next crash ;)


PS. There were other crashes caused by the same problem from other power cycles but none as beautiful and explicit as this.
BUG: unable to handle kernel NULL pointer dereference at 00000004
IP: [<c0286e8a>] __up+0xb/0x2e
*pde = 365c1067 *pte = 00000000 
Oops: 0002 [#1] 
Modules linked in: custommodule(P) module3x20(P) moduleDSPcode(P)\
                   rdtsc customdebug coretemp fakertnet(P) e1000e \
                   irqregistrar(P) \
                   rtai_smi rtai_mbx rtai_sched \
                   rtai_math rtai_hal uhci_hcd

Pid: 1873, comm: customproc.bin Tainted: P ( #54)  
EIP: 0060:[<c0286e8a>] EFLAGS: 00010007 CPU: 0
EIP is at __up+0xb/0x2e
EAX: 73694c67 EBX: 00000200 ECX: 00000000 EDX: 00000000
ESI: f65cdbe0 EDI: f9feed50 EBP: f65cdb14 ESP: f65cdb14
 DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
Process mts5000.bin (pid: 1873, ti=f65cc000 task=f70eacc0 task.ti=f65cc000)
I-pipe domain Linux
 f65cdb20 c012627a f9f286d4 f65cdbec f86a32c4 00004e1f 00004e20 f9e72441
 00000013 00000002 000c0f19 00000001 f86b819b 20203130 65532f3c 6e697474
 73694c67 00003e74 00000000 00000000 00000000 00000000 00000000 00000000
Call Trace:
 [<c012627a>] ? up+0x2e/0x44
 [<f86a32c4>] ? dequeueCommsRequest+0x217/0x225 [custommodule]
 [<f86a0382>] ? customdriver_read+0x15e2/0x1cdb [custommodule]
 [<c01331b8>] ? __ipipe_restore_root+0x16/0x18
 [<c01331b8>] ? __ipipe_restore_root+0x16/0x18
 [<c0131e6e>] ? cpu_quiet+0x71/0xcb
 [<c0118ff1>] ? __do_softirq+0xc5/0xcd
 [<c0119110>] ? irq_exit+0x28/0x2a
 [<c0104285>] ? do_IRQ+0x55/0x68
 [<f86b0024>] ? pfc_runInInterrupt+0xe0/0x6cf [custommodule]
 [<f86ae1a4>] ? sampleInterruptHandler+0x2944/0x2958 [custommodule]
 [<f86ae1a4>] ? sampleInterruptHandler+0x2944/0x2958 [custommodule]
 [<f86b0024>] ? pfc_runInInterrupt+0xe0/0x6cf [custommodule]
 [<f86ae1a4>] ? sampleInterruptHandler+0x2944/0x2958 [custommodule]
 [<c011222a>] ? enqueue_task_fair+0x12b/0x133
 [<c0110df5>] ? check_preempt_wakeup+0x82/0xa5
 [<c0112922>] ? try_to_wake_up+0xa2/0xad
 [<c0112944>] ? wake_up_state+0xa/0xc
 [<c011d59f>] ? signal_wake_up+0x51/0x55
 [<c011d717>] ? complete_signal+0x174/0x18c
 [<c011d8b1>] ? send_signal+0x182/0x197
 [<c01331b8>] ? __ipipe_restore_root+0x16/0x18
 [<c011df89>] ? group_send_sig_info+0x54/0x5d
 [<c011dfbd>] ? kill_pid_info+0x2b/0x35
 [<c011e129>] ? sys_kill+0x6f/0x114
 [<f869eda0>] ? customdriver_read+0x0/0x1cdb [custommodule]
 [<c014ffbe>] ? vfs_read+0x87/0x101
 [<c01500d1>] ? sys_read+0x3b/0x60
 [<c0102c07>] ? syscall_call+0x7/0xb
EIP: [<c0286e8a>] __up+0xb/0x2e SS:ESP 0068:f65cdb14
---[ end trace 2aa77bbc7c743932 ]---

Thursday, August 1, 2013

What to do about a dead-slow disk I/O VPS?

My new VPS provider uses OpenVZ and the disk I/O is even slower than that of my previous host.

I wrote an Asterisk AGI application which compiles weather info from various sources. It is Perl (so it uses many piddly modules/shared objects on-disk) and instructs Asterisk to stream ~100 gsm/sln audio files.

When using the naked VPS this is slow as each of these files needs to be loaded in-memory.

So what to do? Wire them into the Linux file cache of course! I compiled a list of all non-system shared objects [e.g. not libc/libm/libdl] my Perl apps/CGI scripts use, all the sound files I need and I am mmap/mlock-ing them in core using the utility below.

The added bonus is that my external bit of code that does the Gōōgle TTS now takes only 14s to complete instead of 54s, which in audio terms is almost instantaneous.

I am not quite sure if I have to keep the fds open... have to experiment.

// This code is licensed under GPLv2
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

  #define O_LARGEFILE  0100000

typedef struct {
  char name[513];
  int fd;
  size_t size;
  const void* mmaploc;
} mapinfo_t;

char* name = "mmaplock";

size_t mmap_mlock(const char* name, mapinfo_t* info)
   if(name == NULL || name[0] == '\0') return (size_t)-1;
   if(info == NULL) return (size_t)-1;

   const fd = open(name, O_RDONLY|O_LARGEFILE);
   if(fd < 0) {
      fprintf(stderr, "%s: Cannot open %s for reading: %s\n",
              __func__, name, strerror(errno));
      return (size_t)-1;

   struct stat st;
   fstat(fd, &st);

   void* memblock = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
   if (memblock == MAP_FAILED) {
      fprintf(stderr, "%s: Cannot mmap %s for reading: %s\n",
              __func__, name, strerror(errno));
      return (size_t)-1;
   if(mlock(memblock, st.st_size) == -1) {
      fprintf(stderr, "%s: Cannot mlock %s (size %d): %s\n",
              __func__, name, st.st_size, strerror(errno));
      munmap(memblock, st.st_size);
      return (size_t)-1;

   strncpy(info->name, name, 512); info->name[0] = '\0';
   info->fd = fd;
   info->size = st.st_size;
   info->mmaploc = memblock;

   return st.st_size;

mapinfo_t info[1021]; // this many fd's available by default

#define MAX_FILES (sizeof(info)/ sizeof(info[0]))

void usage()
   fprintf(stderr, "usage: %s \n", name);

int main(int argc, char* argv[])
   if(argc < 1) usage();

   const char* listName = argv[1];

   FILE* flist = fopen(listName, "r");
   if(flist == NULL) {
      fprintf(stderr, "%s: Cannot open %s for reading: %s\n",
              name, listName, strerror(errno));
      return 1;

   close(0); // save one fd
   int count = 0;
   while(! feof(flist)) {
      char buf[513] = {0};
      fgets(buf, 512, flist);
      if(buf[0] == '\0') break;

      if(buf[0] == '#') continue;

      int N = strlen(buf);
      if(buf[N-1] == '\r' || buf[N-1] == '\n') buf[--N] = '\0';
      if(buf[N-1] == '\r' || buf[N-1] == '\n') buf[--N] = '\0';

      mmap_mlock(buf, &info[count++]);

      if(count >= MAX_FILES) break;

      sleep(60); // ping mmaped files??`

   return 0;