Thursday, August 1, 2013

What to do about a dead-slow disk I/O VPS?

My new VPS provider uses OpenVZ, and the disk I/O is even slower than that of my previous hoster.

I wrote an Asterisk AGI application which compiles weather info from various sources. It is Perl (so it uses many piddly modules/shared objects on-disk) and instructs Asterisk to stream ~100 gsm/sln audio files.

When using the naked VPS this is slow as each of these files needs to be loaded in-memory.

So what to do? Wire them into the Linux file cache, of course! I compiled a list of all the non-system shared objects [i.e. not libc/libm/libdl] that my Perl apps/CGI scripts use, plus all the sound files I need, and I am mmap/mlock-ing them in core using the utility below.

The added bonus is that my external bit of code that does the Gōōgle TTS now takes only 14s to complete instead of 54s, which in audio terms is almost instantaneous.

I am not quite sure if I have to keep the fds open... have to experiment.

-ulianov
// This code is licensed under GPLv2
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

// Fallback used only when the included headers did not define O_LARGEFILE.
// NOTE(review): 0100000 (octal) looks like the Linux x86 value of this flag;
// other architectures may use a different value -- confirm before porting.
#ifndef O_LARGEFILE
  #define O_LARGEFILE  0100000
#endif

// Bookkeeping record for one file that has been mapped and locked in memory.
typedef struct {
    char        name[513];  // path of the file, as a NUL-terminated string
    int         fd;         // descriptor the file was opened with
    size_t      size;       // size of the file (and of the mapping) in bytes
    const void *mmaploc;    // address at which the file is mapped
} mapinfo_t;

// Program name, used as the prefix of the usage text and of error messages.
char *name = "mmaplock";

// Map a file read-only into memory and lock its pages in RAM.
//
// name: path of the file to map; must be a non-empty string.
// info: record filled in on success with the path, the open descriptor,
//       the mapped size and the mapping address; must not be NULL.
//       It is left untouched on failure.
//
// Returns the size of the file in bytes on success, or (size_t)-1 on
// failure. Invalid arguments fail silently; every other failure prints a
// diagnostic to stderr and releases whatever had been acquired so far.
// On success the descriptor and the mapping are deliberately left
// open/mapped and are recorded in *info.
size_t mmap_mlock(const char* name, mapinfo_t* info)
{
   if(name == NULL || name[0] == '\0') return (size_t)-1;
   if(info == NULL) return (size_t)-1;

   const int fd = open(name, O_RDONLY|O_LARGEFILE);
   if(fd < 0) {
      fprintf(stderr, "%s: Cannot open %s for reading: %s\n",
              __func__, name, strerror(errno));
      return (size_t)-1;
   }

   // The file size determines the mapping length, so a failed fstat must
   // not be ignored: st would be uninitialized.
   struct stat st;
   if(fstat(fd, &st) == -1) {
      fprintf(stderr, "%s: Cannot stat %s: %s\n",
              __func__, name, strerror(errno));
      close(fd);
      return (size_t)-1;
   }
   const size_t size = (size_t)st.st_size;

   // Note: mmap rejects a zero-length mapping, so empty files are
   // reported here as a mapping failure.
   void* memblock = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
   if (memblock == MAP_FAILED) {
      fprintf(stderr, "%s: Cannot mmap %s for reading: %s\n",
              __func__, name, strerror(errno));
      close(fd);
      return (size_t)-1;
   }
   if(mlock(memblock, size) == -1) {
      fprintf(stderr, "%s: Cannot mlock %s (size %zu): %s\n",
              __func__, name, size, strerror(errno));
      munmap(memblock, size);
      close(fd);
      return (size_t)-1;
   }

   // Bounded copy that always NUL-terminates; paths longer than the
   // buffer are truncated.
   snprintf(info->name, sizeof info->name, "%s", name);
   info->fd = fd;
   info->size = size;
   info->mmaploc = memblock;

   return size;
}

// Table of mapping records, one slot per file that gets locked in memory.
// The original note says this many fd's are available by default
// (presumably a 1024-descriptor limit minus stdin/stdout/stderr -- confirm).
mapinfo_t info[1021];

// Number of slots in info[].
#define MAX_FILES (sizeof info / sizeof info[0])

// Print a one-line usage summary to stderr and terminate the program.
// It is called when the required list-file argument is missing, so the
// exit status is a failure code rather than 0.
void usage(void)
{
   fprintf(stderr, "usage: %s <list-file>\n", name);
   exit(EXIT_FAILURE);
}

// Entry point. argv[1] names a text file that lists one path per line;
// lines starting with '#' and blank lines are skipped. Each listed file is
// mapped and locked in memory via mmap_mlock(), up to MAX_FILES files, and
// the process then sleeps forever so that the mappings stay alive.
//
// Returns 1 if the list file cannot be opened; otherwise it never returns.
int main(int argc, char* argv[])
{
   // argv[1] is required; with argc < 2 it would be NULL.
   if(argc < 2) usage();

   const char* listName = argv[1];

   FILE* flist = fopen(listName, "r");
   if(flist == NULL) {
      fprintf(stderr, "%s: Cannot open %s for reading: %s\n",
              name, listName, strerror(errno));
      return 1;
   }

   close(0); // save one fd
   size_t count = 0;
   char buf[513];
   // Let fgets itself signal end-of-input, and stop once the table is full.
   while(count < MAX_FILES && fgets(buf, sizeof buf, flist) != NULL) {
      if(buf[0] == '#') continue;

      // Cut the line at the first CR or LF; this is safe on empty strings.
      buf[strcspn(buf, "\r\n")] = '\0';
      if(buf[0] == '\0') continue;

      // Consume a slot only when the file was actually mapped and locked.
      if(mmap_mlock(buf, &info[count]) != (size_t)-1) count++;
   }
   fclose(flist);

   // Stay alive forever: the locked mappings exist only while the process does.
   for(;;)
      sleep(60); // ping mmaped files??

   return 0;
}