Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Determined in userspace by the following:
    Code Block
    /* Size in bytes of the buffer the benchmark walks: 2^30 bytes (1 GiB). */
    const size_t maxmem = (size_t)1 << 30;
    
    /*
     * Pointer-chasing micro-benchmark: estimates the average cost, in rdtsc()
     * ticks, of a dependent memory access across a maxmem-byte buffer.
     *
     * Each 64-bit slot of the buffer is filled with the address of another,
     * randomly chosen slot. The benchmark then performs 100 walks; each walk
     * starts at a random slot and follows the stored pointers for up to
     * `loops` accesses. Bit 0 of a slot is used as a "visited" flag so that a
     * walk which runs into its own trail stops early (this relies on the slot
     * addresses having bit 0 clear, i.e. on getbuf() returning suitably
     * aligned memory -- TODO confirm against getbuf()).
     *
     * Prints per-walk and overall average ticks per access; always returns 0.
     */
    int
    main(void)
    {
            const uint64_t loops = 100 * 1000 * 1000;
            uint64_t *buf = getbuf();
            uint64_t b, i, j, total;

            memset(buf, 0, maxmem);

            size_t maxidx = maxmem / sizeof(buf[0]);

            //randomly link our pointers
            for (i = 0; i < maxidx; i++) {
                    size_t idx = (size_t)random() % maxidx;
                    buf[i] = (uint64_t)&buf[idx];
            }

            total = 0;
            for (j = 0; j < 100; j++) {
                    uint64_t *p = &buf[(size_t)random() % maxidx];
                    b = rdtsc();
                    for (i = 0; i < loops; i++) {
                            // a set flag means this walk already visited the slot
                            if (*p & 0x1) {
                                    printf("encountered loop after %" PRIu64 " walks; ", i);
                                    break;
                            }
                            // read the link before tagging the slot as visited
                            uint64_t *next = (uint64_t *)*p;
                            *p |= 0x1;
                            p = next;
                    }
                    uint64_t diff = rdtsc() - b;
                    // never divide by zero, even if the walk made no accesses
                    if (i == 0)
                            i = 1;

                    printf("walk %" PRIu64 " did %" PRIu64 " accesses in %" PRIu64 " average ticks\n", j, i, (diff / i));
                    total += (diff / i);

                    //clean up & wreck the cache
                    for (i = 0; i < maxidx; i++)
                            buf[i] &= ~(uint64_t)0x1;
            }

            printf("average of all walks: %" PRIu64 " ticks\n", total / j);

            return 0;
    }
    
  • Here getbuf() returns a 1GB region of virtual address space (VA), sized by maxmem.
  • Note that Phenom and Nehalem have about 23MB of L1 and L2 data TLB coverage. The Xeon is likely similar, if not less.
  • All chips have < 10MB cache, so > 99% of the data set is uncached.

...