Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Determined in userspace by the following:
    Code Block
    const size_t maxmem = 1 * 1024 * 1024 * 1024;
    
    /*
     * Pointer-chasing memory latency benchmark.
     *
     * Treats the region returned by getbuf() as an array of 64-bit slots,
     * stores in every slot the address of another randomly chosen slot, and
     * then repeatedly walks those links from a random starting slot, timing
     * each walk with rdtsc() and printing the average ticks per step.
     *
     * Bit 0 of a slot is used as a "visited" mark during a walk.  This relies
     * on every stored slot address having bit 0 clear (i.e. buf being at
     * least 2-byte aligned) -- NOTE(review): confirm against getbuf().
     *
     * Returns 0.
     */
    int
    main(void)
    {
            /* Upper bound on the number of steps taken in a single walk. */
            const int loops = 100 * 1000 * 1000;

            uint64_t *buf = getbuf();
            uint64_t b, i, j;

            /* Zero the whole region before building the links. */
            memset(buf, 0, maxmem);

            size_t maxidx = maxmem / sizeof(buf[0]);

            /* Randomly link our pointers: slot i holds the address of slot idx. */
            for (i = 0; i < maxidx; i++) {
                    size_t idx = random() % maxidx;

                    buf[i] = (uint64_t)&buf[idx];
            }

            for (j = 0; j < 100; j++) {
                    uint64_t *p = &buf[random() % maxidx];

                    b = rdtsc();
                    for (i = 0; i < loops; i++) {
                            /* A marked slot means the walk has come back on itself. */
                            if (*p & 0x1) {
                                    printf("encountered loop after %" PRIu64 " walks; ", i);
                                    break;
                            }
                            /* Read the link before marking, so the mark is not followed. */
                            uint64_t *next = (uint64_t *)*p;
                            *p |= 0x1;
                            p = next;
                    }
                    uint64_t diff = rdtsc() - b;

                    /* Avoid dividing by zero when the walk ended on its first slot. */
                    if (i == 0)
                            i = 1;
                    printf("%" PRIu64 " avg. ticks / walk\n", diff / i);

                    /* Clean up the visited marks & wreck the cache. */
                    for (i = 0; i < maxidx; i++)
                            buf[i] &= ~0x1;
            }

            return 0;
    }
    
  • Where getbuf() returns a 1GB region of virtual address space (va), sized by maxmem (maxmem = 1 * 1024 * 1024 * 1024).
  • Note that Phenom and Nehalem have about 23MB of L1 and L2 data TLB coverage. The Xeon is likely similar, if not less.
  • All chips have < 10MB cache, so > 99% of the data set is uncached.

...