Beneath is a slightly unoptimized implementation (although I skipped the intermediate list and print directly) of what I think you were supposed to do. Running that program on an AMD A8-6600K with a small load (mainly a Youtube music-video for some personal entertainment) results in
real 0m1.211s
user 0m0.047s
sys 0m0.122s
averaged over a couple of runs. So the problem lies in your implementation of the sieve or you are hiding some essential facts about your hardware.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
/* I call it a general bitset. Others might call it an abomination. YMMV. */
# define ERAT_BITS (sizeof(uint32_t)*CHAR_BIT)
# define GET_BIT(s,n) ((*(s+(n/ERAT_BITS)) & ( 1<<( n % ERAT_BITS ))) != 0)
# define SET_BIT(s,n) (*(s+(n/ERAT_BITS)) |= ( 1<<( n % ERAT_BITS )))
# define CLEAR_BIT(s,n) (*(s+(n/ERAT_BITS)) &= ~( 1<<( n % ERAT_BITS )))
# define TOG_BIT(s,n) (*(s+(n/ERAT_BITS)) ^= ( 1<<( n % ERAT_BITS )))
/* size is the size in bits, the overall size might be bigger */
typedef struct mp_bitset_t {
uint32_t size;
uint32_t *content;
} mp_bitset_t;
# define mp_bitset_alloc(bst, n) \
do {\
(bst)->content=malloc(( n /(sizeof(uint32_t)) + 1 ));\
if ((bst)->content == NULL) {\
fprintf(stderr, "memory allocation for bitset failed");\
exit(EXIT_FAILURE);\
}\
(bst)->size = n;\
} while (0)
# define mp_bitset_size(bst) ((bst)->size)
# define mp_bitset_setall(bst) memset((bst)->content,~(uint32_t)(0),\
(bst->size /(sizeof(uint32_t) ) +1 ))
# define mp_bitset_clearall(bst) memset((bst)->content,0,\
(bst->size /(sizeof(uint32_t) ) +1 ))
# define mp_bitset_clear(bst,n) CLEAR_BIT((bst)->content, n)
# define mp_bitset_set(bst,n) SET_BIT((bst)->content, n)
# define mp_bitset_get(bst,n) GET_BIT((bst)->content, n)
# define mp_bitset_free(bst) \
do {\
free((bst)->content);\
free(bst);\
} while (0)
uint32_t mp_bitset_nextset(mp_bitset_t * bst, uint32_t n);
uint32_t mp_bitset_prevset(mp_bitset_t * bst, uint32_t n);
void mp_eratosthenes(mp_bitset_t * bst);
/* It's called Hallek's method but it has many inventors*/
static uint32_t isqrt(uint32_t n)
{
uint32_t s, rem, root;
if (n < 1)
return 0;
/* This is actually the highest square but it goes
* downward from this, quite fast */
s = 1 << 30;
rem = n;
root = 0;
while (s > 0) {
if (rem >= (s | root)) {
rem -= (s | root);
root >>= 1;
root |= s;
} else {
root >>= 1;
}
s >>= 2;
}
return root;
}
uint32_t mp_bitset_nextset(mp_bitset_t *bst, uint32_t n)
{
while ((n < mp_bitset_size(bst)) && (!mp_bitset_get(bst, n))) {
n++;
}
return n;
}
/*
* Standard method, quite antique now, but good enough for the handful
* of primes needed here.
*/
void mp_eratosthenes(mp_bitset_t *bst)
{
uint32_t n, k, r, j;
mp_bitset_setall(bst);
mp_bitset_clear(bst, 0);
mp_bitset_clear(bst, 1);
n = mp_bitset_size(bst);
r = isqrt(n);
for (k = 4; k < n; k += 2)
mp_bitset_clear(bst, k);
k = 0;
while ((k = mp_bitset_nextset(bst, k + 1)) < n) {
if (k > r) {
break;
}
for (j = k * k; j < n; j += k * 2) {
mp_bitset_clear(bst, j);
}
}
}
#define UPPER_LIMIT 1000000 /* one million */
int main(void) {
mp_bitset_t *bst;
uint32_t n, k, j;
bst = malloc(sizeof(mp_bitset_t));
if(bst == NULL) {
fprintf(stderr, "failed to allocate %zu bytes\n",sizeof(mp_bitset_t));
exit(EXIT_FAILURE);
}
mp_bitset_alloc(bst, UPPER_LIMIT);
mp_bitset_setall(bst);
mp_bitset_clear(bst, 0); // 0 is not prime b.d.
mp_bitset_clear(bst, 1); // 1 is not prime b.d.
n = mp_bitset_size(bst);
for (k = 4; k < n; k += 2) {
mp_bitset_clear(bst, k);
}
k = 0;
while ((k = mp_bitset_nextset(bst, k + 1)) < n) {
printf("%" PRIu32 "\n", k);
for (j = k * k; j < n; j += k * 2) {
mp_bitset_clear(bst, j);
}
}
mp_bitset_free(bst);
return EXIT_SUCCESS;
}
Compiled with
gcc-4.9 -O3 -g3 -W -Wall -Wextra -Wuninitialized -Wstrict-aliasing -pedantic -std=c11 tests.c -o tests
(GCC is gcc-4.9.real (Ubuntu 4.9.4-2ubuntu1~14.04.1) 4.9.4
)