0

I am now trying to make a program to find the Absolute Euler Pseudoprimes ('AEPSP' in short, not Euler-Jacobi Pseudoprimes), with the definition that n is an AEPSP if

a(n-1)/2 ≡ ±1 (mod n)

for all positive integers a satisfying that the GCD of a and n is 1.

I used a C++ code to generate AEPSPs, which is based on a code to generate Carmichael numbers:

#include <iostream>
#include <cmath>
#include <algorithm>
#include <numeric>

using namespace std;

unsigned int bm(unsigned int a, unsigned int n, unsigned int p){
    unsigned long long b;
    switch (n) {
        case 0:
            return 1;
        case 1:
            return a % p;
        default:
            b = bm(a,n/2,p);
            b = (b*b) % p;
            if (n % 2 == 1) b = (b*a) % p;
            return b;
        }
} 

int numc(unsigned int n){
    int a, s;
    int found = 0;
    if (n % 2 == 0) return 0;
    s = sqrt(n);
    a = 2;
    while (a < n) {
        if (a > s && !found) {
            return 0;
        }
        if (gcd(a, n) > 1) {
            found = 1;
        }
        else {
            if (bm(a, (n-1)/2, n) != 1) {
                return 0;
            }
        }
        a++;
    }
    return 1;
}

int main(void) {
    unsigned int n;
    for (n = 3; n < 1e9; n += 2){
    if (numc(n)) printf("%u\n",n);
    }
    return 0; 
}  

Yet, the program is very slow. It generates AEPSPs up to 1.5e6 in 20 minutes. Does anyone have any ideas on optimizing the program?

Any help is most appreciated. :)

Bob__
  • 12,361
  • 3
  • 28
  • 42
  • 2
    you can use boost or other libraries, https://www.boost.org/doc/libs/. If you want to optimize your code yourself, I would start with memoization (i.e. caching) calculations that you repeat frequently – A-_-S Feb 15 '23 at 09:06
  • Have you considered implementing `numc` as a sieve, instead of checking the gcd of all the *a* values? BTW, shouldn't you avoid all the *a* such that gcd(*a*, *n*) == 0? – Bob__ Feb 15 '23 at 09:28

1 Answers1

0

I've come up with a different algorithm, based on sieving for primes upfront while simultaneously marking off non-squarefree numbers. I've applied a few optimizations to pack the information into memory a bit tighter, to help with cache-friendliness as well. Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define PRIME_BIT  (1UL << 31)
#define SQUARE_BIT (1UL << 30)
#define FACTOR_MASK (SQUARE_BIT - 1)

void sieve(uint64_t size, uint32_t *buffer) {
    for (uint64_t i = 3; i * i < size; i += 2) {
        if (buffer[i/2] & PRIME_BIT) {
            for (uint64_t j = i * i; j < size; j += 2 * i) {
                buffer[j/2] &= SQUARE_BIT;
                buffer[j/2] |= i;
            }
            for (uint64_t j = i * i; j < size; j += 2 * i * i) {
                buffer[j/2] |= SQUARE_BIT;
            }
        }
    }
}

int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: prog LIMIT\n");
        return 1;
    }

    uint64_t size = atoi(argv[1]);

    uint32_t *buffer = malloc(size * sizeof(uint32_t) / 2);
    memset(buffer, 0x80, size * sizeof(uint32_t) / 2);

    sieve(size, buffer);

    for (uint64_t i = 5; i < size; i += 4) {
        if (buffer[i/2] & PRIME_BIT)
            continue;
        if (buffer[i/2] & SQUARE_BIT)
            continue;

        uint64_t num = i;
        uint64_t factor;

        while (num > 1) {
            if (buffer[num/2] & PRIME_BIT)
                factor = num;
            else
                factor = buffer[num/2] & FACTOR_MASK;
            
            if ((i / 2) % (factor - 1) != 0) {
                break;
            }
            num /= factor;
        }

        if (num == 1)
            printf("Found pseudo-prime: %ld\n", i);
    }
}

This produces the pseudo-primes up to 1.5e6 in about 8ms on my machine, and for 2e9 it takes 1.8sec.

The time complexity of the solution is O(n log n) - the sieve is O(n log n), and for each number we either do constant time checks or do a divisibility test for each of its factors, which there are at most log n. So, the main loop is also O(n log n), resulting in O(n log n) overall.

Dillon Davis
  • 6,679
  • 2
  • 15
  • 37
  • Thanks! However, when I tried to run it on OnlineGDB with C++20, it ran into error, showing ```main.cpp: In function ‘int main(int, char**)’: main.cpp:32:30: error: invalid conversion from ‘void*’ to ‘uint32_t*’ {aka ‘unsigned int*’} [-fpermissive] 32 | uint32_t *buffer = malloc(size * sizeof(uint32_t) / 2); | ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | void* ``` – Mason Chan Feb 19 '23 at 18:57
  • @MasonChan my code is for C- it should work in C++, so that's strange. In C, you shouldn't cast the result of malloc, but apparently its necessary in C++. See- https://stackoverflow.com/a/605858/6221024, for why the C code doesn't have the explicit cast. – Dillon Davis Feb 20 '23 at 03:00