There are a number of ways to compute the sexy primes between one billion and two billion. Here are four.
Our first solution identifies sexy primes p by using a primality test to check both p and p + 6 for primality:
def isSexy(n):
return isPrime(n) and isPrime(n+6)
Then we check each odd number from one billion to two billion:
counter = 0
for n in xrange(1000000001, 2000000000, 2):
if isSexy(n): counter += 1
print counter
That takes an estimated two hours on my machine, determined by running it from 1 billion to 1.1 billion and multiplying by 10. We need something better. Here's the complete code:
Python 2.7.9 (default, Jun 21 2019, 00:38:53)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> def isqrt(n): # newton
... x = n; y = (x + 1) // 2
... while y < x:
... x = y
... y = (x + n // x) // 2
... return x
...
>>> def isSquare(n):
... # def q(n):
... # from sets import Set
... # s, sum = Set(), 0
... # for x in xrange(0,n):
... # t = pow(x,2,n)
... # if t not in s:
... # s.add(t)
... # sum += pow(2,t)
... # return sum
... # q(32) => 33751571
... # q(27) => 38348435
... # q(25) => 19483219
... # q(19) => 199411
... # q(17) => 107287
... # q(13) => 5659
... # q(11) => 571
... # q(7) => 23
... # 99.82% of non-squares
... # caught by filters before
... # square root calculation
... if 33751571>>(n%32)&1==0:
... return False
... if 38348435>>(n%27)&1==0:
... return False
... if 19483219>>(n%25)&1==0:
... return False
... if 199411>>(n%19)&1==0:
... return False
... if 107287>>(n%17)&1==0:
... return False
... if 5659>>(n%13)&1==0:
... return False
... if 571>>(n%11)&1==0:
... return False
... if 23>>(n% 7)&1==0:
... return False
... s = isqrt(n)
... if s * s == n: return s
... return False
...
>>> def primes(n): # sieve of eratosthenes
... i, p, ps, m = 0, 3, [2], n // 2
... sieve = [True] * m
... while p <= n:
... if sieve[i]:
... ps.append(p)
... for j in range((p*p-3)/2, m, p):
... sieve[j] = False
... i, p = i+1, p+2
... return ps
...
>>> pLimit,pList = 0,[]
>>> pLen,pMax = 0,0
>>>
>>> def storePrimes(n):
... # call with n=0 to clear
... global pLimit, pList
... global pLen, pMax
... if n == 0:
... pLimit,pList = 0,[]
... pLen,pMax = 0,0
... elif pLimit < n:
... pLimit = n
... pList = primes(n)
... # x=primesRange(pLimit,n)
... # pList += x
... pLen = len(pList)
... pMax = pList[-1]
...
>>> storePrimes(1000)
>>> def gcd(a, b): # euclid
... if b == 0: return a
... return gcd(b, a%b)
...
>>> def kronecker(a,b):
... # for any integers a and b
... # cohen 1.4.10
... if b == 0:
... if abs(a) == 1: return 1
... else: return 0
... if a%2==0 and b%2==0:
... return 0
... tab2=[0,1,0,-1,0,-1,0,1]
... v = 0
... while b%2==0: v,b=v+1,b/2
... if v%2==0: k = 1
... else: k = tab2[a%8]
... if b < 0:
... b = -b
... if a < 0: k = -k
... while True:
... if a == 0:
... if b > 1: return 0
... else: return k
... v = 0
... while a%2==0: v,a=v+1,a/2
... if v%2==1: k*=tab2[b%8]
... if (a&b&2): k = -k
... r=abs(a); a=b%r; b=r
...
>>> def jacobi(a,b):
... # for integers a and odd b
... if b%2 == 0:
... m="modulus must be odd"
... raise ValueError(m)
... return kronecker(a,b)
...
>>> def isSpsp(n,a,r=-1,s=-1):
... # strong pseudoprime
... if r < 0:
... r, s = 0, n - 1
... while s % 2 == 0:
... r, s = r + 1, s / 2
... if pow(a,s,n) == 1:
... return True
... for i in range(0,r):
... if pow(a,s,n) == n-1:
... return True
... s = s * 2
... return False
...
>>> def lucasPQ(p, q, m, n):
... # nth element of lucas
... # sequence with parameters
... # p and q (mod m); ignore
... # modulus operation when
... # m is zero
... def mod(x):
... if m == 0: return x
... return x % m
... def half(x):
... if x%2 == 1: x=x+m
... return mod(x / 2)
... un, vn, qn = 1, p, q
... u=0 if n%2==0 else 1
... v=2 if n%2==0 else p
... k=1 if n%2==0 else q
... n,d = n//2, p*p-4*q
... while n > 0:
... u2 = mod(un*vn)
... v2 = mod(vn*vn-2*qn)
... q2 = mod(qn*qn)
... n2 = n // 2
... if n % 2 == 1:
... uu = half(u*v2+u2*v)
... vv = half(v*v2+d*u*u2)
... u,v,k = uu,vv,k*q2
... un,vn,qn,n = u2,v2,q2,n2
... return u, v, k
...
>>> def isSlpsp(n):
... # strong lucas pseudoprime
... def selfridge(n):
... d,s = 5,1
... while True:
... ds = d * s
... if gcd(ds, n) > 1:
... return ds, 0, 0
... if jacobi(ds,n) == -1:
... return ds,1,(1-ds)/4
... d,s = d+2, -s
... d, p, q = selfridge(n)
... if p == 0: return n == d
... s, t = 0, n + 1
... while t % 2 == 0:
... s, t = s + 1, t / 2
... u,v,k = lucasPQ(p,q,n,t)
... if u == 0 or v == 0:
... return True
... for r in range(1, s):
... v = (v*v-2*k) % n
... if v == 0: return True
... k = (k * k) % n
... return False
...
>>> def isPrime(n):
... # mathematica method
... if n < 2: return False
... for p in pList[:25]:
... if n%p == 0: return n==p
... if isSquare(n):
... return False
... r, s = 0, n - 1
... while s % 2 == 0:
... r, s = r + 1, s / 2
... if not isSpsp(n,2,r,s):
... return False
... if not isSpsp(n,3,r,s):
... return False
... if not isSlpsp(n):
... return False
... return True
...
>>> def isSexy(n):
... return isPrime(n) and isPrime(n+6)
...
>>> counter = 0
>>> for n in xrange(1000000001, 2000000000, 2):
... if isSexy(n): counter += 1
...
>>> print counter
5924680
By the way, if you want to see how slow Python is for things like this, here is the equivalent program in Pari/GP, a programming environment designed for number-theoretic calculations, which finishes in 70253 milliseconds, just over a minute:
gettime(); c = 0;
forprime(p = 1000000000, 2000000000, if (isprime(p+6), c++));
print (c); print (gettime());
5924680
70253
Our second solution uses our standard prime generator to generate the primes from one billion to two billion, checking for each prime p if p - 6 is on the list:
counter = 0
ps = primeGen(1000000000)
p2 = next(ps); p1 = next(ps); p = next(ps)
while p < 2000000000:
if p - p2 == 6 or p - p1 == 6:
counter += 1
p2 = p1; p1 = p; p = next(ps)
print counter
That took about 8.5 minutes and produced the correct result. Here's the complete code:
Python 2.7.9 (default, Jun 21 2019, 00:38:53)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> def primeGen(start=0):
... if start <= 2: yield 2
... if start <= 3: yield 3
... ps = primeGen()
... p=next(ps); p=next(ps)
... q = p*p; D = {}
... def add(m,s):
... while m in D: m += s
... D[m] = s
... while q <= start:
... x = (start // p) * p
... if x < start: x += p
... if x%2 == 0: x += p
... add(x, p+p)
... p=next(ps); q=p*p
... c = max(start-2, 3)
... if c%2 == 0: c += 1
... while True:
... c += 2
... if c in D:
... s = D.pop(c)
... add(c+s, s)
... elif c < q: yield c
... else: # c == q
... add(c+p+p, p+p)
... p=next(ps); q=p*p
...
>>> counter = 0
>>> ps = primeGen(1000000000)
>>> p2 = next(ps); p1 = next(ps); p = next(ps)
>>> while p < 2000000000:
... if p - p2 == 6 or p - p1 == 6:
... counter += 1
... p2 = p1; p1 = p; p = next(ps)
...
p>>> print counter
5924680
If you will permit me another digression on Pari/GP, here is the equivalent program using a prime generator, computing the solution in just 37 seconds:
gettime(); p2 = nextprime(1000000000); p1=nextprime(p2+1); c = 0;
forprime(p = nextprime(p1+1), 2000000000,
if (p-p2==6 || p-p1==6, c++); p2=p1; p1=p);
print (c); print (gettime());
5924680
37273
Our third solution uses a segmented sieve to make a list of primes from one billion to two billion, then scans the list counting the sexy primes:
counter = 0
ps = primes(1000000000, 2000000000)
if ps[1] - ps[0] == 6: counter += 1
for i in xrange(2,len(ps)):
if ps[i] - ps[i-2] == 6 or ps[i] - ps[i-1] == 6:
counter += 1
print counter
That takes about four minutes to run, and produces the correct result. Here's the complete code:
Python 2.7.9 (default, Jun 21 2019, 00:38:53)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> def isqrt(n): # newton
... x = n; y = (x + 1) // 2
... while y < x:
... x = y
... y = (x + n // x) // 2
... return x
...
>>> def primes(lo, hi=False):
... if not hi: lo,hi = 0,lo
...
... if hi-lo <= 50:
... xs = range(lo,hi)
... return filter(isPrime,xs)
...
... # sieve of eratosthenes
... if lo<=2 and hi<=1000000:
... i,p,ps,m = 0,3,[2],hi//2
... sieve = [True] * m
... while p <= hi:
... if sieve[i]:
... ps.append(p)
... s = (p*p-3)/2
... for j in xrange(s,m,p):
... sieve[j] = False
... i,p = i+1, p+2
... return ps
...
... if lo < isqrt(hi):
... r = isqrt(hi) + 1
... loPs=primes(lo,r)
... hiPs=primes(r+1,hi)
... return loPs + hiPs
...
... # segmented sieve
... if lo%2==1: lo-=1
... if hi%2==1: hi+=1
... r = isqrt(hi) + 1
... b = r//2; bs = [True] * b
... ps = primes(r)[1:]
... qs = [0] * len(ps); zs = []
... for i in xrange(len(ps)):
... q = (lo+1+ps[i]) / -2
... qs[i]= q % ps[i]
... for t in xrange(lo,hi,b+b):
... if hi<(t+b+b): b=(hi-t)/2
... for j in xrange(b):
... bs[j] = True
... for k in xrange(len(ps)):
... q,p = qs[k], ps[k]
... for j in xrange(q,b,p):
... bs[j] = False
... qs[k] = (qs[k]-b)%ps[k]
... for j in xrange(b):
... if bs[j]:
... zs.append(t+j+j+1)
... return zs
...
>>> counter = 0
>>> ps = primes(1000000000, 2000000000)
>>> for i in xrange(2, len(ps)):
... if ps[i] - ps[i-2] == 6 or ps[i] - ps[i-1] == 6:
... counter += 1
...
>>> print counter
5924680
Here is our fourth and final program to count the sexy primes, suggested in the comments above. The idea is to sieve each of the polynomials 6 n + 1 and 6 n − 1 separately, scan for adjacent pairs, and combine the counts.
This is a little bit tricky, so let's look at an example: sieve 6 n + 1 on the range 100 to 200 using the primes 5, 7, 11 and 13, which are the sieving primes less than 200 (excluding 2 and 3, which divide 6). The sieve has 17 elements 103, 109, 115, 121, 127, 133, 139, 145, 151, 157, 163, 169, 175, 181, 187, 193, 199. The least multiple of 5 in the list is 115, so we strike 115, 145 and 175 (every 5th item) from the sieve. The least multiple of 7 in the list is 133, so we strike 133 and 175 (every 7th item) from the sieve. The least multiple of 11 in the list is 121, so we strike 121 and 187 (every 11th item) from the list. And the least multiple of 13 in the list is 169, so we strike it from the list (it's in the middle of a 17-item list, and has no other multiples in the list). The primes that remain in the list are 103, 109, 127, 139, 151, 157, 163, 181, 193, and 199; of those, 103, 151, 157 and 193 are sexy.
The trick is finding the offset in the sieve of the first multiple of the prime. The formula is (lo + p) / -6 (mod p), where lo is the first element in the sieve (103 in the example above) and p is the prime; -6 comes from the gap between successive elements of the sieve. In modular arithmetic, division is undefined, so we can't just divide by -6; instead, we find the modular inverse. And since modular inverse is undefined for negative numbers, we first convert -6 to its equivalent mod p. Thus, for our four sieving primes, the offsets into the sieve are:
((103 + 5) * inverse(-6 % 5, 5)) % 5 = 2 ==> points to 115
((103 + 7) * inverse(-6 % 7, 7)) % 7 = 5 ==> points to 133
((103 + 11) * inverse(-6 % 11, 11)) % 11 = 3 ==> points to 121
((103 + 13) * inverse(-6 % 13, 13)) % 13 = 11 ==> points to 169
Sieving sieve 6 n - 1 works the same way, except that lo is 101 instead of 103; the sieve contains 101, 107, 113, 119, 125, 131, 137, 143, 149, 155, 161, 167, 173, 179, 185, 191, 197:
((101 + 5) * inverse(-6 % 5, 5)) % 5 = 4 ==> points to 125
((101 + 7) * inverse(-6 % 7, 7)) % 7 = 3 ==> points to 119
((101 + 11) * inverse(-6 % 11, 11)) % 11 = 7 ==> points to 143
((101 + 13) * inverse(-6 % 13, 13)) % 13 = 7 ==> points to 143
After sieving, the numbers that remain in the sieve are 101, 107, 113, 131, 137, 149, 167, 173, 179, 191, 197, of which 101, 107, 131, 167, 173 and 191 are sexy, so there are 10 sexy primes between 100 and 200.
Here's the code:
Python 2.7.9 (default, Jun 21 2019, 00:38:53)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> def isqrt(n): # newton
... x = n; y = (x + 1) // 2
... while y < x:
... x = y
... y = (x + n // x) // 2
... return x
...
>>> def inverse(x, m): # euclid
... a, b, u = 0, m, 1
... while x > 0:
... x,a,b,u=b%x,u,x,a-b//x*u
... if b == 1: return a % m
... return 0 # must be coprime
...
>>> def primes(n): # sieve of eratosthenes
... i, p, ps, m = 0, 3, [2], n // 2
... sieve = [True] * m
... while p <= n:
... if sieve[i]:
... ps.append(p)
... for j in range((p*p-3)/2, m, p):
... sieve[j] = False
... i, p = i+1, p+2
... return ps
...
>>> counter = 0
>>> ps = primes(isqrt(2000000000))[2:]
>>> size = (2000000000-1000000000)/6+1
>>>
>>> # sieve on 6n-1
... lo = 1000000000/6*6+5
>>> sieve = [True] * size
>>> for p in ps:
... q = ((lo+p)*inverse(-6%p,p))%p
... for i in xrange(q,size,p):
... sieve[i] = False
...
>>> for i in xrange(1,size):
... if sieve[i] and sieve[i-1]:
... counter += 1
...
>>> # sieve on 6n+1
... lo += 2
>>> sieve = [True] * size
>>> for i in xrange(0,size):
... sieve[i] = True
...
>>> for p in ps:
... q = ((lo+p)*inverse(-6%p,p))%p
... for i in xrange(q,size,p):
... sieve[i] = False
...
>>> for i in xrange(1,size):
... if sieve[i] and sieve[i-1]:
... counter += 1
...
>>> print counter
5924680
That took about three minutes to run and produced the correct result.
If you are determined to count only consecutive primes that differ by 6, instead of counting all the sexy primes, the easiest way is to use the segmented sieve of the third method and change the predicate in the counting test to look only at ps[i] - ps[i-1] == 6
. Or you can do this in just 22 seconds in Pari/GP:
gettime(); prev = nextprime(1000000000); c = 0;
forprime(p = nextprime(prev+1), 2000000000, if (p-prev==6, c++); prev=p);
print (c); print (gettime());
5317860
22212