I hope that @Greybeard will and @tyghn will permit me a second solution - because it is recursive and different from the others. It is unashamedly influenced by Greybeard's prompt to look for "Divide-and-Conquer" algorithms: things like mergeSort.
NOTE: I had hoped to end up with an O(N^2.log(N)) solution, but, sadly, each subdivision still ends up doing O(N^2) work, so the overall algorithm is still O(N^3). In fact, it is slower than my O(N^3) dynamic-programming solution. I leave the solution here in the hope that others might be able to see how to make the "conquer" bit of divide-and-conquer do less work on a smaller set of rows.
The idea, like many divide-and-conquer techniques, is to repeatedly divide the domain into 2 and then merge optimals.
Here, we successively divide the matrix into upper (1) and lower (2) layers, with top and bottom rows being i1 and i2:
--------------------- i1
upper (1)
.....................
lower (2)
--------------------- i2
In EACH layer we calculate "best" arrays B[][], U[][], N[][], L[][] of different shapes, where B[j1][j2] etc. means the best starting in column j1 and finishing in column j2. The shapes of these are:
+---+ + + +---+ + +
B: | | U: | | N: | | L: | |
+---+ +---+ + + + +
Note that U and N do not need "drop lines" - they could consist of just horizontal lines. However, they must extend to the
top or bottom of layer, whilst L extends through the whole depth.
EDIT: actually, for B we only need to retain a scalar from each layer, because it is not necessary to merge it across layers; this is done in the edited code below. (Reduces the number of arrays and makes code slightly simpler and faster.)
When we come to "merge" the layers then there are only a small number of candidates. e.g. for B
-------------------------------------------------
+---+
Either |B1 | +---+
+---+ | |
.........................................| | N1 + U2
+---+ | |
or |B2 | or +---+
+---+
------------------------------------------------
and similarly for the other shapes.
When you have divided down to a single row then the evaluation of B, U, N, L for each is trivial.
Below is the code, with some checking routines and a random solution.
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <utility>
#include <limits>
using namespace std;
using matrix = vector<vector<int>>;
const int LARGE_NEGATIVE = numeric_limits<int>::min();
//------------------------------------------------------------------------------
ostream &operator << ( ostream &out, const matrix &M )
{
for ( auto &row : M )
{
for ( auto e : row ) out << setw( 10 ) << e << " ";
out << '\n';
}
return out;
}
//------------------------------------------------------------------------------
matrix transpose( const matrix &M )
{
int m = M.size(), n = M[0].size();
matrix result( n, vector<int>( m ) );
for ( int i = 0; i < m; i++ )
{
for ( int j = 0; j < n; j++ ) result[j][i] = M[i][j];
}
return result;
}
//------------------------------------------------------------------------------
matrix fillRand( int m, int n, int lower, int higher )
{
matrix M( m, vector<int>( n ) );
for ( int i = 0; i < m; i++ )
{
for ( int j = 0; j < n; j++ ) M[i][j] = lower + rand() % ( higher - lower + 1 );
}
return M;
}
//------------------------------------------------------------------------------
int allShapes( matrix &M, int i1, int i2, int n, matrix &U, matrix &N, matrix &L )
{
int B;
if ( i2 == i1 ) // Single row
{
vector<int> sumLeft( n );
sumLeft[0] = M[i1][0];
for ( int j2 = 1; j2 < n; j2++ )
{
sumLeft[j2] = sumLeft[j2-1] + M[i1][j2];
for ( int j1 = 0; j1 < j2; j1++ )
{
U[j1][j2] = N[j1][j2] = sumLeft[j2] - ( j1 > 0 ? sumLeft[j1-1] : 0 );
L[j1][j2] = M[i1][j1] + M[i2][j2];
}
}
B = LARGE_NEGATIVE; // No B's from a single row.
}
else // Split into upper and lower layers
{
// *** Divide and "conquer" (not quite, sadly - always do O(n^2) work) ***
// Workspace - potentially slow to set up multiple times
matrix U1, U2, N1, N2, L1, L2;
U1 = U2 = N1 = N2 = L1 = L2 = matrix( n, vector<int>( n ) );
int im = i1 + ( i2 - i1 ) / 2;
int B1 = allShapes( M, i1 , im, n, U1, N1, L1 );
int B2 = allShapes( M, im + 1, i2, n, U2, N2, L2 );
B = max( B1, B2 );
// *** Merge ***
for ( int j2 = 1; j2 < n; j2++ )
{
for ( int j1 = 0; j1 < j2; j1++ )
{
B = max( B, N1[j1][j2] + U2[j1][j2] );
U[j1][j2] = max( U1[j1][j2], U2[j1][j2] + L1[j1][j2] );
N[j1][j2] = max( N2[j1][j2], N1[j1][j2] + L2[j1][j2] );
L[j1][j2] = L1[j1][j2] + L2[j1][j2];
}
}
}
return B;
}
//------------------------------------------------------------------------------
int maxPerimeterSum( matrix &M )
{
int m = M.size(), n = M[0].size();
// Transpose if necessary to work with smaller maximum width (==> less memory needed)
if ( m < n )
{
M = transpose( M );
int t = m; m = n; n = t;
}
// Main arrays
matrix( n, vector<int>( n ) );
matrix U, N, L; U = N = L = matrix( n, vector<int>( n ) );
// Call master routine
return allShapes( M, 0, m - 1, n, U, N, L );
}
//------------------------------------------------------------------------------
int bruteForce( const vector<vector<int>> &M )
{
int m = M.size(), n = M[0].size();
vector<vector<int>> sumLeft( m, vector<int>( n, 0 ) ), sumAbove( m, vector<int>( n, 0 ) );
for ( int i = 0; i < m; i++ )
{
sumLeft[i][0] = M[i][0];
for ( int j = 1; j < n; j++ ) sumLeft[i][j] = sumLeft[i][j-1] + M[i][j];
}
for ( int j = 0; j < n; j++ )
{
sumAbove[0][j] = M[0][j];
for ( int i = 1; i < m; i++ ) sumAbove[i][j] = sumAbove[i-1][j] + M[i][j];
}
int best = M[0][0] + M[0][1] + M[1][0] + M[1][1];
for ( int i1 = 0; i1 < m - 1; i1++ )
{
for ( int i2 = i1 + 1; i2 < m; i2++ )
{
for ( int j1 = 0; j1 < n - 1; j1++ )
{
for ( int j2 = j1 + 1; j2 < n; j2++ )
{
int candidate = sumLeft[i1][j2] + sumLeft[i2][j2];
if ( j1 > 0 ) candidate -= ( sumLeft[i1][j1-1] + sumLeft[i2][j1-1] );
if ( i2 - 1 > i1 ) candidate += ( sumAbove[i2-1][j1] - sumAbove[i1][j1] + sumAbove[i2-1][j2] - sumAbove[i1][j2] );
if ( candidate > best ) best = candidate;
}
}
}
}
return best;
}
//------------------------------------------------------------------------------
void check( matrix &M )
{
int m = M.size(), n = M[0].size();
cout << "Case: m = " << m << " n = " << n << "\n\n";
// if ( m <= 5 && n <= 5 ) cout << "Matrix:\n" << M << "\n\n";
cout << "Main solver: " << maxPerimeterSum( M ) << '\n'; // <==== O(N^3) solution
cout << "Not-quite brute force: " << bruteForce ( M ) << '\n'; // <==== O(N^4) solution
cout << "\n=======\n\n";
}
//------------------------------------------------------------------------------
int main()
{
matrix M;
// Example 1 (first example of Motomotes)
M = { { 2, -8, -9, -2, 8 },
{ 6, 0, 2, 7, 4 },
{ -6, -8, -4, -4, 1 },
{ 0, -8, -1, 4, 4 } };
check( M );
// Example 2 (second example of Motomotes)
M = { { 9, -2, -1, 3 },
{ -10, -5, 1, -4 },
{ 1, -1, 12, -2 },
{ 3, 8, 0, -1 },
{ 2, 2, -1, 2 } };
check( M );
// Examples of random matrices
const int lower = -100, higher = 100;
srand( time( 0 ) );
vector<pair<int,int>> tests = { { 2, 2 }, { 2, 4 }, { 4, 2 }, { 10, 10 }, { 100, 50 }, { 50, 100 }, { 500, 500 } };
for ( pair<int,int> pr : tests )
{
M = fillRand( pr.first, pr.second, lower, higher );
check( M );
}
}
Output:
Case: m = 4 n = 5
Main solver: 22
Not-quite brute force: 22
=======
Case: m = 5 n = 4
Main solver: 23
Not-quite brute force: 23
=======
Case: m = 2 n = 2
Main solver: -64
Not-quite brute force: -64
=======
Case: m = 2 n = 4
Main solver: 129
Not-quite brute force: 129
=======
Case: m = 4 n = 2
Main solver: 16
Not-quite brute force: 16
=======
Case: m = 10 n = 10
Main solver: 1059
Not-quite brute force: 1059
=======
Case: m = 100 n = 50
Main solver: 3448
Not-quite brute force: 3448
=======
Case: m = 50 n = 100
Main solver: 4457
Not-quite brute force: 4457
=======
Case: m = 500 n = 500
Main solver: 11369
Not-quite brute force: 11369
=======