2

I'm fetching some data and I want to group / split the results based on multiple field values. A similar question has been asked, about grouping results based on a single field.

In the answers there is also a solution on how to split on multiple fields. But I was trying to find a way to do this without recursion. I came up with 2 solutions: one using a combined "fieldValue1.fieldValue2" string as the result array key, and one using ... eval.

I also included the third solution from the answers linked above.

Code:

<?php

// Sample records to group. Each row is cast to a stdClass object so the
// grouping code can read the fields as properties.
$data = array_map(
    static function (array $row): object {
        return (object) $row;
    },
    [
        ['name' => 'plastic', 'price' => 99, 'type' => 'processed', 'importance' => 12],
        ['name' => 'water', 'price' => 89, 'type' => 'raw', 'importance' => 11],
        ['name' => 'glass', 'price' => 100, 'type' => 'processed', 'importance' => 11],
        ['name' => 'time', 'price' => 1, 'type' => 'raw', 'importance' => 11],
    ]
);

// Fields to group by, outermost grouping level first.
$splitFields = ['type', 'importance'];

// Expected result for solution #1: a flat array whose keys are the grouped
// field values joined with '.', in order of first appearance in $data.
$solution1 = [
    'processed.12' => [
        (object) ['name' => 'plastic', 'price' => 99, 'type' => 'processed', 'importance' => 12],
    ],
    'processed.11' => [
        (object) ['name' => 'glass', 'price' => 100, 'type' => 'processed', 'importance' => 11],
    ],
    'raw.11' => [
        (object) ['name' => 'water', 'price' => 89, 'type' => 'raw', 'importance' => 11],
        (object) ['name' => 'time', 'price' => 1, 'type' => 'raw', 'importance' => 11],
    ],
];

// Expected result for solutions #2 and #3: one nesting level per grouped
// field (type first, then importance), with the matching records as leaves.
$solution2 = [
    'processed' => [
        12 => [
            (object) ['name' => 'plastic', 'price' => 99, 'type' => 'processed', 'importance' => 12],
        ],
        11 => [
            (object) ['name' => 'glass', 'price' => 100, 'type' => 'processed', 'importance' => 11],
        ],
    ],
    'raw' => [
        11 => [
            (object) ['name' => 'water', 'price' => 89, 'type' => 'raw', 'importance' => 11],
            (object) ['name' => 'time', 'price' => 1, 'type' => 'raw', 'importance' => 11],
        ],
    ],
];




/**
 * Shared scaffolding for the grouping strategies.
 *
 * Holds the records and the list of fields to group by; concrete subclasses
 * implement getResults() to build the grouped structure.
 */
abstract class Base
{
    /** @var array Records handed to the constructor. */
    protected $data;

    /**
     * Field names to group by, in nesting order. Defaults to an empty list so
     * that isValid() is safe to call before splitBy() has been invoked.
     *
     * @var string[]
     */
    protected $splitFields = [];

    /**
     * Builds the grouped result from $this->data using $this->splitFields.
     *
     * @return array splitBy() returns this value through an `array` return type.
     */
    abstract protected function getResults();

    /**
     * @param array $data Records to group.
     */
    public function __construct(array $data)
    {
        $this->data = $data;
    }

    /**
     * Groups the records by the given fields.
     *
     * @param string ...$fields Field names, outermost grouping level first.
     *
     * @return array The grouped records, or the untouched input when no field is given.
     */
    public function splitBy(string ...$fields): array
    {
        if (empty($fields)) {
            return $this->data;
        }

        $this->splitFields = $fields;

        return $this->getResults();
    }

    /**
     * Tells whether a record carries every field currently used for grouping.
     *
     * Uses isset(), so a field that is missing or holds null makes the record invalid.
     *
     * @param object $fields Record to inspect.
     */
    public function isValid(object $fields): bool
    {
        foreach ($this->splitFields as $field) {
            if (!isset($fields->$field)) {
                return false;
            }
        }

        return true;
    }
}


// NOTE(review): $splitBy holds the same values as $splitFields and is not
// referenced anywhere else in the visible code — confirm before removing.
$splitBy = ['type', 'importance'];


// solution #1: Use a combined key
/**
 * Groups records into a flat array keyed by the grouped field values joined
 * with '.' (for example "processed.12").
 *
 * Caveat: the separator is not escaped, so values that themselves contain '.'
 * can yield the same key for different value combinations.
 */
class CombinedKey extends Base
{
    /**
     * @return array Map of combined key => list of records sharing that key.
     */
    protected function getResults(): array
    {
        $groups = [];

        foreach ($this->data as $record) {
            if (!$this->isValid($record)) {
                continue;
            }

            $groups[$this->_getKey($record)][] = $record;
        }

        return $groups;
    }

    /**
     * Joins the record's values for all split fields into one '.'-separated key.
     */
    private function _getKey(object $fields): string
    {
        $parts = [];

        foreach ($this->splitFields as $field) {
            $parts[] = $fields->$field;
        }

        return implode('.', $parts);
    }
}


// solution #2: Use eval -_-
/**
 * Groups records into a nested array (one level per split field) by generating
 * and eval()-ing an assignment such as `$return['processed']['12'][] = $fields;`.
 *
 * NOTE(review): this evaluates code built from record values. The keys are
 * emitted as PHP literals via var_export(), but eval() on data is still best
 * avoided; walking the array by reference gives the same result without it.
 */
class UseEval extends Base
{
    /**
     * @return array Nested array keyed by each split field's value in turn.
     */
    protected function getResults(): array
    {
        $return = [];

        foreach ($this->data as $fields) {
            if ($this->isValid($fields)) {
                // eval() runs in this method's scope, so the generated
                // statement appends the local $fields to the local $return.
                eval($this->_getEval($fields));
            }
        }

        return $return;
    }

    /**
     * Builds the assignment statement that files one record under its keys.
     *
     * var_export() produces a literal that evaluates back to exactly the
     * original string. addslashes() did not: it also escapes `"` and NUL, and
     * those escapes are kept verbatim inside a single-quoted literal, which
     * corrupted the resulting key.
     */
    private function _getEval(object $fields): string
    {
        $evalString = '$return';

        foreach ($this->splitFields as $field) {
            $evalString .= '[' . var_export((string) $fields->$field, true) . ']';
        }

        return $evalString . '[] = $fields;';
    }
}


// credits: https://stackoverflow.com/a/37559218/12640521
/**
 * Groups a list of objects into nested arrays, one nesting level per key.
 *
 * @param array $arr  Objects to group.
 * @param array $keys Property names to group by, outermost level first.
 *
 * @return array Nested array keyed by each property's value in turn, with the
 *               matching objects as leaves.
 */
function array_group_by($arr, array $keys)
{
    if (!is_array($arr)) {
        trigger_error('array_group_by(): The first argument should be an array', E_USER_ERROR);
    }

    if ($keys === []) {
        trigger_error('array_group_by(): The Second argument Array can not be empty', E_USER_ERROR);
    }

    // First pass: bucket every object by the value of the leading key.
    $buckets = [];
    foreach ($arr as $item) {
        $buckets[$item->{$keys[0]}][] = $item;
    }

    // Only one key: the buckets are already the final result.
    if (count($keys) <= 1) {
        return $buckets;
    }

    // Otherwise regroup each bucket by the remaining keys.
    $remainingKeys = array_slice($keys, 1);
    foreach ($buckets as $bucketKey => $members) {
        $buckets[$bucketKey] = array_group_by($members, $remainingKeys);
    }

    return $buckets;
}

// Prints the given message when a solution's output matched its expectation.
$report = static function (bool $matches, string $message): void {
    if ($matches) {
        echo $message . PHP_EOL;
    }
};

// Loose comparison (==) is intentional: the expected fixtures hold different
// object instances than $data, so only their contents can be compared.

// test solution #1
$report((new CombinedKey($data))->splitBy(...$splitFields) == $solution1, 'Solution 1 works');

// test solution #2
$report((new UseEval($data))->splitBy(...$splitFields) == $solution2, 'Solution 2 works');

// test solution #3
$report(array_group_by($data, $splitFields) == $solution2, 'Solution 3 works');

exit(PHP_EOL);

Questions:

  1. Is it possible to sort values into a multidimensional array based on data field values without using recursion or eval?
  2. Would this be considered a non evil usage of eval?

Bonus questions:

  1. Is the addslashes in solution #2 sufficient or should there be more escaping?
  2. Is using eval in this context actually more performant than using recursion? This is a simple assignment so I don't think any JIT optimization in php 8 would be lost nor opcaching since it is a single assignment statement?

Update: I tested all these solutions:

$start = hrtime(true);
...
$end = hrtime(true);

#1 TOOK: 21863290
#2 TOOK: 123387381
#3 TOOK: 19273249
# of ITERATIONS: 10000

#PHP 8.0.0alpha2 (cli) (built: Jul  9 2020 13:32:05) ( ZTS )
#Copyright (c) The PHP Group
#Zend Engine v4.0.0-dev, Copyright (c) Zend Technologies
#    with Zend OPcache v8.0.0alpha2, Copyright (c), by Zend Technologies

It appears that recursion (#3) is nonetheless the fastest. Although this may be due to the fact that I'm instantiating a class in #1 and #2.

Update: Replaced solution #3 with a class adaptation that performs the same validation checks as #1 and #2.

// solution #3: based on https://stackoverflow.com/a/37559218/12640521
/**
 * Groups records into a nested array (one level per split field) by grouping
 * on the first field and recursing into each bucket with the remaining fields.
 */
class UseRecursion extends Base
{
    /**
     * @return array Nested array keyed by each split field's value in turn.
     */
    protected function getResults(): array
    {
        // isValid() checks every split field at once, so validating here a
        // single time is enough; the recursion no longer re-checks each record
        // at every nesting level.
        $validRecords = [];
        foreach ($this->data as $fields) {
            if ($this->isValid($fields)) {
                $validRecords[] = $fields;
            }
        }

        return $this->_getResults($validRecords, $this->splitFields);
    }

    /**
     * Groups already-validated records by the first field, then recurses.
     *
     * @param array $data          Records that carry every field in $fieldsToCheck.
     * @param array $fieldsToCheck Remaining field names, current level first.
     */
    private function _getResults(array $data, array $fieldsToCheck): array
    {
        $currentField = array_shift($fieldsToCheck);

        $grouped = [];
        foreach ($data as $fields) {
            $grouped[$fields->$currentField][] = $fields;
        }

        // No fields left: the buckets are the leaves of the result.
        if ($fieldsToCheck === []) {
            return $grouped;
        }

        foreach ($grouped as $key => $bucket) {
            $grouped[$key] = $this->_getResults($bucket, $fieldsToCheck);
        }

        return $grouped;
    }
}

// Benchmarks: 
#1 TOOK: 21978272
#2 TOOK: 127363531
#3 TOOK: 26720230
# of ITERATIONS: 10000

Update: As per @yolenoyer's comment that the performance hit in #2 may be caused by string operations, I commented out the eval call to see how long it takes just to build the strings, i.e. the solution no longer produces a result, but we get to see how long building the strings takes.

#1 TOOK: 21506419
#2 TOOK: 24446435
#3 TOOK: 28065878
# of ITERATIONS: 10000

Note: Since an answer to a similar question already exists, I think this question might be closed. I think it should stay open, since it asks specifically about grouping by multiple fields. If a question dedicated to this problem already exists, I have no problem deleting this question.

  • 1
    The performance difference is not because you created a class, it is a super fast operation (depending on what is inside the constructor of course); performance difference may be better explained because many string operations are performed in solution 2. – yolenoyer Jul 16 '20 at 06:19
  • @yolenoyer Replaced #3 with an object oriented solution. I also noticed that I performed ->isValid in #1 and #2 and not in #3 which probably also contributed to the performance degradation. –  Jul 16 '20 at 06:40

0 Answers0