0

I have some browse nodes returned from the Amazon API as XML, which look like the output below. How can I walk through this mess, flatten it, and extract the data I need? This is the input:

object(SimpleXMLElement)#72 (1) {
  ["BrowseNode"]=>
  array(2) {
    [0]=>
    object(SimpleXMLElement)#73 (3) {
      ["BrowseNodeId"]=>
      string(10) "1342630031"
      ["Name"]=>
      string(8) "Chargers"
      ["Ancestors"]=>
      object(SimpleXMLElement)#75 (1) {
        ["BrowseNode"]=>
        object(SimpleXMLElement)#76 (3) {
          ["BrowseNodeId"]=>
          string(9) "389516011"
          ["Name"]=>
          string(11) "Accessories"
          ["Ancestors"]=>
          object(SimpleXMLElement)#77 (1) {
            ["BrowseNode"]=>
            object(SimpleXMLElement)#78 (3) {
              ["BrowseNodeId"]=>
              string(9) "389514011"
              ["Name"]=>
              string(38) "Sat Nav, GPS, Navigation & Accessories"
              ["Ancestors"]=>
              object(SimpleXMLElement)#79 (1) {
                ["BrowseNode"]=>
                object(SimpleXMLElement)#80 (4) {
                  ["BrowseNodeId"]=>
                  string(6) "560800"
                  ["Name"]=>
                  string(10) "Categories"
                  ["IsCategoryRoot"]=>
                  string(1) "1"
                  ["Ancestors"]=>
                  object(SimpleXMLElement)#81 (1) {
                    ["BrowseNode"]=>
                    object(SimpleXMLElement)#82 (2) {
                      ["BrowseNodeId"]=>
                      string(6) "560798"
                      ["Name"]=>
                      string(19) "Electronics & Photo"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    [1]=>
    object(SimpleXMLElement)#74 (3) {
      ["BrowseNodeId"]=>
      string(9) "340328031"
      ["Name"]=>
      string(12) "Car Chargers"
      ["Ancestors"]=>
      object(SimpleXMLElement)#75 (1) {
        ["BrowseNode"]=>
        object(SimpleXMLElement)#76 (3) {
          ["BrowseNodeId"]=>
          string(9) "340327031"
          ["Name"]=>
          string(8) "Chargers"
          ["Ancestors"]=>
          object(SimpleXMLElement)#77 (1) {
            ["BrowseNode"]=>
            object(SimpleXMLElement)#78 (3) {
              ["BrowseNodeId"]=>
              string(6) "560826"
              ["Name"]=>
              string(11) "Accessories"
              ["Ancestors"]=>
              object(SimpleXMLElement)#79 (1) {
                ["BrowseNode"]=>
                object(SimpleXMLElement)#80 (3) {
                  ["BrowseNodeId"]=>
                  string(10) "1340509031"
                  ["Name"]=>
                  string(29) "Mobile Phones & Communication"
                  ["Ancestors"]=>
                  object(SimpleXMLElement)#81 (1) {
                    ["BrowseNode"]=>
                    object(SimpleXMLElement)#82 (4) {
                      ["BrowseNodeId"]=>
                      string(6) "560800"
                      ["Name"]=>
                      string(10) "Categories"
                      ["IsCategoryRoot"]=>
                      string(1) "1"
                      ["Ancestors"]=>
                      object(SimpleXMLElement)#83 (1) {
                        ["BrowseNode"]=>
                        object(SimpleXMLElement)#84 (2) {
                          ["BrowseNodeId"]=>
                          string(6) "560798"
                          ["Name"]=>
                          string(19) "Electronics & Photo"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

I want to walk through it and flatten it into a structure I can work with like the following:

array(

    (1342630031,'Chargers'),

    (389516011,'Accessories'),

    (389514011,'Sat Nav, GPS, Navigation & Accessories'),

    (560800,'Categories'),

    (560798,'Electronics & Photo'),

    (340328031,'Car Chargers'),

    (340327031,'Chargers'),

    (560826,'Accessories'),

    (1340509031,'Mobile Phones & Communication'),

    (560800,'Categories'),

    (560798,'Electronics & Photo')

)

This would then allow me to:

echo $array[0][0];

echo $array[0][1];

echo $array[5][1];

Which would give:

1342630031

Chargers

Car Chargers

etc...

If it helps, here is the original XML:

    <?xml version="1.0" encoding="UTF-8"?>
<BrowseNodes>
   <BrowseNode>
      <BrowseNodeId>1342630031</BrowseNodeId>
      <Name>Chargers</Name>
      <Ancestors>
         <BrowseNode>
            <BrowseNodeId>389516011</BrowseNodeId>
            <Name>Accessories</Name>
            <Ancestors>
               <BrowseNode>
                  <BrowseNodeId>389514011</BrowseNodeId>
                  <Name>Sat Nav, GPS, Navigation &amp; Accessories</Name>
                  <Ancestors>
                     <BrowseNode>
                        <BrowseNodeId>560800</BrowseNodeId>
                        <Name>Categories</Name>
                        <IsCategoryRoot>1</IsCategoryRoot>
                        <Ancestors>
                           <BrowseNode>
                              <BrowseNodeId>560798</BrowseNodeId>
                              <Name>Electronics &amp; Photo</Name>
                           </BrowseNode>
                        </Ancestors>
                     </BrowseNode>
                  </Ancestors>
               </BrowseNode>
            </Ancestors>
         </BrowseNode>
      </Ancestors>
   </BrowseNode>
   <BrowseNode>
      <BrowseNodeId>340328031</BrowseNodeId>
      <Name>Car Chargers</Name>
      <Ancestors>
         <BrowseNode>
            <BrowseNodeId>340327031</BrowseNodeId>
            <Name>Chargers</Name>
            <Ancestors>
               <BrowseNode>
                  <BrowseNodeId>560826</BrowseNodeId>
                  <Name>Accessories</Name>
                  <Ancestors>
                     <BrowseNode>
                        <BrowseNodeId>1340509031</BrowseNodeId>
                        <Name>Mobile Phones &amp; Communication</Name>
                        <Ancestors>
                           <BrowseNode>
                              <BrowseNodeId>560800</BrowseNodeId>
                              <Name>Categories</Name>
                              <IsCategoryRoot>1</IsCategoryRoot>
                              <Ancestors>
                                 <BrowseNode>
                                    <BrowseNodeId>560798</BrowseNodeId>
                                    <Name>Electronics &amp; Photo</Name>
                                 </BrowseNode>
                              </Ancestors>
                           </BrowseNode>
                        </Ancestors>
                     </BrowseNode>
                  </Ancestors>
               </BrowseNode>
            </Ancestors>
         </BrowseNode>
      </Ancestors>
   </BrowseNode>
</BrowseNodes>
Ben Paton
  • 1,432
  • 9
  • 35
  • 59
  • DOMDocument (http://php.net/manual/en/class.domdocument.php) and DOMXpath (http://php.net/manual/en/class.domxpath.php) are your friend – Jake N Nov 22 '16 at 22:15
  • I've not used either of those before. A little bit more of a pointer of where to start would be useful – Ben Paton Nov 22 '16 at 22:44
  • Possible duplicate of [How to convert xml into array in php?](http://stackoverflow.com/questions/6578832/how-to-convert-xml-into-array-in-php) – Jason Yost Nov 23 '16 at 00:12

4 Answers4

1

Using XPath is the easiest way to read data from an XML document. You use one expression to iterate over the items and several more to extract the data for each item.

// Parse the XML string and set up an XPath evaluator over the document.
$document = new DOMDocument();
$document->loadXml($xml);
$xpath = new DOMXpath($document);

// Build a map of browse nodes keyed by their id.
$result = [];
$browseNodes = $xpath->evaluate('//BrowseNode[BrowseNodeId]');
foreach ($browseNodes as $node) {
  // Evaluated relative to the current BrowseNode element.
  $nodeId = $xpath->evaluate('string(BrowseNodeId)', $node);
  // Only the first occurrence of an id is stored; repeats are skipped.
  if (!array_key_exists($nodeId, $result)) {
    $result[$nodeId] = [
      'id' => $nodeId,
      'name' => $xpath->evaluate('string(Name)', $node)
    ];
  }
}

var_dump($result);

Output:

array(9) {
  [1342630031]=>
  array(2) {
    ["id"]=>
    string(10) "1342630031"
    ["name"]=>
    string(8) "Chargers"
  }
  [389516011]=>
  array(2) {
    ["id"]=>
    string(9) "389516011"
    ["name"]=>
    string(11) "Accessories"
  }
  ...
}

//BrowseNode[BrowseNodeId] fetches any BrowseNode element in the document that has a BrowseNodeId child element. string(BrowseNodeId) is evaluated in the context of a node: it selects all BrowseNodeId children and casts the first one to a string (an empty string if no node is found).

By using the id as the key of the array, duplicates will be eliminated.

ThW
  • 19,120
  • 3
  • 22
  • 44
0

This is a bit ugly but flattens it into a structure I can work with, not quite the output I wanted but maybe close enough to use.

// Round-trip the SimpleXMLElement through JSON to get plain nested PHP arrays.
$json = json_encode($xml);
$array = json_decode($json, TRUE);

// Iterate over every leaf value of the nested array.
$it = new RecursiveIteratorIterator(new RecursiveArrayIterator($array));

// Start from an empty list so $values is always defined (even when there are
// no leaves) and never carries over entries from an earlier assignment.
$values = [];
foreach ($it as $v) {
    $values[] = $v;
}
Ben Paton
  • 1,432
  • 9
  • 35
  • 59
0
// Parse the input as XML. loadHTML() would run it through the HTML parser
// instead, and the case-sensitive element names used in the XPath queries
// below would then not be matched reliably.
$DOM = new DOMDocument();
$DOM->loadXML($xml);

$XPATH = new DOMXpath($DOM);

// Gets all BrowseNodeId elements anywhere within the document
$r = $XPATH->query("//BrowseNodeId");

// Gets only the BrowseNodeId elements of BrowseNode elements that sit directly below the root BrowseNodes
$r = $XPATH->query("/BrowseNodes/BrowseNode/BrowseNodeId");

You probably want to use the first XPath query to get all the Id elements.

$r = $XPATH->query("//BrowseNodeId");

foreach ($r as $idElement) { // $idElement is a BrowseNodeId element matched by the query
     // Visit every sibling that follows the id. The loop tests the current
     // sibling (not its successor), so the last sibling is inspected too.
     for ($sibling = $idElement->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
          // Whitespace between tags shows up as text nodes, which have no
          // tagName; only element nodes are compared.
          if ($sibling instanceof DOMElement && "Name" == $sibling->tagName) {
                echo "The ID for " . $sibling->nodeValue . " is " . $idElement->nodeValue;
          }
     }
}

That gives you a start / idea at least.

It's untested.

Jake N
  • 10,535
  • 11
  • 66
  • 112
0

Consider XSLT to flatten source XML and then loop through result to populate your array:

// Load the XML source and XSLT string
$doc = simplexml_load_file('Input.xml');

// The stylesheet emits one <data> element per BrowseNodeId found anywhere
// below the root, containing a copy of that id and of its following Name sibling.
$xslstr = '<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
             <xsl:output version="1.0" encoding="UTF-8" indent="yes" />
             <xsl:strip-space elements="*"/>      
             <xsl:template match="/BrowseNodes">
                <xsl:copy>            
                   <xsl:apply-templates select="descendant::BrowseNodeId"/>
                </xsl:copy>
             </xsl:template>      
             <xsl:template match="BrowseNodeId">
                <data>            
                    <xsl:copy-of select="."/>
                    <xsl:copy-of select="following-sibling::Name"/>
                </data>
            </xsl:template>  
          </xsl:transform>';
$xsl = new SimpleXMLElement($xslstr);

// Configure and run the transformer
$proc = new XSLTProcessor;
$proc->importStyleSheet($xsl);
$newXML = $proc->transformToXML($doc);

// Populate flattened array
$output = new SimpleXMLElement($newXML);

// One [id, name] pair per <data> element, in document order.
$values = [];
foreach ($output->data as $line) {
    $values[] = [
        (string)$line->BrowseNodeId,
        (string)$line->Name,
    ];
}

New XML

<?xml version="1.0" encoding="UTF-8"?>
<BrowseNodes>
  <data>
    <BrowseNodeId>1342630031</BrowseNodeId>
    <Name>Chargers</Name>
  </data>
  <data>
    <BrowseNodeId>389516011</BrowseNodeId>
    <Name>Accessories</Name>
  </data>
  <data>
    <BrowseNodeId>389514011</BrowseNodeId>
    <Name>Sat Nav, GPS, Navigation &amp; Accessories</Name>
  </data>
  <data>
    <BrowseNodeId>560800</BrowseNodeId>
    <Name>Categories</Name>
  </data>
  <data>
    <BrowseNodeId>560798</BrowseNodeId>
    <Name>Electronics &amp; Photo</Name>
  </data>
  <data>
    <BrowseNodeId>340328031</BrowseNodeId>
    <Name>Car Chargers</Name>
  </data>
  <data>
    <BrowseNodeId>340327031</BrowseNodeId>
    <Name>Chargers</Name>
  </data>
  <data>
    <BrowseNodeId>560826</BrowseNodeId>
    <Name>Accessories</Name>
  </data>
  <data>
    <BrowseNodeId>1340509031</BrowseNodeId>
    <Name>Mobile Phones &amp; Communication</Name>
  </data>
  <data>
    <BrowseNodeId>560800</BrowseNodeId>
    <Name>Categories</Name>
  </data>
  <data>
    <BrowseNodeId>560798</BrowseNodeId>
    <Name>Electronics &amp; Photo</Name>
  </data>
</BrowseNodes>

Values Array

array(11) {
  [0]=>
  array(2) {
    [0]=>
    string(10) "1342630031"
    [1]=>
    string(8) "Chargers"
  }
  [1]=>
  array(2) {
    [0]=>
    string(9) "389516011"
    [1]=>
    string(11) "Accessories"
  }
  [2]=>
  array(2) {
    [0]=>
    string(9) "389514011"
    [1]=>
    string(38) "Sat Nav, GPS, Navigation & Accessories"
  }
  [3]=>
  array(2) {
    [0]=>
    string(6) "560800"
    [1]=>
    string(10) "Categories"
  }
  [4]=>
  array(2) {
    [0]=>
    string(6) "560798"
    [1]=>
    string(19) "Electronics & Photo"
  }
  [5]=>
  array(2) {
    [0]=>
    string(9) "340328031"
    [1]=>
    string(12) "Car Chargers"
  }
  [6]=>
  array(2) {
    [0]=>
    string(9) "340327031"
    [1]=>
    string(8) "Chargers"
  }
  [7]=>
  array(2) {
    [0]=>
    string(6) "560826"
    [1]=>
    string(11) "Accessories"
  }
  [8]=>
  array(2) {
    [0]=>
    string(10) "1340509031"
    [1]=>
    string(29) "Mobile Phones & Communication"
  }
  [9]=>
  array(2) {
    [0]=>
    string(6) "560800"
    [1]=>
    string(10) "Categories"
  }
  [10]=>
  array(2) {
    [0]=>
    string(6) "560798"
    [1]=>
    string(19) "Electronics & Photo"
  }
}
Parfait
  • 104,375
  • 17
  • 94
  • 125