The following snippet attempts to recursively read and parse a damaged serialized string (blob data) - for example, a string that was too long for its database column and got cut off when stored. Numeric primitives and booleans are expected to be intact; strings may be cut off and array keys may be missing. The routine may be useful e.g. when recovering a significant part (not all) of the data is a sufficient solution for you.
class Unserializer
{
    /**
     * Parse a PHP-serialized blob, tolerating truncated strings and arrays.
     *
     * Recognised tokens:
     *   array:   a:236:{...}
     *   integer: i:123; or i:-123;
     *   double:  d:329.0001122; d:-1.5; d:1.0E+25; d:INF; d:-INF; d:NAN;
     *   boolean: b:1; or b:0;
     *   string:  s:14:"date_departure";
     *   null:    N;
     *
     * NOTES:
     * - objects (O:...) and references (R:...) are not supported
     * - primitive tokens (bool, int, float) are only recognised when complete;
     *   strings and arrays may be cut off and are recovered as far as possible
     * - array entries whose key is null or a blank string are dropped, but their
     *   value token is still consumed so that the following pairs stay aligned
     * - a string whose content itself looks like a serialized array is parsed
     *   into an array right away
     * - lengths are counted in bytes (strlen, not mb_strlen), which is what the
     *   serialization format itself uses
     *
     * @param string $str Corrupted blob string. Passed by reference: every parsed
     *                    token is removed from its beginning.
     * @return array|string|int|float|bool|null The parsed value. Null is returned
     *                    both for a serialized null and when no supported token
     *                    starts $str (in which case $str is left untouched).
     */
    public static function parseCorruptedBlob(&$str)
    {
        // Callers may hand in null/false (e.g. a failed substr() on PHP < 8).
        $str = (string) $str;

        if (preg_match('/^a:(\d+):\{/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::parseArrayItems($str, (int) $match[1]);
        }

        if (preg_match('/^s:(\d+):/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::parseStringBody($str, (int) $match[1]);
        }

        if (preg_match('/^i:([+-]?\d+);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return (int) $match[1];
        }

        if (preg_match('/^d:(-?INF|NAN|[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return self::toFloat($match[1]);
        }

        if (preg_match('/^b:(0|1);/', $str, $match)) {
            $str = (string) substr($str, strlen($match[0]));
            return $match[1] === '1';
        }

        if (strncmp($str, 'N;', 2) === 0) {
            $str = (string) substr($str, 2);
            return null;
        }

        // Unsupported or damaged token: nothing is consumed.
        return null;
    }

    /**
     * Parse up to $count key/value pairs following an already consumed "a:N:{" header.
     *
     * @param string $str   Remaining blob, consumed by reference.
     * @param int    $count Number of pairs announced by the array header.
     * @return array Pairs that could be recovered.
     */
    private static function parseArrayItems(&$str, $count)
    {
        $items = [];

        for ($i = 0; $i < $count; ++$i) {
            if ($str === '') {
                break; // blob was cut off inside this array
            }

            $lengthBefore = strlen($str);
            $key = self::parseCorruptedBlob($str);
            if (strlen($str) === $lengthBefore) {
                // No recognisable token at this position; further iterations
                // would see the very same input, so stop here.
                break;
            }

            // Always consume the value, even when the key is rejected below,
            // otherwise the value would be mistaken for the next key.
            $value = self::parseCorruptedBlob($str);

            // Only int and non-blank string keys are accepted (no null / "" keys).
            if (is_int($key) || (is_string($key) && trim($key) !== '')) {
                $items[$key] = $value;
            }
        }

        // Closing array bracket of this array only.
        if ($str !== '' && $str[0] === '}') {
            $str = (string) substr($str, 1);
        }

        return $items;
    }

    /**
     * Parse the quoted payload following an already consumed "s:N:" header.
     *
     * @param string $str    Remaining blob, consumed by reference.
     * @param int    $length Announced payload length in bytes.
     * @return string|array The (possibly truncated) string, or an array when the
     *                      payload itself is a serialized array.
     */
    private static function parseStringBody(&$str, $length)
    {
        // Payload including the surrounding double quotes.
        $raw = (string) substr($str, 0, $length + 2);
        $isComplete = strlen($raw) === $length + 2;

        // Skip the payload and the terminating semicolon.
        $str = (string) substr($str, strlen($raw) + 1);

        // Remove exactly one opening quote ...
        if ($raw !== '' && $raw[0] === '"') {
            $raw = (string) substr($raw, 1);
        }
        // ... and one closing quote, but only when the payload was not cut off;
        // in a truncated payload a trailing quote belongs to the content.
        if ($isComplete && substr($raw, -1) === '"') {
            $raw = (string) substr($raw, 0, -1);
        }

        if (preg_match('/^a:(\d+):\{/', $raw)) {
            // parse instantly another serialized array
            return (array) self::parseCorruptedBlob($raw);
        }

        return $raw;
    }

    /**
     * Convert the textual payload of a "d:" token to a float.
     *
     * @param string $literal Numeric literal, "INF", "-INF" or "NAN".
     * @return float
     */
    private static function toFloat($literal)
    {
        if ($literal === 'INF') {
            return INF;
        }
        if ($literal === '-INF') {
            return -INF;
        }
        if ($literal === 'NAN') {
            return NAN;
        }

        return (float) $literal;
    }
}
// Usage: $serializedString is taken by reference and is consumed while parsing,
// so pass a copy if the original blob is still needed afterwards.
$unserialized = Unserializer::parseCorruptedBlob($serializedString);