php序列化serialize() 与反序列化unserialize()

时间：2022-06-24 17:24:06 编辑：袖梨来源：一聚教程网

把复杂的数据类型压缩到一个字符串中

serialize() 把变量和它们的值编码成文本形式

unserialize() 恢复原先变量

eg:

代码如下

复制代码

// Serialize an array to its text form, print it, then restore the array from
// that text and print the restored value.
$stooges = array('Moe','Larry','Curly');
$new = serialize($stooges);
print_r($new);
echo "
";
print_r(unserialize($new));

结果：
a:3:{i:0;s:3:"Moe";i:1;s:5:"Larry";i:2;s:5:"Curly";}
Array ( [0] => Moe [1] => Larry [2] => Curly )

当把这些序列化的数据放在URL中在页面之间传递时，需要对这些数据调用urlencode()，以确保其中的URL元字符得到正确处理：

代码如下

复制代码

// Serialized data carried in a URL must be passed through urlencode() so that
// URL metacharacters inside it (quotes, braces, semicolons, ...) are escaped.
$shopping = array('Poppy seed bagel' => 2,'Plain Bagel' =>1,'Lox' =>4);

// Build a link that hands the serialized cart to the next page.
echo '<a href="next.php?cart=' . urlencode(serialize($shopping)) . '">next</a>';

magic_quotes_gpc和magic_quotes_runtime配置项的设置会影响传递到unserialize()中的数据。

如果magic_quotes_gpc项是启用的，那么在URL、POST变量以及cookies中传递的数据在反序列化之前必须用stripslashes()进行处理：

代码如下

复制代码

// If magic_quotes_gpc is enabled: strip the added slashes before unserializing
$new_cart = unserialize(stripslashes($cart));

// If magic_quotes_gpc is disabled: the data can be unserialized directly
$new_cart = unserialize($cart);

如果magic_quotes_runtime是启用的，那么在向文件中写入序列化的数据之前必须用addslashes()进行处理，而在读取它们之前则必须用stripslashes()进行处理：

代码如下

复制代码

// Write the serialized value to a file, escaping it with addslashes() first.
$fp = fopen('/tmp/cart','w');

fputs($fp,addslashes(serialize($a)));

fclose($fp);

// If magic_quotes_runtime is enabled: strip slashes from the file contents first
$new_cat = unserialize(stripslashes(file_get_contents('/tmp/cart')));

// If magic_quotes_runtime is disabled: unserialize the file contents directly
$new_cat = unserialize(file_get_contents('/tmp/cart'));

在启用了magic_quotes_runtime的情况下，从数据库中读取序列化的数据也必须经过stripslashes()的处理，保存到数据库中的序列化数据必须要经过addslashes()的处理，以便能够适当地存储。

代码如下

复制代码

// Store the serialized cart in the database, escaped with addslashes(),
// then read the stored row back.
mysql_query("insert into cart(id,data) values(1,'".addslashes(serialize($cart))."')");

$rs = mysql_query('select data from cart where id=1');

$ob = mysql_fetch_object($rs);

// If magic_quotes_runtime is enabled: strip slashes from the column value first
$new_cart = unserialize(stripslashes($ob->data));

// If magic_quotes_runtime is disabled: unserialize the column value directly
$new_cart = unserialize($ob->data);

当对一个对象进行反序列化操作时，PHP会自动地调用其__wakeup()方法。这样就使得对象能够重新建立起序列化时未能保留的各种状态。例如：数据库连接等。

用例子给你说明一下

代码如下

复制代码

//声明一个类

/**
 * Small value class used to demonstrate object serialization.
 */
class dog {

    /** @var mixed Name of the dog. */
    public $name;

    /** @var mixed Age of the dog; getage() multiplies it by 365. */
    public $age;

    /** @var mixed Owner of the dog. */
    public $owner;

    /**
     * Store the given name, age and owner on the new instance.
     *
     * Declared as __construct(): a method merely named after the class is
     * no longer treated as a constructor by PHP 8, so `new dog(...)` would
     * silently leave every property unset there.
     *
     * @param mixed $in_name  Name; defaults to "unnamed".
     * @param mixed $in_age   Age; defaults to "0".
     * @param mixed $in_owner Owner; defaults to "unknown".
     */
    public function __construct($in_name="unnamed",$in_age="0",$in_owner="unknown") {
        $this->name = $in_name;
        $this->age = $in_age;
        $this->owner = $in_owner;
    }

    /**
     * @return int|float The stored age multiplied by 365.
     */
    public function getage() {
        return ($this->age * 365);
    }

    /**
     * @return mixed The stored owner.
     */
    public function getowner() {
        return ($this->owner);
    }

    /**
     * @return mixed The stored name.
     */
    public function getname() {
        return ($this->name);
    }

}

//实例化这个类

// Create an instance of the class.
$ourfirstdog = new dog("Rover",12,"Lisa and Graham");

// Use serialize() to turn the instance into a serialized string.

$dogdisc = serialize($ourfirstdog);

print $dogdisc; // $ourfirstdog is now serialized as the string O:3:"dog":3:{s:4:"name";s:5:"Rover";s:3:"age";i:12;s:5:"owner";s:15:"Lisa and Graham";}

-----------------------------------------------------------------------------------------

在这里你可以将字符串 $dogdisc 存储到任何地方如 session,cookie,数据库,php文件

-----------------------------------------------------------------------------------------

// Destroy the $ourfirstdog variable here (the class definition itself is not affected).

unset($ourfirstdog);

b.php

//声明一个类

/**
 * Small value class used to demonstrate object unserialization. The class
 * must be declared before unserialize() is asked to restore a "dog" object.
 */
class dog {

    /** @var mixed Name of the dog. */
    public $name;

    /** @var mixed Age of the dog; getage() multiplies it by 365. */
    public $age;

    /** @var mixed Owner of the dog. */
    public $owner;

    /**
     * Store the given name, age and owner on the new instance.
     *
     * Declared as __construct(): a method merely named after the class is
     * no longer treated as a constructor by PHP 8, so `new dog(...)` would
     * silently leave every property unset there.
     *
     * @param mixed $in_name  Name; defaults to "unnamed".
     * @param mixed $in_age   Age; defaults to "0".
     * @param mixed $in_owner Owner; defaults to "unknown".
     */
    public function __construct($in_name="unnamed",$in_age="0",$in_owner="unknown") {
        $this->name = $in_name;
        $this->age = $in_age;
        $this->owner = $in_owner;
    }

    /**
     * @return int|float The stored age multiplied by 365.
     */
    public function getage() {
        return ($this->age * 365);
    }

    /**
     * @return mixed The stored owner.
     */
    public function getowner() {
        return ($this->owner);
    }

    /**
     * @return mixed The stored name.
     */
    public function getname() {
        return ($this->name);
    }

}

/*还原操作 */

-----------------------------------------------------------------------------------------

在这里将字符串 $dogdisc 从你存储的地方读出来如 session,cookie,数据库,php文件

-----------------------------------------------------------------------------------------

// The serialized text of the object, as read back from wherever it was stored.
$dogdisc='O:3:"dog":3:{s:4:"name";s:5:"Rover";s:3:"age";i:12;s:5:"owner";s:15:"Lisa and Graham";}';

// Use unserialize() here to restore the serialized object.

$pet = unserialize($dogdisc); // $pet now carries the same property values as the earlier $ourfirstdog object

// Read the age and the name through the object's methods.

$old = $pet->getage();

$name = $pet->getname();

// The object is usable without being constructed again; its properties keep the values they had before serialization.

print "Our first dog is called $name and is $old days old
";

序列化与反序列化语法解析不一致带来的安全隐患

i. PHP string serialize() 相关源码分析
------------------------------------

代码如下

复制代码

/*
 * Append the serialized form of a string to buf, producing
 *     s:<len>:"<str bytes>";
 *
 * buf  destination buffer the pieces are appended to.
 * str  bytes to emit between the quotes; exactly len bytes are appended.
 * len  byte count, written both as the decimal length field and used as
 *      the number of bytes copied from str.
 */
static inline void php_var_serialize_string(smart_str *buf, char *str, int len) /* {{{ */
{
	smart_str_appendl(buf, "s:", 2);
	smart_str_append_long(buf, len);
	/* Two bytes: the colon and the opening double quote. */
	smart_str_appendl(buf, ":\"", 2);
	smart_str_appendl(buf, str, len);
	/* Two bytes: the closing double quote and the terminating semicolon. */
	smart_str_appendl(buf, "\";", 2);
}

通过上面的代码片段可以看到 serialize() 对 string 序列化处理方式如下：

代码如下	复制代码
$str = 'ryatsyne'; var_dump(serialize($str)); // $str serialized string output // s:8:"ryatsyne";

ii. PHP string unserialize() 相关源码分析
---------------------------------------

unserialize() 函数对 string 的反序列化则分为两种，一种是对 `s:` 格式的序列化 string 进行处理：

代码如下

复制代码

switch (yych) {
...
case 's': goto yy9;
...
yy9:
yych = *(YYMARKER = ++YYCURSOR);
if (yych == ':') goto yy46;
goto yy3;
...
yy46:
yych = *++YYCURSOR;
if (yych == '+') goto yy47;
if (yych <= '/') goto yy18;
if (yych <= '9') goto yy48;
goto yy18;
yy47:
yych = *++YYCURSOR;
if (yych <= '/') goto yy18;
if (yych >= ':') goto yy18;
yy48:
++YYCURSOR;
if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if (yych <= '/') goto yy18;
if (yych <= '9') goto yy48;
if (yych >= ';') goto yy18;
yych = *++YYCURSOR;
if (yych != '"') goto yy18;
++YYCURSOR;
{
size_t len, maxlen;
char *str;

len = parse_uiv(start + 2);
maxlen = max - YYCURSOR;
if (maxlen < len) {
*p = start + 2;
return 0;
}

str = (char*)YYCURSOR;

YYCURSOR += len;

if (*(YYCURSOR) != '"') {
*p = YYCURSOR;
return 0;
}
// 确保格式为 s:x:"x"

YYCURSOR += 2;
*p = YYCURSOR;
// 注意这里，*p 指针直接后移了两位，也就是说没有判断 " 后面是否为 ;

INIT_PZVAL(*rval);
ZVAL_STRINGL(*rval, str, len, 1);
return 1;

另一种是对 S: 格式的序列 string 进行处理（此格式在 serialize() 函数序列化处理中并没有定义）：

代码如下

复制代码

/*
 * Decode the payload of an S:-format serialized string.
 *
 * Produces *len output bytes from the input at *p. A backslash in the input
 * introduces exactly two hexadecimal digits (upper or lower case) that
 * encode one output byte; any other input byte is copied through unchanged.
 *
 * p       in/out cursor into the input; it is advanced past every byte
 *         consumed, including on the failure paths.
 * len     in: number of output bytes to produce; out: number produced.
 * maxlen  number of input bytes considered available starting at *p.
 *
 * Returns a NUL-terminated buffer obtained from safe_emalloc(), or NULL
 * (after releasing the buffer with efree()) when *p + maxlen wraps around,
 * when the cursor reaches the end of the input before *len bytes have been
 * produced, or when a backslash is not followed by two hex digits.
 */
static char *unserialize_str(const unsigned char **p, size_t *len, size_t maxlen)
{
	size_t i, j;
	char *str = safe_emalloc(*len, 1, 1);
	unsigned char *end = *(unsigned char **)p+maxlen;

	/* Pointer wrap-around: maxlen cannot describe a valid range. */
	if (end < *p) {
		efree(str);
		return NULL;
	}

	for (i = 0; i < *len; i++) {
		if (*p >= end) {
			efree(str);
			return NULL;
		}
		if (**p != '\\') {
			/* Ordinary byte: copy as-is. */
			str[i] = (char)**p;
		} else {
			/* Escape: fold the next two hex digits into one byte. */
			unsigned char ch = 0;

			for (j = 0; j < 2; j++) {
				(*p)++;
				if (**p >= '0' && **p <= '9') {
					ch = (ch << 4) + (**p -'0');
				} else if (**p >= 'a' && **p <= 'f') {
					ch = (ch << 4) + (**p -'a'+10);
				} else if (**p >= 'A' && **p <= 'F') {
					ch = (ch << 4) + (**p -'A'+10);
				} else {
					efree(str);
					return NULL;
				}
			}
			str[i] = (char)ch;
		}
		(*p)++;
	}
	str[i] = 0;
	*len = i;
	return str;
}
// 上面的函数是对 \72\79\61\74\73\79\6e\65 这样十六进制形式字符串进行转换
...
switch (yych) {
...
case 'S': goto yy10;
// 处理过程与 s: 相同
if ((str = unserialize_str(&YYCURSOR, &len, maxlen)) == NULL) {
return 0;
}
// 处理过程与 s: 相同

从上面的代码片段可以看到 unserialize() 对序列化后的 string 反序列化处理如下：

代码如下

复制代码

// Three inputs: the canonical s: form, an s: form whose closing quote is
// followed by "t" instead of ";", and the hex-escaped S: form.
$str1 = 's:8:"ryatsyne";';
$str2 = 's:8:"ryatsyne"t';
$str3 = 'S:8:"\72\79\61\74\73\79\6e\65"';

// Unserialize each input in turn and dump the result.
foreach (array($str1, $str2, $str3) as $str) {
    var_dump(unserialize($str));
}
// According to the parser code analysed in this article, $str1, $str2 and
// $str3 all unserialize to the same string value:
// ryatsyne

iii. 语法解析处理不一致导致的安全隐患
-----------------------------

从上述分析过程可以看到 PHP 在反序列化 string 时没有严格按照序列化格式 s:x:"x"; 进行处理，没有对 " 后面的是否存在 ; 进行判断，同时增加了对十六进制形式字符串的处理，这样前后处理的不一致让人很费解，同时由于 PHP 手册中对此没有详细的说明，大部分程序员对此处理过程并不了解，这可能导致其在编码过程中出现疏漏，甚至导致严重的安全问题。

回到文章开头提到的 IPB 漏洞上，利用这个 funny feature of PHP 可以很容易的 bypass safeUnserialize() 函数的过滤：）

代码如下

复制代码

/**
 * mixed safe_unserialize(string $serialized)
 * Safely unserialize, that is only unserialize string, numbers and arrays, not objects
 *
 * NOTE(review): the object check below only recognises "O:" at the start of
 * the input or directly after ";", "{" or "}". An input such as
 * a:1:{s:8:"ryatsyne"tO:8:"ryatsyne":0:{}} places "O:" after another
 * character and is therefore not matched by the pattern.
 *
 * @param mixed $serialized Candidate serialized data; anything that is not a
 *                          string, or that contains a NUL byte, is rejected.
 * @return mixed The result of unserialize(), or false when the input is
 *               rejected by the checks.
 *
 * @license Public Domain
 * @author dcz (at) phpbb-seo (dot) com
 */
static public function safeUnserialize( $serialized )
{
    // unserialize will return false for object declared with small cap o
    // as well as if there is any ws between O and :
    if ( is_string( $serialized ) && strpos( $serialized, "\0" ) === false )
    {
        if ( strpos( $serialized, 'O:' ) === false )
        {
            // the easy case, nothing to worry about
            // let unserialize do the job
            return @unserialize( $serialized );
        }
        else if ( ! preg_match('/(^|;|{|})O:[+\-0-9]+:"/', $serialized ) )
        {
            // in case we did have a string with O: in it,
            // but it was not a true serialized object
            return @unserialize( $serialized );
        }
    }

    return false;
}

// a:1:{s:8:"ryatsyne"tO:8:"ryatsyne":0:{}}
// 只要构造类似的序列化字符串就可以轻易突破这里的过滤了

推荐专题

最新下载

热门教程

php序列化serialize() 与反序列化unserialize()

相关文章

热门栏目

php教程

asp.net教程

手机开发

css教程

网页制作

办公数码

jsp教程