背景:
1、手上有几个大的xml文件,基本都在300m至600m之间;
2、xml内容包括title,co-author,abstract,affiliation等;
3、用的是xmlreader进行解析;
遇到的问题:
如果解析所有内容,经常只能把xml文件的一部分解析出来,似乎是内存不够的迹象;
如果只把title或affiliation单独解析出来,就能全部解析xml文件;
附上代码:
set_time_limit(0);header(content-type: text/html;charset=utf-8);$num=0;$reader = new xmlreader();$reader->open(jacs.xml);while ($reader->read()) { if($reader->nodetype==xmlreader::element) { if ($reader->localname == pubmedarticle) { $num++; echo 'number:'.$num; while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == pubdate) { while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == year) { $reader->read(); echo 'publicationdate:'.$reader->value.' '; break; } } } while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == month) { $reader->read(); echo $reader->value.' '; break; } } } while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == day) { $reader->read(); echo $reader->value; break; } } } echo '
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == title) { $reader->read(); echo 'journalname:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == articletitle) { $reader->read(); echo 'articletitle:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == abstracttext) { $reader->read(); echo 'abstract:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == affiliation) { $reader->read(); echo 'affiliation:'.$reader->value.'
'; break; } } } } } } $reader->close();}
回复内容: 背景:
1、手上有几个大的xml文件,基本都在300m至600m之间;
2、xml内容包括title,co-author,abstract,affiliation等;
3、用的是xmlreader进行解析;
遇到的问题:
如果解析所有内容,经常只能把xml文件的一部分解析出来,似乎是内存不够的迹象;
如果只把title或affiliation单独解析出来,就能全部解析xml文件;
附上代码:
set_time_limit(0);header(content-type: text/html;charset=utf-8);$num=0;$reader = new xmlreader();$reader->open(jacs.xml);while ($reader->read()) { if($reader->nodetype==xmlreader::element) { if ($reader->localname == pubmedarticle) { $num++; echo 'number:'.$num; while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == pubdate) { while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == year) { $reader->read(); echo 'publicationdate:'.$reader->value.' '; break; } } } while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == month) { $reader->read(); echo $reader->value.' '; break; } } } while ($reader->read()){ if ($reader->nodetype == xmlreader::element) { if ($reader->localname == day) { $reader->read(); echo $reader->value; break; } } } echo '
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == title) { $reader->read(); echo 'journalname:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == articletitle) { $reader->read(); echo 'articletitle:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == abstracttext) { $reader->read(); echo 'abstract:'.$reader->value.'
'; break; } } } while ($reader->read()) { if ($reader->nodetype == xmlreader::element) { if ($reader->localname == affiliation) { $reader->read(); echo 'affiliation:'.$reader->value.'
'; break; } } } } } } $reader->close();}
可以参考一下 这个 php处理比较大的xml文件
为啥要装那么大 txt打开那么大也死机了 多分几个文件吧