Skip to content

Commit f2ae19b

Browse files
Refactor XML item parsing in Xml source to use XMLReader (#99)
* Refactor XML item parsing in Xml source to use XMLReader for improved performance and memory efficiency
* Import `DOMDocument` and `XMLReader`

---------

Co-authored-by: Duncan McClean <[email protected]>
1 parent d64c65c commit f2ae19b

File tree

1 file changed: 33 additions (+33) and 21 deletions (-21).

src/Sources/Xml.php

Lines changed: 33 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2,41 +2,53 @@
22

33
namespace Statamic\Importer\Sources;
44

5+
use DOMDocument;
56
use Illuminate\Support\LazyCollection;
7+
use XMLReader;
68

79
class Xml extends AbstractSource
810
{
911
public function getItems(string $path): LazyCollection
1012
{
11-
$xml = simplexml_load_file($path);
12-
13-
return LazyCollection::make(function () use ($xml) {
14-
foreach ($xml->channel->item as $item) {
15-
$array = [];
13+
return LazyCollection::make(function () use ($path) {
14+
$reader = new XMLReader;
15+
$reader->open($path);
16+
17+
while ($reader->read()) {
18+
if ($reader->nodeType === XMLReader::ELEMENT && $reader->name === 'item') {
19+
$node = $reader->expand();
20+
$array = [];
21+
22+
$doc = new DOMDocument;
23+
$node = $doc->importNode($node, true);
24+
$doc->appendChild($node);
25+
$item = simplexml_import_dom($doc);
26+
27+
foreach ($item as $key => $value) {
28+
$array[$key] = (string) $value;
29+
}
1630

17-
foreach ($item as $key => $value) {
18-
$array[$key] = (string) $value;
19-
}
31+
foreach ($item->getDocNamespaces(true) as $namespace => $uri) {
32+
// Access namespaced elements using the namespace prefix
33+
foreach ($item->children($uri) as $key => $value) {
34+
$array[$namespace.':'.$key] = (string) $value;
35+
}
2036

21-
foreach ($item->getDocNamespaces(true) as $namespace => $uri) {
22-
// Access namespaced elements using the namespace prefix
23-
foreach ($item->children($uri) as $key => $value) {
24-
$array[$namespace.':'.$key] = (string) $value;
37+
// If you want to access attributes in the namespaced elements
38+
foreach ($item->attributes($uri) as $key => $value) {
39+
$array[$namespace.':'.$key] = (string) $value;
40+
}
2541
}
2642

27-
// If you want to access attributes in the namespaced elements
28-
foreach ($item->attributes($uri) as $key => $value) {
29-
$array[$namespace.':'.$key] = (string) $value;
43+
if (isset($array['wp:post_type']) && $array['wp:post_type'] === 'attachment') {
44+
continue;
3045
}
31-
}
3246

33-
// WordPress: Filter out any `attachment` post types.
34-
if (isset($array['wp:post_type']) && $array['wp:post_type'] === 'attachment') {
35-
continue;
47+
yield $array;
3648
}
37-
38-
yield $array;
3949
}
50+
51+
$reader->close();
4052
});
4153
}
4254
}

0 commit comments

Comments
 (0)