Skip to content

Commit

Permalink
Introduce the nested() XML Reader matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
veewee committed Nov 1, 2023
1 parent 13eec24 commit 8e5631f
Show file tree
Hide file tree
Showing 8 changed files with 474 additions and 12 deletions.
67 changes: 65 additions & 2 deletions docs/reader.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,66 @@ use \VeeWee\Xml\Reader\Matcher;
Matcher\namespaced_element('https://some', 'item');
```

#### nested

Provide nested matchers that represent parts of an XML tree.
Every provided matcher acts as a breakpoint for the next matcher, making it composable with the exact XML tree [sequence](#sequence) matcher as well.

Given:

```xml
<root>
<users>
<user locale="nl">Jos</user>
<user>Bos</user>
<user>Mos</user>
</users>
</root>
```

This matcher will grab the `user` element with `locale="nl"`

```php
use \VeeWee\Xml\Reader\Matcher;

Matcher\nested(
// Breakpoint 1: <root />
Matcher\document_element(),
// Breakpoint 2: <user locale="nl">Jos</user>
// Searches for all elements that match `<user />` and have the attribute `locale="nl"` in the `<root />` document.
// Note that you can skip matching on `<users />` here : it's not an exact matcher
Matcher\all(
Matcher\element_name('user'),
Matcher\attribute_value('locale', 'nl')
)
);
```

Since every match will create a breakpoint in the `NodeSequence`, you can combine it with the sequence matcher:

```php
use \VeeWee\Xml\Reader\Matcher;

Matcher\nested(
// Breakpoint 1: <root />
Matcher\document_element(),
// Breakpoint 2: <user />
// The nested matcher will provide the NodeSequence starting from the element after the previous match.
// The sequence will basically receive: 'users > user'
Matcher\sequence(
// Level 0: The element inside <root /> at level 0 must exactly match <users />
Matcher\element_name('users'),
// Level 1: The element inside <root /> at level 1 must exactly match <user />
Matcher\element_name('user'),
),
// Breakpoint 3: <email />
// After matching a sequence, you can still continue matching deeper or adding even more sequences:
Matcher\element_name('email')
);
```

If you want every level of the XML to match exactly, you might use the [sequence](#sequence) matcher instead.

#### not

Inverts a matcher's result.
Expand All @@ -318,8 +378,9 @@ Matcher\not(

#### sequence

Provide a sequence of matchers that represents the XML tree.
Only the items that are described by the sequence will match.
Provide a sequence of matchers that represents the exact XML tree.
Every provided matcher step must result in an exact match with the matcher on the same index.
Only the items that are described by the sequence will match:

Given:

Expand Down Expand Up @@ -352,6 +413,8 @@ Matcher\sequence(
);
```

If you don't want every level of XML to match exactly, you might use the [nested](#nested) matcher instead.


#### Writing your own matcher

Expand Down
63 changes: 63 additions & 0 deletions src/Xml/Reader/Matcher/nested.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

declare(strict_types=1);

namespace VeeWee\Xml\Reader\Matcher;

use Closure;
use VeeWee\Xml\Reader\Node\NodeSequence;
use function array_shift;

/**
 * Builds a matcher that walks through the node sequence and applies the given matchers one after another.
 *
 * Each successful match acts as a breakpoint: the next matcher only receives the part of the
 * sequence that comes after the node on which the previous matcher succeeded.
 * Steps on which the current matcher does not succeed are skipped, so any number of nodes
 * may sit between two breakpoints. Because every matcher receives a (sliced) NodeSequence,
 * it can be combined with the sequence() matcher.
 *
 * The resulting matcher only returns true when the final matcher succeeds on the very last
 * step of the complete sequence. Without any matchers, it always returns false.
 *
 * ```php
 * nested(
 *     document_element(),
 *     sequence(element_name('users'), element_name('user')),
 *     element_name('email')
 * );
 * ```
 *
 * @param non-empty-list<callable(NodeSequence): bool> $matchers
 *
 * @return \Closure(NodeSequence): bool
 */
function nested(callable ... $matchers): Closure
{
    return static function (NodeSequence $sequence) use ($matchers) : bool {
        // Work on a local queue: matchers are consumed front to back while walking the sequence.
        $queue = $matchers;
        $active = array_shift($queue);
        if (null === $active) {
            return false;
        }

        $finalIndex = $sequence->count() - 1;
        // Offset of the first node that comes after the most recent breakpoint.
        $offset = 0;

        foreach ($sequence->replay() as $position => $partial) {
            // Only hand over the nodes that follow the previous breakpoint.
            if ($active($partial->slice($offset))) {
                // This step becomes the new breakpoint; move on to the next matcher.
                $offset = $position + 1;
                $active = array_shift($queue);

                // Once all matchers are consumed, the result depends on where we are:
                // a breakpoint on the last step is a match, a breakpoint higher up means
                // the current node is a deeper element that should not be matched.
                if (null === $active) {
                    return $position === $finalIndex;
                }
            }
        }

        return false;
    };
}
2 changes: 1 addition & 1 deletion src/Xml/Reader/Matcher/not.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use VeeWee\Xml\Reader\Node\NodeSequence;

/**
* @param callable(NodeSequence) $matcher
* @param callable(NodeSequence): bool $matcher
*
* @return \Closure(NodeSequence): bool
*/
Expand Down
9 changes: 3 additions & 6 deletions src/Xml/Reader/Matcher/sequence.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,12 @@
function sequence(callable ... $matcherSequence): Closure
{
return static function (NodeSequence $sequence) use ($matcherSequence) : bool {
$nodeSequence = $sequence->sequence();
if (count($matcherSequence) !== count($nodeSequence)) {
if (count($matcherSequence) !== $sequence->count()) {
return false;
}

$currentSequence = new NodeSequence();
foreach ($nodeSequence as $i => $node) {
$currentSequence = $currentSequence->append($node);
$matcher = $matcherSequence[$i];
foreach ($sequence->replay() as $index => $currentSequence) {
$matcher = $matcherSequence[$index];
if (!$matcher($currentSequence)) {
return false;
}
Expand Down
33 changes: 32 additions & 1 deletion src/Xml/Reader/Node/NodeSequence.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

namespace VeeWee\Xml\Reader\Node;

use Countable;
use Generator;
use InvalidArgumentException;
use Webmozart\Assert\Assert;
use function Psl\Vec\slice;

final class NodeSequence
final class NodeSequence implements Countable
{
/**
* @var list<ElementNode>
Expand Down Expand Up @@ -64,6 +67,34 @@ public function sequence(): array
return $this->elementNodes;
}

/**
 * Tells how many element nodes this sequence currently holds.
 */
public function count(): int
{
    $nodes = $this->elementNodes;

    return \count($nodes);
}

/**
 * Creates a new sequence from a portion of the element nodes of this sequence.
 * The current instance is left untouched.
 *
 * @param non-negative-int $start Offset of the first node to keep.
 * @param non-negative-int|null $length Forwarded as-is to the slice function; null is the default.
 */
public function slice(int $start, ?int $length = null): self
{
    $portion = slice($this->elementNodes, $start, $length);

    return new self(...$portion);
}

/**
 * Rebuilds this sequence node by node and yields the intermediate sequence after every node.
 * The yielded key is the index of the node that was appended last.
 *
 * @return Generator<non-negative-int, NodeSequence, mixed, void>
 */
public function replay(): Generator
{
    $accumulated = new self();
    foreach ($this->elementNodes as $position => $elementNode) {
        yield $position => ($accumulated = $accumulated->append($elementNode));
    }
}

/**
* @throws InvalidArgumentException
*/
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
require_once __DIR__.'/Xml/Reader/Matcher/namespaced_attribute.php';
require_once __DIR__.'/Xml/Reader/Matcher/namespaced_attribute_value.php';
require_once __DIR__.'/Xml/Reader/Matcher/namespaced_element.php';
require_once __DIR__.'/Xml/Reader/Matcher/nested.php';
require_once __DIR__.'/Xml/Reader/Matcher/node_attribute.php';
require_once __DIR__.'/Xml/Reader/Matcher/node_name.php';
require_once __DIR__.'/Xml/Reader/Matcher/not.php';
Expand Down
Loading

0 comments on commit 8e5631f

Please sign in to comment.