Merge branch 'wip-i255-single-match'

This commit is contained in:
Martin Angers
2021-06-14 09:09:43 -04:00
4 changed files with 156 additions and 1 deletions
+20
View File
@@ -2,6 +2,8 @@ package goquery
import (
"testing"
"github.com/andybalholm/cascadia"
)
func BenchmarkFind(b *testing.B) {
@@ -800,3 +802,21 @@ func BenchmarkClosestNodes(b *testing.B) {
b.Fatalf("want 2, got %d", n)
}
}
// BenchmarkSingleMatcher measures FindMatcher on the test document with a
// cascadia-compiled `div` selector ("multi") and with that same selector
// wrapped by SingleMatcher ("single").
func BenchmarkSingleMatcher(b *testing.B) {
	doc := Doc()
	all := cascadia.MustCompile(`div`)

	cases := []struct {
		name string
		m    Matcher
	}{
		{"multi", all},
		{"single", SingleMatcher(all)},
	}

	b.ResetTimer()
	for _, c := range cases {
		// Copy the matcher so the closure does not depend on loop-variable
		// semantics of the Go version in use.
		m := c.m
		b.Run(c.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_ = doc.FindMatcher(m)
			}
		})
	}
}
+28
View File
@@ -80,3 +80,31 @@ func ExampleNewDocumentFromReader_string() {
// Output: Header
}
// This example contrasts a plain selector string, which selects every
// matching node, with goquery.Single, which selects only the first match.
func ExampleSingle() {
	const src = `
<html>
<body>
<div>1</div>
<div>2</div>
<div>3</div>
</body>
</html>
`
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(src))
	if err != nil {
		log.Fatal(err)
	}

	// A plain selector string selects all the matching nodes.
	fmt.Println(doc.Find("div").Text())

	// With goquery.Single, only the first match ends up in the selection.
	fmt.Println(doc.FindMatcher(goquery.Single("div")).Text())

	// Output:
	// 123
	// 1
}
+63 -1
View File
@@ -7,7 +7,6 @@ import (
"net/url"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
@@ -122,6 +121,45 @@ type Matcher interface {
Filter([]*html.Node) []*html.Node
}
// Single compiles the selector string into a Matcher that stops looking for
// nodes once a first match is found.
//
// Selection.Find and the other functions that take a selector string to
// select nodes normally keep every node matching that selector. With the
// Matcher returned by Single, at most the first match is selected.
//
// The following two statements are semantically equivalent:
//
//     sel1 := doc.Find("a").First()
//     sel2 := doc.FindMatcher(goquery.Single("a"))
//
// The version using Single is optimized and can be much faster on large
// documents.
//
// Compared to a standard Matcher, only the MatchAll method behaves
// differently. The single-match property therefore applies only to Selection
// methods that use the Matcher to select nodes. Methods that use it to filter
// nodes, or to check whether a node matches, behave as usual: for example,
// FilterMatcher(Single("div")) still yields several "div"s if the Selection
// held several "div"s to begin with.
func Single(selector string) Matcher {
	compiled := compileMatcher(selector)
	return singleMatcher{Matcher: compiled}
}
// SingleMatcher returns a Matcher that matches the same nodes as m, but that
// stops after the first match is found. If m is already such a Matcher (one
// returned by Single or SingleMatcher), it is returned as is instead of being
// wrapped a second time.
//
// See the documentation of function Single for more details.
func SingleMatcher(m Matcher) Matcher {
	switch m.(type) {
	case singleMatcher:
		// Already a single-match Matcher, nothing to wrap.
		return m
	default:
		return singleMatcher{Matcher: m}
	}
}
// compileMatcher compiles the selector string s and returns
// the corresponding Matcher. If s is an invalid selector string,
// it returns a Matcher that fails all matches.
@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
return cs
}
// singleMatcher wraps a Matcher. The wrapped Matcher is embedded, so its
// methods are promoted to singleMatcher; the MatchAll method declared on
// singleMatcher takes precedence over the promoted one.
type singleMatcher struct {
Matcher
}
// MatchAll returns a slice holding at most one node: the first match the
// wrapped Matcher reports for n. It returns nil when there is no match.
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
	// firstMatcher is the optional capability that lets the search stop at
	// the first match instead of collecting every match.
	type firstMatcher interface {
		MatchFirst(*html.Node) *html.Node
	}

	fm, ok := m.Matcher.(firstMatcher)
	if !ok {
		// Fallback for Matchers without a MatchFirst method (e.g. test
		// mocks): collect all matches and keep the first one. The capacity is
		// clamped to 1 so that appending to the result cannot write into the
		// wrapped Matcher's backing array.
		all := m.Matcher.MatchAll(n)
		if len(all) == 0 {
			return nil
		}
		return all[:1:1]
	}

	// Optimized path - the search stops at the first match
	// (cascadia-compiled matchers all take this path).
	if first := fm.MatchFirst(n); first != nil {
		return []*html.Node{first}
	}
	return nil
}
// invalidMatcher is a Matcher that always fails to match. It is an empty
// struct and therefore carries no state.
// NOTE(review): presumably this is the Matcher that compileMatcher returns
// for an invalid selector string, as that function's doc comment describes -
// confirm against its body.
type invalidMatcher struct{}
+45
View File
@@ -7,6 +7,7 @@ import (
"strings"
"testing"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
@@ -208,3 +209,47 @@ func TestIssue103(t *testing.T) {
}
t.Log(text)
}
// TestSingle exercises Single and SingleMatcher: when used to find nodes they
// keep only the first match, and when used to filter an existing selection
// they behave like a regular Matcher.
func TestSingle(t *testing.T) {
	const src = `
<html>
<body>
<div class="b">1</div>
<div class="a">2</div>
<div class="a">3</div>
<p class="b">4</p>
</body>
</html>
`
	doc, err := NewDocumentFromReader(strings.NewReader(src))
	if err != nil {
		t.Fatal(err)
	}

	// Finding with Single keeps the first "div" only.
	if got := doc.FindMatcher(Single("div")).Text(); got != "1" {
		t.Fatalf("want %q, got %q", "1", got)
	}

	// Verify semantic equivalence between Find(...).First() and Single.
	viaFirst := doc.Find("div").First().Text()
	viaSingle := doc.FindMatcher(Single("div")).Text()
	if viaFirst != viaSingle {
		t.Fatalf("want sel1 to equal sel2")
	}

	// Here Single has no effect: the matcher filters the existing selection,
	// it does not find nodes in the document.
	if got := doc.Find("div").FilterMatcher(Single(".a")).Text(); got != "23" {
		t.Fatalf("want %q, got %q", "23", got)
	}

	// Adding with a single matcher contributes only the first ".a" node to
	// the ".b" selection.
	onlyB := cascadia.MustCompile(".b")
	firstA := SingleMatcher(cascadia.MustCompile(".a"))
	if got := doc.FindMatcher(onlyB).AddMatcher(firstA).Text(); got != "142" {
		t.Fatalf("want %q, got %q", "142", got)
	}
}