mirror of
https://github.com/PuerkitoBio/goquery
synced 2026-04-22 23:37:04 +08:00
Merge branch 'wip-i255-single-match'
This commit is contained in:
@@ -2,6 +2,8 @@ package goquery
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/andybalholm/cascadia"
|
||||
)
|
||||
|
||||
func BenchmarkFind(b *testing.B) {
|
||||
@@ -800,3 +802,21 @@ func BenchmarkClosestNodes(b *testing.B) {
|
||||
b.Fatalf("want 2, got %d", n)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSingleMatcher(b *testing.B) {
|
||||
doc := Doc()
|
||||
multi := cascadia.MustCompile(`div`)
|
||||
single := SingleMatcher(multi)
|
||||
b.ResetTimer()
|
||||
|
||||
b.Run("multi", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = doc.FindMatcher(multi)
|
||||
}
|
||||
})
|
||||
b.Run("single", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = doc.FindMatcher(single)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -80,3 +80,31 @@ func ExampleNewDocumentFromReader_string() {
|
||||
|
||||
// Output: Header
|
||||
}
|
||||
|
||||
func ExampleSingle() {
|
||||
html := `
|
||||
<html>
|
||||
<body>
|
||||
<div>1</div>
|
||||
<div>2</div>
|
||||
<div>3</div>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// By default, the selector string selects all matching nodes
|
||||
multiSel := doc.Find("div")
|
||||
fmt.Println(multiSel.Text())
|
||||
|
||||
// Using goquery.Single, only the first match is selected
|
||||
singleSel := doc.FindMatcher(goquery.Single("div"))
|
||||
fmt.Println(singleSel.Text())
|
||||
|
||||
// Output:
|
||||
// 123
|
||||
// 1
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"net/url"
|
||||
|
||||
"github.com/andybalholm/cascadia"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -122,6 +121,45 @@ type Matcher interface {
|
||||
Filter([]*html.Node) []*html.Node
|
||||
}
|
||||
|
||||
// Single compiles a selector string to a Matcher that stops after the first
|
||||
// match is found.
|
||||
//
|
||||
// By default, Selection.Find and other functions that accept a selector string
|
||||
// to select nodes will use all matches corresponding to that selector. By
|
||||
// using the Matcher returned by Single, at most the first match will be
|
||||
// selected.
|
||||
//
|
||||
// For example, those two statements are semantically equivalent:
|
||||
//
|
||||
// sel1 := doc.Find("a").First()
|
||||
// sel2 := doc.FindMatcher(goquery.Single("a"))
|
||||
//
|
||||
// The one using Single is optimized to be potentially much faster on large
|
||||
// documents.
|
||||
//
|
||||
// Only the behaviour of the MatchAll method of the Matcher interface is
|
||||
// altered compared to standard Matchers. This means that the single-selection
|
||||
// property of the Matcher only applies for Selection methods where the Matcher
|
||||
// is used to select nodes, not to filter or check if a node matches the
|
||||
// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
|
||||
// FilterMatcher(Single("div")) will still result in a Selection with multiple
|
||||
// "div"s if there were many "div"s in the Selection to begin with).
|
||||
func Single(selector string) Matcher {
|
||||
return singleMatcher{compileMatcher(selector)}
|
||||
}
|
||||
|
||||
// SingleMatcher returns a Matcher matches the same nodes as m, but that stops
|
||||
// after the first match is found.
|
||||
//
|
||||
// See the documentation of function Single for more details.
|
||||
func SingleMatcher(m Matcher) Matcher {
|
||||
if _, ok := m.(singleMatcher); ok {
|
||||
// m is already a singleMatcher
|
||||
return m
|
||||
}
|
||||
return singleMatcher{m}
|
||||
}
|
||||
|
||||
// compileMatcher compiles the selector string s and returns
|
||||
// the corresponding Matcher. If s is an invalid selector string,
|
||||
// it returns a Matcher that fails all matches.
|
||||
@@ -133,6 +171,30 @@ func compileMatcher(s string) Matcher {
|
||||
return cs
|
||||
}
|
||||
|
||||
type singleMatcher struct {
|
||||
Matcher
|
||||
}
|
||||
|
||||
func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
|
||||
// Optimized version - stops finding at the first match (cascadia-compiled
|
||||
// matchers all use this code path).
|
||||
if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
|
||||
node := mm.MatchFirst(n)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
return []*html.Node{node}
|
||||
}
|
||||
|
||||
// Fallback version, for e.g. test mocks that don't provide the MatchFirst
|
||||
// method.
|
||||
nodes := m.Matcher.MatchAll(n)
|
||||
if len(nodes) > 0 {
|
||||
return nodes[:1:1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// invalidMatcher is a Matcher that always fails to match.
|
||||
type invalidMatcher struct{}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/andybalholm/cascadia"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@@ -208,3 +209,47 @@ func TestIssue103(t *testing.T) {
|
||||
}
|
||||
t.Log(text)
|
||||
}
|
||||
|
||||
func TestSingle(t *testing.T) {
|
||||
data := `
|
||||
<html>
|
||||
<body>
|
||||
<div class="b">1</div>
|
||||
<div class="a">2</div>
|
||||
<div class="a">3</div>
|
||||
<p class="b">4</p>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
doc, err := NewDocumentFromReader(strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
text := doc.FindMatcher(Single("div")).Text()
|
||||
if text != "1" {
|
||||
t.Fatalf("want %q, got %q", "1", text)
|
||||
}
|
||||
|
||||
// Verify semantic equivalence
|
||||
sel1 := doc.Find("div").First()
|
||||
sel2 := doc.FindMatcher(Single("div"))
|
||||
if sel1.Text() != sel2.Text() {
|
||||
t.Fatalf("want sel1 to equal sel2")
|
||||
}
|
||||
|
||||
// Here, the Single has no effect as the selector is used to filter
|
||||
// from the existing selection, not to find nodes in the document.
|
||||
divs := doc.Find("div")
|
||||
text = divs.FilterMatcher(Single(".a")).Text()
|
||||
if text != "23" {
|
||||
t.Fatalf("want %q, got %q", "23", text)
|
||||
}
|
||||
|
||||
classA := cascadia.MustCompile(".a")
|
||||
classB := cascadia.MustCompile(".b")
|
||||
text = doc.FindMatcher(classB).AddMatcher(SingleMatcher(classA)).Text()
|
||||
if text != "142" {
|
||||
t.Fatalf("want %q, got %q", "142", text)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user