Skip to content

Commit

Permalink
syntax: avoid a slice bounds check on every input byte
Browse files Browse the repository at this point in the history
When our lexer peeks one byte or consumes one rune,
we do a check like `p.bsp < len(p.bs)` followed by `p.bs[p.bsp]`.
In practice this made a bounds check unnecessary, as we knew we had
enough input bytes in p.bs to grab the byte at p.bsp,
but since p.bsp was a signed integer, the compiler knew that in theory
p.bsp could have overflowed to a negative value and caused a panic.

In the past I tried swapping p.bsp from an int to an uint to solve
this issue, but the compiler wasn't yet clever enough to take note.
It seems like it now is:

            │     old     │                new                 │
            │   sec/op    │   sec/op     vs base               │
    Parse-8   14.86µ ± 1%   14.46µ ± 1%  -2.74% (p=0.000 n=10)
  • Loading branch information
mvdan committed Apr 27, 2024
1 parent 8598796 commit 68768e5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
20 changes: 10 additions & 10 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (p *Parser) rune() rune {
p.col += int64(p.w)
bquotes := 0
retry:
if p.bsp < len(p.bs) {
if p.bsp < uint(len(p.bs)) {
if b := p.bs[p.bsp]; b < utf8.RuneSelf {
p.bsp++
if b == '\x00' {
Expand All @@ -86,7 +86,7 @@ retry:
return escNewl
}
if p.openBquotes > 0 && bquotes < p.openBquotes &&
p.bsp < len(p.bs) && bquoteEscaped(p.bs[p.bsp]) {
p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) {
// We turn backquote command substitutions into $(),
// so we remove the extra backslashes needed by the backquotes.
// For good position information, we still include them in p.w.
Expand All @@ -110,9 +110,9 @@ retry:
var w int
p.r, w = utf8.DecodeRune(p.bs[p.bsp:])
if p.litBs != nil {
p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+w]...)
p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+uint(w)]...)
}
p.bsp += w
p.bsp += uint(w)
if p.r == utf8.RuneError && w == 1 {
p.posErr(p.nextPos(), "invalid UTF-8 encoding")
}
Expand All @@ -135,7 +135,7 @@ retry:
// beginning of the buffer.
func (p *Parser) fill() {
p.offs += int64(p.bsp)
left := len(p.bs) - p.bsp
left := len(p.bs) - int(p.bsp)
copy(p.readBuf[:left], p.readBuf[p.bsp:])
readAgain:
n, err := 0, p.readErr
Expand Down Expand Up @@ -254,7 +254,7 @@ skipSpace:
}
if p.stopAt != nil && (p.spaced || p.tok == illegalTok || p.stopToken()) {
w := utf8.RuneLen(r)
if bytes.HasPrefix(p.bs[p.bsp-w:], p.stopAt) {
if bytes.HasPrefix(p.bs[p.bsp-uint(w):], p.stopAt) {
p.r = utf8.RuneSelf
p.w = 1
p.tok = _EOF
Expand Down Expand Up @@ -388,7 +388,7 @@ func (p *Parser) extendedGlob() bool {
}

func (p *Parser) peekBytes(s string) bool {
peekEnd := p.bsp + len(s)
peekEnd := int(p.bsp) + len(s)
// TODO: This should loop for slow readers, e.g. those providing one byte at
// a time. Use a loop and test it with testing/iotest.OneByteReader.
if peekEnd > len(p.bs) {
Expand All @@ -398,10 +398,10 @@ func (p *Parser) peekBytes(s string) bool {
}

func (p *Parser) peekByte(b byte) bool {
if p.bsp == len(p.bs) {
if p.bsp == uint(len(p.bs)) {
p.fill()
}
return p.bsp < len(p.bs) && p.bs[p.bsp] == b
return p.bsp < uint(len(p.bs)) && p.bs[p.bsp] == b
}

func (p *Parser) regToken(r rune) token {
Expand Down Expand Up @@ -809,7 +809,7 @@ func (p *Parser) newLit(r rune) {
p.litBs[0] = byte(r)
case r > escNewl:
w := utf8.RuneLen(r)
p.litBs = append(p.litBuf[:0], p.bs[p.bsp-w:p.bsp]...)
p.litBs = append(p.litBuf[:0], p.bs[p.bsp-uint(w):p.bsp]...)
default:
// don't let r == utf8.RuneSelf go to the second case as RuneLen
// would return -1
Expand Down
4 changes: 2 additions & 2 deletions syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) {
type Parser struct {
src io.Reader
bs []byte // current chunk of read bytes
bsp int // pos within chunk for the rune after r
bsp uint // pos within chunk for the rune after r; uint helps eliminate bounds checks
r rune // next rune
w int // width of r

Expand Down Expand Up @@ -733,7 +733,7 @@ func (p *Parser) matched(lpos Pos, left, right token) Pos {
func (p *Parser) errPass(err error) {
if p.err == nil {
p.err = err
p.bsp = len(p.bs) + 1
p.bsp = uint(len(p.bs)) + 1
p.r = utf8.RuneSelf
p.w = 1
p.tok = _EOF
Expand Down

0 comments on commit 68768e5

Please sign in to comment.