Skip to content

Commit

Permalink
Refactor to improve postprocess performance
Browse files Browse the repository at this point in the history
Closes GH-23.
Closes GH-169.
Closes GH-171.

Reviewed-by: Titus Wormer <[email protected]>
  • Loading branch information
robsimmons committed Apr 15, 2024
1 parent 15a609e commit ef86838
Show file tree
Hide file tree
Showing 7 changed files with 388 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ packages/micromark-util-decode-string/index.js
packages/micromark-util-normalize-identifier/index.js
packages/micromark-util-sanitize-uri/index.js
packages/micromark-util-subtokenize/index.js
packages/micromark-util-subtokenize/splice-buffer.js
test/fixtures/
test/fuzz-bundle.cjs
!packages/micromark-util-types/index.d.ts
36 changes: 23 additions & 13 deletions packages/micromark-util-subtokenize/dev/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,26 @@
* @typedef {import('micromark-util-types').Token} Token
*/

/**
* @template {{} | null} T
* @typedef {import('micromark-util-types').SpliceBuffer<T>} SpliceBuffer<T>
*/

import {splice} from 'micromark-util-chunked'
import {codes, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'
import {spliceBuffer} from './splice-buffer.js'

/**
* Tokenize subcontent.
*
* @param {Array<Event>} events
* @param {Array<Event>} eventsArray
* List of events.
* @returns {boolean}
* Whether subtokens were found.
*/
// eslint-disable-next-line complexity
export function subtokenize(events) {
export function subtokenize(eventsArray) {
/** @type {Record<string, number>} */
const jumps = {}
let index = -1
Expand All @@ -35,20 +41,22 @@ export function subtokenize(events) {
let subevents
/** @type {boolean | undefined} */
let more
/** @type {SpliceBuffer<Event>} */
const events = spliceBuffer(eventsArray)

while (++index < events.length) {
while (index in jumps) {
index = jumps[index]
}

event = events[index]
event = events.get(index)

// Add a hook for the GFM tasklist extension, which needs to know if text
// is in the first content of a list item.
if (
index &&
event[1].type === types.chunkFlow &&
events[index - 1][1].type === types.listItemPrefix
events.get(index - 1)[1].type === types.listItemPrefix
) {
assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
subevents = event[1]._tokenizer.events
Expand Down Expand Up @@ -92,15 +100,15 @@ export function subtokenize(events) {
lineIndex = undefined

while (otherIndex--) {
otherEvent = events[otherIndex]
otherEvent = events.get(otherIndex)

if (
otherEvent[1].type === types.lineEnding ||
otherEvent[1].type === types.lineEndingBlank
) {
if (otherEvent[0] === 'enter') {
if (lineIndex) {
events[lineIndex][1].type = types.lineEndingBlank
events.get(lineIndex)[1].type = types.lineEndingBlank
}

otherEvent[1].type = types.lineEnding
Expand All @@ -113,29 +121,31 @@ export function subtokenize(events) {

if (lineIndex) {
// Fix position.
event[1].end = Object.assign({}, events[lineIndex][1].start)
event[1].end = Object.assign({}, events.get(lineIndex)[1].start)

// Switch container exit w/ line endings.
parameters = events.slice(lineIndex, index)
parameters.unshift(event)
splice(events, lineIndex, index - lineIndex + 1, parameters)
events.splice(lineIndex, index - lineIndex + 1, parameters)
}
}
}

// The changes to the `events` buffer must be copied back into the eventsArray
splice(eventsArray, 0, Number.POSITIVE_INFINITY, events.slice(0))
return !more
}

/**
* Tokenize embedded tokens.
*
* @param {Array<Event>} events
* @param {SpliceBuffer<Event>} events
* @param {number} eventIndex
* @returns {Record<string, number>}
*/
function subcontent(events, eventIndex) {
const token = events[eventIndex][1]
const context = events[eventIndex][2]
const token = events.get(eventIndex)[1]
const context = events.get(eventIndex)[2]
let startPosition = eventIndex - 1
/** @type {Array<number>} */
const startPositions = []
Expand All @@ -162,7 +172,7 @@ function subcontent(events, eventIndex) {
// subtokenizer.
while (current) {
// Find the position of the event for this token.
while (events[++startPosition][1] !== current) {
while (events.get(++startPosition)[1] !== current) {
// Empty.
}

Expand Down Expand Up @@ -247,7 +257,7 @@ function subcontent(events, eventIndex) {
const start = startPositions.pop()
assert(start !== undefined, 'expected a start position when splicing')
jumps.push([start, start + slice.length - 1])
splice(events, start, 2, slice)
events.splice(start, 2, slice)
}

jumps.reverse()
Expand Down
186 changes: 186 additions & 0 deletions packages/micromark-util-subtokenize/dev/splice-buffer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import {constants} from 'micromark-util-symbol'

/**
 * Create a splice buffer: a list that keeps its items in two arrays split at
 * a movable cursor, so that repeated splices near the same position do not
 * have to shift the whole list each time.
 *
 * `left` holds the items before the cursor in order; `right` holds the items
 * after the cursor in *reverse* order, so the item directly after the cursor
 * is the last element of `right`.
 *
 * Index arguments of `splice` and `slice` are normalized like those of
 * `Array.prototype.splice` / `Array.prototype.slice`: negative values count
 * from the end and out-of-range values are clamped to the buffer.
 *
 * @template {{} | null} T
 * @param {T[]=} initial
 *   Initial items (copied; the given array is never modified).
 * @returns {import("micromark-util-types").SpliceBuffer<T>}
 *   Buffer with `splice`, `push`, `pushMany`, `pop`, `unshift`,
 *   `unshiftMany`, `shift`, `slice`, `get`, and a `length` getter.
 */
export function spliceBuffer(initial) {
  /** @type {T[]} */
  const left = initial ? [...initial] : []
  /** @type {T[]} */
  const right = []

  /**
   * Normalize a user-supplied index the way array methods do: truncate to an
   * integer, treat `NaN` as `0`, resolve negative values relative to `size`,
   * and clamp the result to `0..size`.
   *
   * @param {number} value
   * @param {number} size
   * @returns {number}
   */
  function toIndex(value, size) {
    const integer = Math.trunc(value)

    if (Number.isNaN(integer)) return 0
    if (integer < 0) return Math.max(size + integer, 0)
    return Math.min(integer, size)
  }

  /**
   * Avoid stack overflow by pushing items onto the stack in segments
   * (spreading a very large array into `push` can exceed the engine's
   * argument limit).
   *
   * @param {T[]} array
   *   Array to append to (modified in place).
   * @param {T[]} items
   *   Items to append (not modified).
   * @returns {undefined}
   */
  function chunkedPush(array, items) {
    /** @type {number} */
    let chunkStart = 0

    if (items.length < constants.v8MaxSafeChunkSize) {
      array.push(...items)
    } else {
      while (chunkStart < items.length) {
        array.push(
          ...items.slice(chunkStart, chunkStart + constants.v8MaxSafeChunkSize)
        )
        chunkStart += constants.v8MaxSafeChunkSize
      }
    }
  }

  /**
   * Move the cursor to a specific position in the array. Requires
   * time proportional to the distance moved.
   *
   * If n < 0, the cursor will end up at the beginning.
   * If n > length, the cursor will end up at the end.
   *
   * @param {number} n
   * @returns {undefined}
   */
  function setCursor(n) {
    const size = left.length + right.length
    // Clamp first: a negative index handed to `Array#splice` would count
    // from the end and move only part of the items.
    const target = Math.min(Math.max(n, 0), size)

    if (target === left.length) return

    if (target < left.length) {
      // Move cursor to the left
      const moved = left.splice(target, Number.POSITIVE_INFINITY)
      chunkedPush(right, moved.reverse())
    } else {
      // Move cursor to the right
      const moved = right.splice(size - target, Number.POSITIVE_INFINITY)
      chunkedPush(left, moved.reverse())
    }
  }

  /**
   * Array access for the splice buffer (constant time)
   *
   * @param {number} index
   * @returns {T}
   * @throws {RangeError}
   *   When `index` is negative or not less than `length`.
   */
  function get(index) {
    if (index < 0 || index >= left.length + right.length)
      throw new RangeError(
        `index ${index} in a buffer of size ${left.length + right.length}`
      )
    if (index < left.length) return left[index]
    // `right` is reversed: the item right after the cursor is its last one.
    return right[right.length - index + left.length - 1]
  }

  /**
   * Remove `deleteCount` items starting at `start` and insert `items` there.
   * Moves the cursor to `start`.
   *
   * @param {number} start
   *   Position to splice at; negative values count from the end.
   * @param {number=} deleteCount
   *   Number of items to remove (default: `0`); clamped to the number of
   *   items available after `start`.
   * @param {T[]=} items
   *   Items to insert (not modified).
   * @returns {T[]}
   *   Removed items, in buffer order.
   */
  function splice(start, deleteCount, items) {
    setCursor(toIndex(start, left.length + right.length))

    // Clamp so that `right.length - count` can never turn negative, which
    // `Array#splice` would interpret as "relative to the end".
    const count = Math.min(
      Math.max(Math.trunc(deleteCount || 0), 0),
      right.length
    )
    const removed = right.splice(right.length - count, Number.POSITIVE_INFINITY)

    if (items) chunkedPush(left, items)
    return removed.reverse()
  }

  /**
   * Remove and return the last item.
   *
   * @returns {T | undefined}
   */
  function pop() {
    setCursor(Number.POSITIVE_INFINITY)
    return left.pop()
  }

  /**
   * Append one item at the end.
   *
   * @param {T} item
   * @returns {undefined}
   */
  function push(item) {
    setCursor(Number.POSITIVE_INFINITY)
    left.push(item)
  }

  /**
   * Append several items at the end.
   *
   * @param {T[]} items
   * @returns {undefined}
   */
  function pushMany(items) {
    setCursor(Number.POSITIVE_INFINITY)
    chunkedPush(left, items)
  }

  /**
   * Insert one item at the start.
   *
   * @param {T} item
   * @returns {undefined}
   */
  function unshift(item) {
    setCursor(0)
    right.push(item)
  }

  /**
   * Insert several items at the start, keeping their order.
   *
   * @param {T[]} items
   *   Items to insert (not modified).
   * @returns {undefined}
   */
  function unshiftMany(items) {
    setCursor(0)
    // Reverse a copy: `Array#reverse` works in place and would otherwise
    // flip the caller's array.
    chunkedPush(right, [...items].reverse())
  }

  /**
   * Remove and return the first item.
   *
   * @returns {T | undefined}
   */
  function shift() {
    setCursor(0)
    return right.pop()
  }

  /**
   * Copy a range of the buffer into a new array; does not move the cursor.
   *
   * @param {number} start
   *   Start of the range; negative values count from the end.
   * @param {number=} end
   *   End of the range, exclusive (default: the buffer length); negative
   *   values count from the end.
   * @returns {T[]}
   */
  function slice(start, end) {
    const size = left.length + right.length
    const from = toIndex(start, size)
    const to = end === undefined ? size : toIndex(end, size)

    if (from >= to) return []

    // Entirely before the cursor.
    if (to <= left.length) return left.slice(from, to)

    // Entirely after the cursor: buffer index `i` lives at
    // `right[size - 1 - i]`.
    if (from >= left.length) {
      return right.slice(size - to, size - from).reverse()
    }

    // Spans the cursor.
    return left.slice(from).concat(right.slice(size - to).reverse())
  }

  return {
    splice,
    push,
    pushMany,
    pop,
    unshift,
    unshiftMany,
    shift,
    slice,
    get,
    get length() {
      return left.length + right.length
    }
  }
}
15 changes: 12 additions & 3 deletions packages/micromark-util-subtokenize/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,20 @@
"dev/",
"index.d.ts.map",
"index.d.ts",
"index.js"
"index.js",
"splice-buffer.d.ts.map",
"splice-buffer.d.ts",
"splice-buffer.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
".": {
"development": "./dev/index.js",
"default": "./index.js"
},
"./splice-buffer": {
"development": "./dev/splice-buffer.js",
"default": "./splice-buffer.js"
}
},
"dependencies": {
"devlop": "^1.0.0",
Expand Down

0 comments on commit ef86838

Please sign in to comment.