Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(scripts): clean up bash scripts #3231

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ log*.html
.vscode
.cache

profile*
fuzz-results

/tree-sitter.pc
test/fuzz/out
test/fixtures/grammars/*
!test/fixtures/grammars/.gitkeep

package-lock.json
node_modules

docs/assets/js/tree-sitter.js

/tree-sitter.pc

/target
*.rs.bk
*.a
Expand Down
12 changes: 8 additions & 4 deletions script/benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

function usage {
cat <<-EOF
cat <<EOF
USAGE

$0 [-h] [-l language-name] [-e example-file-name] [-r repetition-count]
Expand Down Expand Up @@ -43,16 +43,20 @@ while getopts "hgl:e:r:" option; do
r)
export TREE_SITTER_BENCHMARK_REPETITION_COUNT=${OPTARG}
;;
*)
usage
exit 1
;;
esac
done

if [[ "${mode}" == "debug" ]]; then
if [[ $mode == debug ]]; then
test_binary=$(
cargo bench benchmark -p tree-sitter-cli --no-run --message-format=json 2> /dev/null |\
cargo bench benchmark -p tree-sitter-cli --no-run --message-format=json 2> /dev/null |
jq -rs 'map(select(.target.name == "benchmark" and .executable))[0].executable'
)
env | grep TREE_SITTER
echo $test_binary
echo "$test_binary"
else
exec cargo bench benchmark -p tree-sitter-cli
fi
63 changes: 31 additions & 32 deletions script/build-fuzzers
Original file line number Diff line number Diff line change
@@ -1,70 +1,69 @@
#!/bin/bash
#!/usr/bin/env bash

# shellcheck disable=SC2086

set -e

if [[ "$(uname -s)" != Linux ]]; then
echo "Fuzzing is only supported on Linux"
if [[ $(uname -s) != Linux ]]; then
printf 'Fuzzing is only supported on Linux\n' >&2
exit 1
fi

CC=${CC:-clang}
CXX=${CXX:-clang++}

default_fuzz_flags="-fsanitize=fuzzer,address,undefined"
default_fuzz_flags=-fsanitize=fuzzer,address,undefined

CFLAGS=${CFLAGS:-"$default_fuzz_flags"}
CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"}
export CFLAGS="$default_fuzz_flags $CFLAGS"
export CXXFLAGS="$default_fuzz_flags $CXXFLAGS"

export CFLAGS
make CC="$CC" CXX="$CXX"
make CC="$CC" CXX="$CXX" libtree-sitter.a

if [ -z "$@" ]; then
languages=$(ls test/fixtures/grammars)
if [[ -z $* ]]; then
mapfile -t languages < <(ls test/fixtures/grammars)
else
languages="$@"
languages=("$@")
fi

mkdir -p test/fuzz/out

for lang in ${languages[@]}; do
# skip typescript
if [[ $lang == "typescript" ]]; then
continue
for lang in "${languages[@]}"; do
# skip typescript & php
if [[ $lang == typescript || $lang == php ]]; then
continue
fi
echo "Building $lang fuzzer..."
printf 'Building %s fuzzer...\n' "$lang"
lang_dir="test/fixtures/grammars/$lang"
lang_grammar="${lang_dir}/src/grammar.json"

# The following assumes each language is implemented as src/parser.c plus an
# optional scanner in src/scanner.{c,cc}
# optional scanner in src/scanner.c
objects=()

lang_scanner="${lang_dir}/src/scanner"
if [ -e "${lang_scanner}.cc" ]; then
$CXX $CXXFLAGS -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.cc" -o "${lang_scanner}.o"
objects+=("${lang_scanner}.o")
elif [ -e "${lang_scanner}.c" ]; then
$CC $CFLAGS -std=c11 -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.c" -o "${lang_scanner}.o"
if [[ -f "${lang_scanner}.c" ]]; then
$CC $CFLAGS -std=c11 -g -O1 -I "${lang_dir}/src" -c "${lang_scanner}.c" -o "${lang_scanner}.o"
objects+=("${lang_scanner}.o")
fi


# Compiling with -O0 speeds up the build dramatically
$CC $CFLAGS -g -O0 "-I${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
$CC $CFLAGS -g -O0 -I "${lang_dir}/src" "${lang_dir}/src/parser.c" -c -o "${lang_dir}/src/parser.o"
objects+=("${lang_dir}/src/parser.o")

highlights_filename="${lang_dir}/queries/highlights.scm"
if [ -e "${highlights_filename}" ]; then
if [[ -f "${highlights_filename}" ]]; then
ts_lang_query_filename="${lang}.scm"
cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}"
else
ts_lang_query_filename=""
fi

# FIXME: We should extract the grammar name from grammar.js. Use the name of
# the directory instead. Also, the grammar name needs to be a valid C
# identifier so replace any '-' characters
ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)"
$CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
"test/fuzz/fuzzer.cc" "${objects[@]}" \
ts_lang="tree_sitter_$(jq -r .name "$lang_grammar")"
$CXX $CXXFLAGS -std=c++11 -Ilib/include \
-D TS_LANG="$ts_lang" \
-D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \
test/fuzz/fuzzer.cc \
"${objects[@]}" \
libtree-sitter.a \
-o "test/fuzz/out/${lang}_fuzzer"

Expand All @@ -73,5 +72,5 @@ for lang in ${languages[@]}; do
| if .type? == "STRING" or (.type? == "ALIAS" and .named? == false) then .value else empty end
| select(test("\\S") and length == utf8bytelength)
] | unique | .[]
' | sort
' "$lang_grammar" | sort > "test/fuzz/out/${lang}.dict"
done
105 changes: 52 additions & 53 deletions script/build-wasm
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,19 @@ EOF

set -e

web_dir=lib/binding_web
src_dir=lib/src
emscripten_flags="-O3"
WEB_DIR=lib/binding_web
SRC_DIR=lib/src
EMSCRIPTEN_VERSION=$(< cli/loader/emscripten-version)

minify_js=1
force_docker=0
emscripen_version=$(cat "$(dirname "$0")"/../cli/loader/emscripten-version)
emscripten_flags=(-O3)

while [[ $# > 0 ]]; do
while (($# > 0)); do
case "$1" in
--debug)
minify_js=0
emscripten_flags="-s ASSERTIONS=1 -s SAFE_HEAP=1 -O0"
emscripten_flags=(-s ASSERTIONS=1 -s SAFE_HEAP=1 -O0)
;;

--help)
Expand All @@ -53,59 +54,60 @@ while [[ $# > 0 ]]; do
;;

-v|--verbose)
emscripten_flags="-s VERBOSE=1 -v $emscripten_flags"
emscripten_flags+=(-s VERBOSE=1 -v)
;;

*)
usage
echo "Unrecognized argument '$1'"
printf "Unrecognized argument '%s'\n" "$1" >&2
exit 1
;;
esac
shift
done

emcc=""
docker=""
if which emcc > /dev/null && [[ "$force_docker" == "0" ]]; then
emcc=
docker=
if [[ $force_docker == 0 ]] && command -v emcc > /dev/null; then
emcc=emcc
elif which docker > /dev/null; then
elif command -v docker > /dev/null; then
# detect which one to use
docker=docker
elif which podman > /dev/null; then
elif command -v podman > /dev/null; then
docker=podman
fi

if [ -z "$emcc" ] && [ -n "$docker" ]; then
export PODMAN_USERNS=keep-id
emcc="$docker run \
--rm \
-v $(pwd):/src:Z \
-u $(id -u) \
emscripten/emsdk:$emscripen_version \
if [[ -z $emcc ]] && [[ -n $docker ]]; then
if [[ $docker == podman ]]; then
export PODMAN_USERNS=keep-id
fi
emcc="$docker run \
--rm \
-v $PWD:/src:Z \
-u $UID \
emscripten/emsdk:$EMSCRIPTEN_VERSION \
emcc"
fi

if [ -z "$emcc" ]; then
if [[ "$force_docker" == "1" ]]; then
echo 'You must have `docker` or `podman` on your PATH to run this script with --docker'
if [[ -z $emcc ]]; then
if [[ $force_docker == 1 ]]; then
# shellcheck disable=SC2016
printf 'You must have `docker` or `podman` in your PATH to run this script with --docker\n' >&2
else
echo 'You must have either `docker`, `podman`, or `emcc` on your PATH to run this script'
# shellcheck disable=SC2016
printf 'You must have either `docker`, `podman`, or `emcc` in your PATH to run this script\n' >&2
fi
exit 1
fi

mkdir -p target/scratch

runtime_methods='stringToUTF16','AsciiToString'
runtime_methods=stringToUTF16,AsciiToString

# Remove quotes, add leading underscores, remove newlines, remove trailing comma.
EXPORTED_FUNCTIONS=$( \
cat ${src_dir}/wasm/stdlib-symbols.txt ${web_dir}/exports.txt | \
sed -e 's/"//g' | \
sed -e 's/^/_/g' | \
tr -d '\n"' | \
sed -e 's/,$//' \
exported_functions=$(
cat ${SRC_DIR}/wasm/stdlib-symbols.txt ${WEB_DIR}/exports.txt |
sed -e 's/"//g;s/^/_/g' | tr -d '\n' | sed -e 's/,$//'
)

# Use emscripten to generate `tree-sitter.js` and `tree-sitter.wasm`
Expand All @@ -118,40 +120,37 @@ $emcc \
-s FILESYSTEM=0 \
-s NODEJS_CATCH_EXIT=0 \
-s NODEJS_CATCH_REJECTION=0 \
-s EXPORTED_FUNCTIONS=${EXPORTED_FUNCTIONS} \
-s EXPORTED_FUNCTIONS="${exported_functions}" \
-s EXPORTED_RUNTIME_METHODS=$runtime_methods \
$emscripten_flags \
"${emscripten_flags[@]}" \
-fno-exceptions \
-std=c11 \
-D 'fprintf(...)=' \
-D NDEBUG= \
-I ${src_dir} \
-I ${SRC_DIR} \
-I lib/include \
--js-library ${web_dir}/imports.js \
--pre-js ${web_dir}/prefix.js \
--post-js ${web_dir}/binding.js \
--post-js ${web_dir}/suffix.js \
--js-library ${WEB_DIR}/imports.js \
--pre-js ${WEB_DIR}/prefix.js \
--post-js ${WEB_DIR}/binding.js \
--post-js ${WEB_DIR}/suffix.js \
lib/src/lib.c \
${web_dir}/binding.c \
${WEB_DIR}/binding.c \
-o target/scratch/tree-sitter.js

# Use terser to write a minified version of `tree-sitter.js` into
# the `lib/binding_web` directory.
if [[ "$minify_js" == "1" ]]; then
if [ ! -d ${web_dir}/node_modules/terser ]; then
(
cd ${web_dir}
npm install
)
if [[ $minify_js == 1 ]]; then
if [[ ! -d ${WEB_DIR}/node_modules/terser ]]; then
(cd ${WEB_DIR} && npm install)
fi
${web_dir}/node_modules/.bin/terser \
--compress \
--mangle \
--keep-classnames \
-- target/scratch/tree-sitter.js \
> ${web_dir}/tree-sitter.js
${WEB_DIR}/node_modules/.bin/terser \
--compress \
--mangle \
--keep-classnames \
-- target/scratch/tree-sitter.js \
> ${WEB_DIR}/tree-sitter.js
else
cp target/scratch/tree-sitter.js ${web_dir}/tree-sitter.js
cp target/scratch/tree-sitter.js ${WEB_DIR}/tree-sitter.js
fi

mv target/scratch/tree-sitter.wasm ${web_dir}/tree-sitter.wasm
mv target/scratch/tree-sitter.wasm ${WEB_DIR}/tree-sitter.wasm
18 changes: 6 additions & 12 deletions script/build-wasm-stdlib
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
#!/bin/bash
#!/usr/bin/env bash

set -e

# Remove quotes and commas
EXPORTED_FUNCTIONS=$( \
cat lib/src/wasm/stdlib-symbols.txt | \
tr -d ',"' \
)

EXPORT_FLAGS=""
for function in ${EXPORTED_FUNCTIONS}; do
EXPORT_FLAGS+=" -Wl,--export=${function}"
done
declare -a EXPORT_FLAGS
while read -r -d, function; do
EXPORT_FLAGS+=("-Wl,--export=${function:1:-1}")
done < lib/src/wasm/stdlib-symbols.txt

target/wasi-sdk-21.0/bin/clang-17 \
-o stdlib.wasm \
Expand All @@ -27,7 +21,7 @@ target/wasi-sdk-21.0/bin/clang-17 \
-Wl,--export=reset_heap \
-Wl,--export=__wasm_call_ctors \
-Wl,--export=__stack_pointer \
${EXPORT_FLAGS} \
"${EXPORT_FLAGS[@]}" \
lib/src/wasm/stdlib.c

xxd -C -i stdlib.wasm > lib/src/wasm/wasm-stdlib.h
Expand Down
20 changes: 6 additions & 14 deletions script/check-mallocs
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
#!/usr/bin/env bash

src_dir="lib/src"
src_dir=lib/src
allocation_functions=(malloc calloc realloc free)

allocation_functions=(
malloc
calloc
realloc
free
)

for function in ${allocation_functions[@]}; do
usages=$(grep --line-number -E "\b${function}\(" -r "${src_dir}" --exclude alloc.h )

if [[ ! -z $usages ]]; then
echo "The ${function} function should not be called directly, but is called here:"
echo "$usages"
for function in "${allocation_functions[@]}"; do
usages=$(grep -n -E "\b${function}\(" -r $src_dir --exclude alloc.c --exclude stdlib.c)
if [[ -n $usages ]]; then
printf 'The %s function should not be called directly, but is called here:\n%s\n' "$function" "$usages" >&2
exit 1
fi
done