Skip to content

Commit

Permalink
Fix behavior in non-CJK context
Browse files Browse the repository at this point in the history
  • Loading branch information
stephanoskomnenos committed Jul 18, 2023
1 parent 1e4b135 commit d0259c3
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 97 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"curly": "warn",
"eqeqeq": "warn",
"indent": "off",
"max-len": "warn",
"max-len": ["warn", 120],
"no-throw-literal": "warn",
"no-trailing-spaces": "warn",
"prefer-const": "warn",
Expand Down
11 changes: 2 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"name": "jieba",
"displayName": "Jieba",
"description": "Jieba Chinese word segmenter for VSCode",
"version": "0.1.6",
"version": "0.1.7",
"engines": {
"vscode": "^1.67.0"
"vscode": "^1.78.1"
},
"publisher": "StephanosKomnenos",
"homepage": "https://github.com/stephanoskomnenos/vscode-jieba",
Expand All @@ -21,13 +21,6 @@
"categories": [
"Other"
],
"activationEvents": [
"onCommand:jieba.forwardWord",
"onCommand:jieba.backwardWord",
"onCommand:jieba.killWord",
"onCommand:jieba.backwardKillWord",
"onCommand:jieba.selectWord"
],
"main": "./out/extension.js",
"contributes": {
"keybindings": [
Expand Down
160 changes: 110 additions & 50 deletions src/command.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import * as vscode from "vscode";
import { parseAllSelections } from "./parse";
import { parseSentence } from "./parse";

export function forwardWord() {
const editor = vscode.window.activeTextEditor;
if (editor === undefined) {
return;
}
const { newSelections: newSelections } = searchForward();
const { newSelections } = searchForward();
editor.selections = newSelections;
}

Expand All @@ -15,7 +15,7 @@ export function backwardWord() {
if (editor === undefined) {
return;
}
const { newSelections: newSelections } = searchBackward();
const { newSelections } = searchBackward();
editor.selections = newSelections;
}

Expand Down Expand Up @@ -71,23 +71,22 @@ export function selectWord() {
return;
}

const tokensBySelections = parseAllSelections();
const selections = editor.selections;

const newSelections: vscode.Selection[] = [];

for (const [selection, tokens] of tokensBySelections) {
for (const selection of selections) {
const start = selection.start;
const end = selection.end;
const lineNum = start.line;
const charNum = start.character;

for (const token of tokens) {
if (token.start <= charNum && token.end > charNum) {
const wordStart = new vscode.Position(lineNum, token.start);
const wordEnd = new vscode.Position(lineNum, token.end);
newSelections.push(new vscode.Selection(wordStart, wordEnd));
break;
}
}

const wordStartPos = findWordStartPosition(start);
const wordStart = new vscode.Position(lineNum, wordStartPos);

const wordEndPos = findWordEndPosition(end);
const wordEnd = new vscode.Position(lineNum, wordEndPos);

newSelections.push(new vscode.Selection(wordStart, wordEnd));
}

editor.selections = newSelections;
Expand All @@ -98,15 +97,20 @@ function searchForward(): {
rangesToDelete: vscode.Range[];
} {
const document = vscode.window.activeTextEditor!.document;
const tokensBySelections = parseAllSelections();
const selections = vscode.window.activeTextEditor!.selections;

const newSelections: vscode.Selection[] = [];
const rangesToDelete: vscode.Range[] = [];

for (const [selection, tokens] of tokensBySelections) {
for (const selection of selections) {
let cursor = selection.start;
const line = document.lineAt(cursor.line);

if (cursor.isEqual(line.range.end) && document.lineCount === cursor.line + 1) {
newSelections.push(new vscode.Selection(cursor, cursor));
continue;
}

/*
* if the cursor is not at the end of the line
* and the character after is whitespace,
Expand All @@ -125,34 +129,29 @@ function searchForward(): {
const nextNonSpace = new vscode.Position(cursor.line, nextPos);
rangesToDelete.push(new vscode.Range(cursor, nextNonSpace));
cursor = nextNonSpace;

if (cursor.isEqual(line.range.end)) {
newSelections.push(new vscode.Selection(cursor, cursor));
continue;
}
}

/*
* if the cursor is at the end of the line
* and the next line exists,
* then jump to the beginning of the next line.
*/
if (
cursor.isEqual(line.range.end) &&
document.lineCount > cursor.line + 1
) {
if (cursor.isEqual(line.range.end) && document.lineCount > cursor.line + 1) {
const nextLineStart = new vscode.Position(cursor.line + 1, 0);
newSelections.push(new vscode.Selection(nextLineStart, nextLineStart));
continue;
}

/*
* jump to the end of the word
* and mark range(cursor, end of the word + 1) for deletion.
*/
for (const token of tokens) {
if (token.start <= cursor.character && token.end > cursor.character) {
const wordEnd = new vscode.Position(cursor.line, token.end);
rangesToDelete.push(new vscode.Range(cursor, wordEnd));
newSelections.push(new vscode.Selection(wordEnd, wordEnd));
break;
}
}
const wordEndPos = findWordEndPosition(cursor);
const wordEnd = new vscode.Position(cursor.line, wordEndPos);

rangesToDelete.push(new vscode.Range(cursor, wordEnd));
newSelections.push(new vscode.Selection(wordEnd, wordEnd));
}

return { newSelections, rangesToDelete };
Expand All @@ -163,16 +162,20 @@ function searchBackward(): {
rangesToDelete: vscode.Range[];
} {
const document = vscode.window.activeTextEditor!.document;

const tokensBySelections = parseAllSelections();
const selections = vscode.window.activeTextEditor!.selections;

const newSelections: vscode.Selection[] = [];
const rangesToDelete: vscode.Range[] = [];

for (const [selection, tokens] of tokensBySelections) {
for (const selection of selections) {
let cursor = selection.start;
const line = document.lineAt(cursor.line);

if (cursor.character === 0 && cursor.line === 0) {
newSelections.push(new vscode.Selection(cursor, cursor));
continue;
}

/*
* if the cursor is not at the beginning of the line,
* and the character before is whitespace,
Expand All @@ -186,10 +189,14 @@ function searchBackward(): {
const nonSpacePos = findLastNonSpace(
line.text.slice(0, cursor.character),
);
const nextPos = nonSpacePos === -1 ? 0 : nonSpacePos;
const whitespaceStart = new vscode.Position(cursor.line, nextPos + 1);
const whitespaceStart = new vscode.Position(cursor.line, nonSpacePos + 1);
rangesToDelete.push(new vscode.Range(whitespaceStart, cursor));
cursor = whitespaceStart;

if (cursor.character === 0) {
newSelections.push(new vscode.Selection(cursor, cursor));
continue;
}
}

/*
Expand All @@ -203,18 +210,11 @@ function searchBackward(): {
continue;
}

/*
* jump to the beginning of the word
* and mark range(the beginning of the word, cursor) for deletion
*/
for (const token of tokens) {
if (token.start < cursor.character && token.end >= cursor.character) {
const wordStart = new vscode.Position(cursor.line, token.start);
rangesToDelete.push(new vscode.Range(wordStart, cursor));
newSelections.push(new vscode.Selection(wordStart, wordStart));
break;
}
}
const wordStartPos = findWordStartPosition(cursor);
const wordStart = new vscode.Position(cursor.line, wordStartPos);

rangesToDelete.push(new vscode.Range(wordStart, cursor));
newSelections.push(new vscode.Selection(wordStart, wordStart));
}

return { newSelections, rangesToDelete };
Expand All @@ -235,3 +235,63 @@ function findLastNonSpace(text: string): number {
/**
 * Tells whether `c` consists of exactly one whitespace character.
 *
 * @param c - The string to test; expected to be a single character.
 * @returns `true` only when `c` is one character matched by `\s`;
 *   empty or longer strings yield `false`.
 */
function isWhiteSpace(c: string): boolean {
  const singleWhitespace = /^[\s]$/;
  return singleWhitespace.test(c);
}

/**
 * Measures the prefix of `text` made of optional leading whitespace followed
 * by one run of `\w` characters.
 *
 * @param text - The text to the right of the cursor.
 * @returns The length of that prefix (i.e. the offset just past the word),
 *   or `-1` when `text` does not begin with such a prefix.
 */
function findFirstSpaceAfterNonCJK(text: string): number {
  const leadingWord = /^(\s*\w+(?<![\u4e00-\u9fff]))\b/.exec(text);
  return leadingWord === null ? -1 : leadingWord[1].length;
}

/**
 * Locates the start of the last run of `\w` characters in `text`, where that
 * run may only be followed by whitespace up to the end of `text`.
 *
 * @param text - The text to the left of the cursor.
 * @returns The index at which that trailing word begins, or `-1` when `text`
 *   does not end with a word (optionally followed by whitespace).
 */
function findLastSpaceBeforeNonCJK(text: string): number {
  const trailingWord = /\b\w+(?<![\u4e00-\u9fff])\s*$/.exec(text);
  // The match always runs to the end of `text`, so its index equals
  // `text.length - trailingWord[0].length`.
  return trailingWord === null ? -1 : trailingWord.index;
}

/**
 * Finds the column at which the word ending at, or containing, `cursor` begins.
 *
 * The text to the left of the cursor is first checked for a trailing run of
 * `\w` characters (non-CJK context). When there is none, the whole line is
 * tokenized with `parseSentence` and the token satisfying
 * `token.start < cursor.character <= token.end` supplies the start column.
 *
 * Relies on an active text editor being present; callers check this first.
 *
 * @param cursor - Position whose line and character offset are inspected.
 * @returns The start column of the word, or `cursor.character` unchanged when
 *   no token lies before the cursor (e.g. the cursor is at column 0).
 */
function findWordStartPosition(cursor: vscode.Position): number {
  const line = vscode.window.activeTextEditor!.document.lineAt(cursor.line);

  const wordStartPos = findLastSpaceBeforeNonCJK(line.text.slice(0, cursor.character));

  // non-CJK context
  if (wordStartPos !== -1) {
    return wordStartPos;
  }

  // CJK context: use the segmenter's token that ends at or spans the cursor.
  const tokens = parseSentence(line.text);
  const target = tokens.find((token) => {
    return token.start < cursor.character && token.end >= cursor.character;
  });

  // Nothing precedes the cursor (column 0, empty line): stay in place rather
  // than dereferencing an undefined token.
  if (target === undefined) {
    return cursor.character;
  }
  return target.start;
}

/**
 * Finds the column at which the word starting at, or containing, `cursor` ends.
 *
 * The text to the right of the cursor is first checked for optional whitespace
 * followed by a run of `\w` characters (non-CJK context). When there is none,
 * the whole line is tokenized with `parseSentence` and the token satisfying
 * `token.start <= cursor.character < token.end` supplies the end column.
 *
 * Relies on an active text editor being present; callers check this first.
 *
 * @param cursor - Position whose line and character offset are inspected.
 * @returns The end column of the word, or `cursor.character` unchanged when
 *   no token covers the cursor (e.g. the cursor is at the end of the line).
 */
function findWordEndPosition(cursor: vscode.Position): number {
  const line = vscode.window.activeTextEditor!.document.lineAt(cursor.line);

  const wordEndPos = findFirstSpaceAfterNonCJK(line.text.slice(cursor.character));

  // non-CJK context: the helper's offset is relative to the cursor.
  if (wordEndPos !== -1) {
    return cursor.character + wordEndPos;
  }

  // CJK context: use the segmenter's token that starts at or spans the cursor.
  const tokens = parseSentence(line.text);
  const target = tokens.find((token) => {
    return token.start <= cursor.character && token.end > cursor.character;
  });

  // Nothing follows the cursor (end of line, empty line): stay in place rather
  // than dereferencing an undefined token.
  if (target === undefined) {
    return cursor.character;
  }
  return target.end;
}
19 changes: 2 additions & 17 deletions src/parse.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import * as vscode from "vscode";
import { tokenize } from "jieba-wasm";

export interface Token {
Expand All @@ -7,20 +6,6 @@ export interface Token {
end: number;
};

function parseSentence(sentence: string): Token[] {
export function parseSentence(sentence: string): Token[] {
return tokenize(sentence, "default", true);
}

export function parseAllSelections(): Map<vscode.Selection, Token[]> {
const editor = vscode.window.activeTextEditor!;
const document = editor.document;
const selections = editor.selections;

const tokensBySelections = new Map<vscode.Selection, Token[]>();
selections.map((s) => {
const line = document.lineAt(s.start.line).text;
tokensBySelections.set(s, parseSentence(line));
});

return tokensBySelections;
}
}
30 changes: 10 additions & 20 deletions src/test/suite/extension.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,36 +76,26 @@ async function englishTest() {
});
editor.selection = new vscode.Selection(startPos, startPos);

forwardWord();
assert.strictEqual(editor.selection.start.character, 1);
forwardWord();
assert.strictEqual(editor.selection.start.character, 5);

for (let i = 0; i < 9; i++) {
for (let i = 0; i < 20; i++) {
forwardWord();
}
assert.strictEqual(editor.selection.start.character, 59);

assert.strictEqual(
editor.document.getText(
new vscode.Range(new vscode.Position(0, 59), new vscode.Position(0, 63)),
),
" and",
editor.selection.start.isEqual(editor.document.lineAt(0).range.end),
true,
);

await killWord();
assert.strictEqual(editor.selection.start.character, 59);
assert.strictEqual(
editor.document.getText(
new vscode.Range(new vscode.Position(0, 59), new vscode.Position(0, 70)),
),
" community.",
);

for (let i = 0; i < 5; i++) {
backwardWord();
}
for (let i = 0; i < 6; i++) {
for (let i = 0; i < 20; i++) {
await backwardKillWord();
}
assert.ok(editor.selection.start.isEqual(new vscode.Position(0, 0)));

assert.strictEqual(
editor.selection.start.isEqual(editor.document.lineAt(0).range.start),
true,
);
}

0 comments on commit d0259c3

Please sign in to comment.