Skip to content

Commit

Permalink
Fix hyphenation for Czech, Polish, Portuguese and Spanish
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-araujjo committed May 4, 2024
1 parent a4d994d commit 20ecda8
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 8 deletions.
81 changes: 73 additions & 8 deletions crates/typst/src/layout/inline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -816,8 +816,10 @@ fn linebreak_simple<'a>(
let mut last = None;

breakpoints(p, |end, breakpoint| {
let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false);

// Compute the line and its size.
let mut attempt = line(engine, p, start..end, breakpoint);
let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);

// If the line doesn't fit anymore, we push the last fitting attempt
// into the stack and rebuild the line from the attempt's end. The
Expand All @@ -826,7 +828,7 @@ fn linebreak_simple<'a>(
if let Some((last_attempt, last_end)) = last.take() {
lines.push(last_attempt);
start = last_end;
attempt = line(engine, p, start..end, breakpoint);
attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
}
}

Expand Down Expand Up @@ -896,7 +898,7 @@ fn linebreak_optimized<'a>(
let mut table = vec![Entry {
pred: 0,
total: 0.0,
line: line(engine, p, 0..0, Breakpoint::Mandatory),
line: line(engine, p, 0..0, Breakpoint::Mandatory, false),
}];

let em = p.size;
Expand All @@ -910,8 +912,9 @@ fn linebreak_optimized<'a>(
for (i, pred) in table.iter().enumerate().skip(active) {
// Layout the line.
let start = pred.line.end;
let prepend_hyphen = should_repeat_hyphen(&pred.line);

let attempt = line(engine, p, start..end, breakpoint);
let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);

// Determine how much the line's spaces would need to be stretched
// to make it the desired width.
Expand Down Expand Up @@ -1024,6 +1027,7 @@ fn line<'a>(
p: &'a Preparation,
mut range: Range,
breakpoint: Breakpoint,
prepend_hyphen: bool,
) -> Line<'a> {
let end = range.end;
let mut justify =
Expand Down Expand Up @@ -1091,13 +1095,25 @@ fn line<'a>(
// need the shaped empty string to make the line the appropriate
// height. That is the case exactly if the string is empty and there
// are no other items in the line.
if hyphen || start + shaped.text.len() > range.end || maybe_adjust_last_glyph {
if hyphen || start < range.end || before.is_empty() {
if hyphen
|| start + shaped.text.len() > range.end
|| maybe_adjust_last_glyph
|| (prepend_hyphen && before.is_empty())
{
if hyphen
|| start < range.end
|| before.is_empty()
|| (prepend_hyphen && before.is_empty())
{
let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
if hyphen || shy {
reshaped.push_hyphen(engine, p.fallback);
}

if prepend_hyphen && before.is_empty() {
reshaped.prepend_hyphen(engine, p.fallback);
}

if let Some(last_glyph) = reshaped.glyphs.last() {
if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
// If the last glyph is a CJK punctuation, we want to shrink it.
Expand Down Expand Up @@ -1143,10 +1159,18 @@ fn line<'a>(
let end = range.end.min(base + shaped.text.len());

// Reshape if necessary.
if range.start + shaped.text.len() > end || maybe_adjust_first_glyph {
if range.start + shaped.text.len() > end
|| maybe_adjust_first_glyph
|| prepend_hyphen
{
// If the range is empty, we don't want to push an empty text item.
if range.start < end {
let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
let mut reshaped = shaped.reshape(engine, &p.spans, range.start..end);

if prepend_hyphen {
reshaped.prepend_hyphen(engine, p.fallback)
}

width += reshaped.width;
first = Some(Item::Text(reshaped));
}
Expand Down Expand Up @@ -1458,3 +1482,44 @@ fn overhang(c: char) -> f64 {
_ => 0.0,
}
}

/// Decides whether the line following `pred_line` must start with a repeated
/// hyphen.
///
/// Returns `true` only if `pred_line` ends in a hard hyphen with nothing
/// trimmed after it, its last item is text, and the language of that text
/// asks for the hyphen to be repeated at the start of the next line.
fn should_repeat_hyphen(pred_line: &Line) -> bool {
    // Two quick rejections:
    // - The predecessor line does not end with a `Dash::HardHyphen`, so there
    //   is no hyphen to repeat.
    // - Something (a space) was trimmed after the hyphen. That happens in
    //   text such as "... kebab é a -melhor- comida que existe", where the
    //   hyphens act as an emphasis marker rather than joining a compound.
    if pred_line.dash != Some(Dash::HardHyphen)
        || pred_line.trimmed.end != pred_line.end
    {
        return false;
    }

    // Only a trailing text item carries the language we need to inspect.
    let shaped = match pred_line.last.as_ref() {
        Some(Item::Text(shaped)) => shaped,
        _ => return false,
    };

    // Repeating the hyphen is a language-specific rule. For background, see
    // the discussion at https://github.com/typst/typst/issues/3235
    match shaped.lang {
        // Spanish repeats the hyphen only when the word that follows it is
        // not capitalized.
        //
        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
        Lang::SPANISH => {
            let following = &pred_line.bidi.text[pred_line.end..];
            following.chars().next().map_or(false, |c| !c.is_uppercase())
        }
        // Czech: see https://prirucka.ujc.cas.cz/?id=164
        //
        // Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
        //
        // Portuguese: see Base XX of "Acordo Ortográfico da Língua Portuguesa de 1990"
        // https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf
        Lang::CZECH | Lang::POLISH | Lang::PORTUGUESE => true,
        // Every other language leaves the next line untouched.
        _ => false,
    }
}
50 changes: 50 additions & 0 deletions crates/typst/src/layout/inline/shaping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,56 @@ impl<'a> ShapedText<'a> {
});
}

/// Prepend a hyphen glyph to the start of the shaped text.
///
/// The text's font families are tried in order, followed by a fallback font
/// covering "-" when `fallback` is `true`. The first font that can be loaded
/// and provides both a glyph and a horizontal advance for `'-'` is used. If
/// no font qualifies, the text is left unchanged.
pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) {
    let world = engine.world;
    let book = world.book();

    // The fallback lookup is wrapped in a closure so that it only runs if
    // none of the configured families yielded a usable font.
    let last_resort = if fallback {
        Some(|| book.select_fallback(None, self.variant, "-"))
    } else {
        None
    };
    let mut candidates = families(self.styles)
        .map(|family| book.select(family, self.variant))
        .chain(last_resort.iter().map(|select| select()))
        .flatten();

    // Phase 1: pick the first font that can actually render a hyphen.
    let hit = candidates.find_map(|id| {
        let font = world.font(id)?;
        let ttf = font.ttf();
        let glyph_id = ttf.glyph_index('-')?;
        let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
        Some((font, glyph_id, x_advance))
    });

    // Phase 2: splice the hyphen in front of the existing glyphs.
    if let Some((font, glyph_id, x_advance)) = hit {
        // The hyphen maps to an empty text range located at the start of the
        // current first glyph. If there are no glyphs at all, use `self.base`
        // instead so that subtracting `self.base` from either endpoint
        // cannot underflow. See <https://github.com/typst/typst/issues/2283>.
        let anchor = match self.glyphs.first() {
            Some(first) => first.range.start,
            None => self.base,
        };

        self.width += x_advance.at(self.size);
        self.glyphs.to_mut().insert(
            0,
            ShapedGlyph {
                font,
                glyph_id: glyph_id.0,
                x_advance,
                x_offset: Em::zero(),
                y_offset: Em::zero(),
                adjustability: Adjustability::default(),
                range: anchor..anchor,
                safe_to_break: true,
                c: '-',
                span: (Span::detached(), 0),
                is_justifiable: false,
                script: Script::Common,
            },
        );
    }
}

/// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break.
fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
Expand Down
Binary file added tests/ref/hyphenate-es-captalized-names.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/ref/hyphenate-es-repeat-hyphen.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/ref/hyphenate-pt-dash-emphasis.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/ref/hyphenate-pt-no-repeat-hyphen.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 49 additions & 0 deletions tests/suite/layout/inline/hyphenate.typ
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,55 @@ It's a #emph[Tree]beard.
#set text(hyphenate: true)
#h(6pt) networks, the rest.

--- hyphenate-pt-repeat-hyphen-natural-word-breaking ---
// The word breaker naturally breaks arco-da-velha at arco-/-da-velha,
// so we shall repeat the hyphen even though hyphenation is not enabled.
#set page(width: 4cm, height: 2cm, margin: 2mm)
#set text(lang: "pt")

Alguma coisa no arco-da-velha é algo que está muito longe.

--- hyphenate-pt-repeat-hyphen-hyphenate-true ---
#set page(width: 4cm, height: 2cm, margin: 2mm)
#set text(lang: "pt", hyphenate: true)

Alguma coisa no arco-da-velha é algo que está muito longe.

--- hyphenate-pt-no-repeat-hyphen ---
// LARGE
#set page(width: 4cm, height: 2cm, margin: 2mm)
#set text(lang: "pt", hyphenate: true)

Um médico otorrinolaringologista cuida da garganta do paciente.

--- hyphenate-pt-dash-emphasis ---
// If the hyphen is followed by a space we shall not repeat the hyphen
// at the next line
#set page(width: 4cm, height: 2cm, margin: 2mm)
#set text(lang: "pt", hyphenate: true)

Quebabe é a -melhor- comida que existe.

--- hyphenate-es-repeat-hyphen ---
// LARGE
#set page(width: 6.25cm, height: 6cm, margin: 2mm)
#set text(lang: "es", hyphenate: true)

Lo que entendemos por nivel léxico-semántico, en cuanto su sentido más
gramatical: es aquel que estudia el origen y forma de las palabras de
un idioma.

--- hyphenate-es-captalized-names ---
// LARGE
// If the hyphen is followed by a capitalized word we shall not repeat
// the hyphen at the next line
#set page(width: 6.5cm, height: 3.5cm, margin: 2mm)
#set text(lang: "es", hyphenate: true)

Tras el estallido de la contienda Ruiz-Giménez fue detenido junto a sus
dos hermanos y puesto bajo custodia por las autoridades republicanas, con
el objetivo de protegerle de las patrullas de milicianos.

--- costs-widow-orphan ---
#set page(height: 60pt)

Expand Down

0 comments on commit 20ecda8

Please sign in to comment.