From fd6f0da7629a67be6b11b1f640e611728c24a241 Mon Sep 17 00:00:00 2001 From: John Downey Date: Wed, 3 Jun 2026 10:15:55 -0700 Subject: [PATCH 1/2] Fix string.drop_start on JavaScript with multi-byte characters The JavaScript target reused unsafe_byte_slice with Erlang byte offsets, but JS strings are indexed by UTF-16 code units, so drop_start returned wrong results for multi-byte input. Add a target-specific implementation that slices off the grapheme prefix using code-unit lengths. --- CHANGELOG.md | 5 +++++ src/gleam/string.gleam | 2 +- src/gleam_stdlib.mjs | 9 +++++++-- test/gleam/string_test.gleam | 13 +++++++++++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52608459..b3c13fd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Unreleased + +- Fixed a bug where `string.drop_start` would return incorrect results on + JavaScript when the string contained multi-byte characters. + ## v1.0.4 - 2026-05-30 - Fix a bug where dicts and sets with hash collisions but equal entries would diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam index e543bb68..753ea33a 100644 --- a/src/gleam/string.gleam +++ b/src/gleam/string.gleam @@ -200,7 +200,6 @@ pub fn slice(from string: String, at_index idx: Int, length len: Int) -> String fn grapheme_slice(string: String, index: Int, length: Int) -> String @external(erlang, "binary", "part") -@external(javascript, "../gleam_stdlib.mjs", "string_byte_slice") fn unsafe_byte_slice(string: String, index: Int, length: Int) -> String /// Drops contents of the first `String` that occur before the second `String`. @@ -227,6 +226,7 @@ pub fn crop(from string: String, before substring: String) -> String /// assert drop_start(from: "The Lone Gunmen", up_to: 2) == "e Lone Gunmen" /// ``` /// +@external(javascript, "../gleam_stdlib.mjs", "string_drop_start") pub fn drop_start(from string: String, up_to num_graphemes: Int) -> String { case num_graphemes <= 0 { True -> string diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 96582b12..c3de0e57 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -199,8 +199,13 @@ export function length(data) { return data.length; } -export function string_byte_slice(string, index, length) { - return string.slice(index, index + length); +export function string_drop_start(string, num_graphemes) { + if (num_graphemes <= 0) { + return string; + } + + const prefix = string_grapheme_slice(string, 0, num_graphemes); + return string.slice(prefix.length); } export function string_grapheme_slice(string, idx, len) { diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam index ec4c0880..201e75b2 100644 --- a/test/gleam/string_test.gleam +++ b/test/gleam/string_test.gleam @@ -471,6 +471,19 @@ pub fn drop_start_3499_test() { assert string.drop_start("\r]", 1) == "]" } +pub fn drop_start_multibyte_test() { + // https://github.com/gleam-lang/stdlib/issues/924 + assert string.drop_start("广州abcdefghijklmn", 0) == "广州abcdefghijklmn" + assert string.drop_start("广州abcdefghijklmn", 1) == "州abcdefghijklmn" + assert string.drop_start("广州abcdefghijklmn", 2) == "abcdefghijklmn" + assert string.drop_start("广州abcdefghijklmn", 3) == "bcdefghijklmn" +} + +pub fn drop_start_grapheme_cluster_test() { + assert string.drop_start("👶🏿abc", 1) == "abc" + assert string.drop_start("e\u{0301}abc", 1) == "abc" +} + pub fn drop_end_basic_test() { assert string.drop_end("gleam", up_to: 2) == "gle" } From 0d447e8d6db7538efa66a654e078ee80bf3e50a1 Mon Sep 17 00:00:00 2001 From: John Downey Date: Wed, 3 Jun 2026 11:58:31 -0700 Subject: [PATCH 2/2] Use grapheme iterator directly in string_drop_start --- src/gleam_stdlib.mjs | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index c3de0e57..936d92e9 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -200,12 +200,38 @@ export function length(data) { } export function string_drop_start(string, num_graphemes) { - if (num_graphemes <= 0) { + if (num_graphemes <= 0 || string === "") { return string; } - const prefix = string_grapheme_slice(string, 0, num_graphemes); - return string.slice(prefix.length); + const iterator = graphemes_iterator(string); + if (iterator) { + let offset = 0; + + while (num_graphemes-- > 0) { + const v = iterator.next().value; + if (v === undefined) { + return ""; + } + + offset += v.segment.length; + } + + return string.slice(offset); + } else { + const codepoints = string.match(/./gsu); + if (num_graphemes >= codepoints.length) { + return ""; + } + + let offset = 0; + + for (let i = 0; i < num_graphemes; i++) { + offset += codepoints[i].length; + } + + return string.slice(offset); + } } export function string_grapheme_slice(string, idx, len) {