diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52608459..b3c13fd7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## Unreleased
+
+- Fixed a bug where `string.drop_start` would return incorrect results on
+  JavaScript when the string contained multi-byte characters.
+
 ## v1.0.4 - 2026-05-30
 
 - Fix a bug where dicts and sets with hash collisions but equal entries would
diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam
index e543bb68..753ea33a 100644
--- a/src/gleam/string.gleam
+++ b/src/gleam/string.gleam
@@ -200,7 +200,6 @@ pub fn slice(from string: String, at_index idx: Int, length len: Int) -> String
 fn grapheme_slice(string: String, index: Int, length: Int) -> String
 
 @external(erlang, "binary", "part")
-@external(javascript, "../gleam_stdlib.mjs", "string_byte_slice")
 fn unsafe_byte_slice(string: String, index: Int, length: Int) -> String
 
 /// Drops contents of the first `String` that occur before the second `String`.
@@ -227,6 +226,7 @@ pub fn crop(from string: String, before substring: String) -> String
 /// assert drop_start(from: "The Lone Gunmen", up_to: 2) == "e Lone Gunmen"
 /// ```
 ///
+@external(javascript, "../gleam_stdlib.mjs", "string_drop_start")
 pub fn drop_start(from string: String, up_to num_graphemes: Int) -> String {
   case num_graphemes <= 0 {
     True -> string
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 96582b12..936d92e9 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -199,8 +199,47 @@ export function length(data) {
   return data.length;
 }
 
-export function string_byte_slice(string, index, length) {
-  return string.slice(index, index + length);
+// Drops up to `num_graphemes` graphemes from the start of `string` and returns
+// the rest. A count of zero or less returns `string` unchanged; a count that
+// reaches the end of the string returns "".
+//
+// Offsets are summed from `.length`, i.e. UTF-16 code units, which is the unit
+// `String.prototype.slice` indexes by.
+export function string_drop_start(string, num_graphemes) {
+  if (num_graphemes <= 0 || string === "") {
+    return string;
+  }
+
+  let offset = 0;
+  let remaining = num_graphemes;
+
+  const iterator = graphemes_iterator(string);
+  if (iterator) {
+    while (remaining > 0) {
+      const v = iterator.next().value;
+      if (v === undefined) {
+        return "";
+      }
+
+      offset += v.segment.length;
+      remaining -= 1;
+    }
+  } else {
+    // No grapheme iterator is available, so fall back to counting code points.
+    // Iterating the string yields one code point at a time, which lets us stop
+    // as soon as enough have been skipped instead of collecting every code
+    // point in the string up front.
+    for (const codepoint of string) {
+      if (remaining <= 0) {
+        break;
+      }
+
+      offset += codepoint.length;
+      remaining -= 1;
+    }
+  }
+
+  return string.slice(offset);
 }
 
 export function string_grapheme_slice(string, idx, len) {
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index ec4c0880..201e75b2 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -471,6 +471,21 @@ pub fn drop_start_3499_test() {
   assert string.drop_start("\r]", 1) == "]"
 }
 
+pub fn drop_start_multibyte_test() {
+  // https://github.com/gleam-lang/stdlib/issues/924
+  assert string.drop_start("广州abcdefghijklmn", 0) == "广州abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 1) == "州abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 2) == "abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 3) == "bcdefghijklmn"
+  assert string.drop_start("广州", 2) == ""
+  assert string.drop_start("广州", 3) == ""
+}
+
+pub fn drop_start_grapheme_cluster_test() {
+  assert string.drop_start("👶🏿abc", 1) == "abc"
+  assert string.drop_start("e\u{0301}abc", 1) == "abc"
+}
+
 pub fn drop_end_basic_test() {
   assert string.drop_end("gleam", up_to: 2) == "gle"
 }