From fd4c301cca734051df4c35e30d527c30077e958c Mon Sep 17 00:00:00 2001 From: Boning Date: Sat, 17 Jan 2026 20:38:26 -0500 Subject: [PATCH 01/13] Add init files --- src/languages/definitions/ocaml/ocaml.test.ts | 0 src/languages/definitions/ocaml/ocaml.ts | 0 src/languages/definitions/ocaml/register.ts | 0 src/languages/definitions/register.all.ts | 1 + 4 files changed, 1 insertion(+) create mode 100644 src/languages/definitions/ocaml/ocaml.test.ts create mode 100644 src/languages/definitions/ocaml/ocaml.ts create mode 100644 src/languages/definitions/ocaml/register.ts diff --git a/src/languages/definitions/ocaml/ocaml.test.ts b/src/languages/definitions/ocaml/ocaml.test.ts new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/languages/definitions/ocaml/register.ts b/src/languages/definitions/ocaml/register.ts new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/languages/definitions/register.all.ts b/src/languages/definitions/register.all.ts index ff00612225..193a92d5fb 100644 --- a/src/languages/definitions/register.all.ts +++ b/src/languages/definitions/register.all.ts @@ -43,6 +43,7 @@ import './mdx/register'; import './mips/register'; import './msdax/register'; import './mysql/register'; +import './ocaml/register'; import './objective-c/register'; import './pascal/register'; import './pascaligo/register'; From 4382db1c3cdc887317a794825c73614644abdbeb Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 19 Jan 2026 04:00:08 -0500 Subject: [PATCH 02/13] Impl int and comment --- src/languages/definitions/ocaml/ocaml.test.ts | 67 ++++++++ src/languages/definitions/ocaml/ocaml.ts | 158 ++++++++++++++++++ src/languages/definitions/ocaml/register.ts | 13 ++ 3 files changed, 238 insertions(+) diff --git a/src/languages/definitions/ocaml/ocaml.test.ts b/src/languages/definitions/ocaml/ocaml.test.ts index e69de29bb2..f1f4c0fad0 100644 --- a/src/languages/definitions/ocaml/ocaml.test.ts +++ b/src/languages/definitions/ocaml/ocaml.test.ts @@ -0,0 +1,67 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { testTokenization } from '../test/testRunner'; + +testTokenization('ocaml', [ + // Comment + [ + { + line: '(** documentation *)', + tokens: [ + { startIndex: 0, type: 'comment.doc.ocaml' }, + ] + } + ], + [ + { + line: ' (* comment (* nested comment *) *) ', + tokens: [ + { startIndex: 0, type: '' }, + { startIndex: 1, type: 'comment.ocaml' }, + { startIndex: 35, type: '' } + ] + } + ], + [ + { + line: ' (* comment (* nested comment *)', + tokens: [ + { startIndex: 0, type: '' }, + { startIndex: 1, type: 'comment.ocaml' } + ] + }, + { + line: 'multiline comment', + tokens: [ + { startIndex: 0, type: 'comment.ocaml' } + ] + }, + { + line: ' *)', + tokens: [ + { startIndex: 0, type: 'comment.ocaml' } + ] + }, + ], + // Integer + // String + [ + { + line: 'let a = "This is a string"', + tokens: [ + { startIndex: 0, type: 'keyword.ocaml' }, + { startIndex: 3, type: '' }, + { startIndex: 4, type: 'identifier.ocaml' }, + { startIndex: 5, type: '' }, + { startIndex: 6, type: 'operator.ocaml' }, + { startIndex: 7, type: '' }, + { startIndex: 8, type: 'string.quote.ocaml' }, + { startIndex: 9, type: 'string.ocaml' }, + { startIndex: 25, type: 'string.quote.ocaml' } + ] + } + ], +]); diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index e69de29bb2..41c5b64fd6 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -0,0 +1,158 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import type { languages } from '../../../editor'; + +export const conf: languages.LanguageConfiguration = { + comments: { + blockComment: ['(*', '*)'] + }, + brackets: [ + ['{', '}'], + ['[', ']'], + ['(', ')'] + ], + autoClosingPairs: [ + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '(', close: ')' }, + { open: '"', close: '"' } + ], + surroundingPairs: [ + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '(', close: ')' }, + { open: '"', close: '"' }, + { open: "'", close: "'" } + ], + folding: { + markers: { + start: new RegExp('^\\s*//\\s*#region\\b|^\\s*\\(\\*\\s*#region(.*)\\*\\)'), + end: new RegExp('^\\s*//\\s*#endregion\\b|^\\s*\\(\\*\\s*#endregion\\s*\\*\\)') + } + } +}; + +export const language = { + defaultToken: 'invalid', + tokenPostfix: '.ocaml', + + keywords: [ + 'and', + 'as', + 'asr', + 'assert', + 'begin', + 'class', + 'constraint', + 'do', + 'done', + 'downto', + 'else', + 'end', + 'exception', + 'external', + 'false', + 'for', + 'fun', + 'function', + 'functor', + 'if', + 'in', + 'include', + 'inherit', + 'initializer', + 'land', + 'lazy', + 'let', + 'lor', + 'lsl', + 'lsr', + 'lxor', + 'match', + 'method', + 'mod', + 'module', + 'mutable', + 'new', + 'nonrec', + 'object', + 'of', + 'open', + 'open!', + 'or', + 'private', + 'rec', + 'sig', + 'struct', + 'then', + 'to', + 'true', + 'try', + 'type', + 'val', + 'virtual', + 'when', + 'while', + 'with', + ], + + // TODO + typeKeywords: [], + + // we include these common regular expressions + symbols: /[=> import('./ocaml') +}); From 2a5525c75ba44a56910f41c3080fdaf9c1d276ae Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 19 Jan 2026 04:16:08 -0500 Subject: [PATCH 03/13] Impl string --- src/languages/definitions/ocaml/ocaml.ts | 29 ++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index 41c5b64fd6..be47222f19 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -122,8 +122,21 @@ export const language = { } ], - // whitespace + // whitespaces { include: '@whitespace' }, + + // numbers + { include: '@number' }, + + // strings + [/"([^"\\]|\\.)*$/, 'string.invalid'], // non-teminated string + [/"""/, 'string', '@string."""'], + [/"/, 'string', '@string."'], + + // characters + [/'[^\\']'B?/, 'string'], + [/(')(@escapes)(')/, ['string', 'string.escape', 'string']], + [/'/, 'string.invalid'] ], // Done @@ -152,7 +165,19 @@ export const language = { ], string: [ - // Char + [/[^\\"]+/, 'string'], + [/@escapes/, 'string.escape'], + [/\\./, 'string.escape.invalid'], + [ + /("""|")/, + { + cases: { + '$#==$S2': { token: 'string', next: '@pop' }, + '@default': 'string' + } + } + ] + ] } }; From 9195e112322cab987e952720d252508fbf8f07ec Mon Sep 17 00:00:00 2001 From: Boning Date: Sun, 1 Feb 2026 21:22:02 -0500 Subject: [PATCH 04/13] Add more syntax --- src/languages/definitions/ocaml/ocaml.ts | 68 +++++++++++++++++------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index be47222f19..a10d08cc33 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -12,13 +12,15 @@ export const conf: languages.LanguageConfiguration = { brackets: [ ['{', '}'], ['[', ']'], - ['(', ')'] + ['(', ')'], + ['{|', '|}'], + ['[|', '|]'], ], autoClosingPairs: [ { open: '{', close: '}' }, { open: '[', close: ']' }, { open: '(', close: ')' }, - { open: '"', close: '"' } + { open: '"', close: '"', notIn: ['string'] } ], surroundingPairs: [ { open: '{', close: '}' }, @@ -42,7 +44,6 @@ export const language = { keywords: [ 'and', 'as', - 'asr', 'assert', 'begin', 'class', @@ -64,16 +65,10 @@ export const language = { 'include', 'inherit', 'initializer', - 'land', 'lazy', 'let', - 'lor', - 'lsl', - 'lsr', - 'lxor', 'match', 'method', - 'mod', 'module', 'mutable', 'new', @@ -98,15 +93,42 @@ export const language = { 'while', 'with', ], - - // TODO - typeKeywords: [], + operatorKeywords: [ + 'mod', + 'land', + 'lor', + 'lxor', + 'lsl', + 'lsr', + 'asr', + ], + bracketOpenKeywords: [ + 'begin', + 'object', + 'sig', + 'struct' + ], + debuggingConsts: [ + '__FILE__', + '__FUNCTION__', + '__LINE__', + '__LINE_OF__', + '__LOC__', + '__LOC_OF__', + '__MODULE__', + '__POS__', + '__POS_OF__', + ], // we include these common regular expressions - symbols: /[=>@^|]/, + operatorChar: /((@coreOperatorChar)|[~!?%<:.])/, + infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operator)+)/, + infixOp: /(\*|\+|-|-.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, + prefixSym: /(\!(@operatorChar)*|[?~](@operatorChar)+)/, + operators: /((@prefixSym)|(infixOp))/, escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, integersuffix: /[lLn]/, - floatsuffix: /[fFmM]?/, // The main tokenizer for our languages tokenizer: { @@ -116,18 +138,22 @@ export const language = { /[a-zA-Z_]\w*/, { cases: { + 'end': { token: 'keyword.bracket', bracket: '@close' }, + '@bracketOpenKeywords': { token: 'keyword.bracket', bracket: '@open' }, + '@operatorKeywords': 'operator', '@keywords': 'keyword', + '@debuggingConsts': 'constant', '@default': 'identifier' } } ], - // whitespaces { include: '@whitespace' }, - // numbers { include: '@number' }, + [/[;,.]/, 'delimiter'], + // strings [/"([^"\\]|\\.)*$/, 'string.invalid'], // non-teminated string [/"""/, 'string', '@string."""'], @@ -136,10 +162,16 @@ export const language = { // characters [/'[^\\']'B?/, 'string'], [/(')(@escapes)(')/, ['string', 'string.escape', 'string']], - [/'/, 'string.invalid'] + [/'/, 'string.invalid'], + + // brackets + [/[{\[]\|/, '@brackets'], + [/\|[}\]]/, '@brackets'], + [/[{}()\[\]]/, '@brackets'], + + [/@operators/, 'operator'] ], - // Done whitespace: [ [/[ \t\r\n]+/, ''], [/\(\*\*/, 'comment.doc', '@comment'], From 5b231b3929c000b745214d85daaf262f46a14f84 Mon Sep 17 00:00:00 2001 From: Boning Date: Sun, 1 Feb 2026 21:49:39 -0500 Subject: [PATCH 05/13] Update --- src/languages/definitions/ocaml/ocaml.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index a10d08cc33..5ffac63293 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -121,11 +121,11 @@ export const language = { ], // we include these common regular expressions - coreOperatorChar: /[$&*+-/=>@^|]/, + coreOperatorChar: /[$&*+\-\/=>@\^|]/, operatorChar: /((@coreOperatorChar)|[~!?%<:.])/, infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operator)+)/, infixOp: /(\*|\+|-|-.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, - prefixSym: /(\!(@operatorChar)*|[?~](@operatorChar)+)/, + prefixSym: /(!(@operatorChar)*|[?~](@operatorChar)+)/, operators: /((@prefixSym)|(infixOp))/, escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, integersuffix: /[lLn]/, From 31e63c62d1fddc4b28779979a5f98e453e419d40 Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 00:00:20 -0500 Subject: [PATCH 06/13] Update OCaml --- src/languages/definitions/ocaml/ocaml.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index 5ffac63293..ec3c435808 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -128,7 +128,7 @@ export const language = { prefixSym: /(!(@operatorChar)*|[?~](@operatorChar)+)/, operators: /((@prefixSym)|(infixOp))/, escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, - integersuffix: /[lLn]/, + integerSuffix: /[lLn]/, // The main tokenizer for our languages tokenizer: { @@ -187,10 +187,10 @@ export const language = { number: [ // Integer - [/[\d][\d_]*(@intSuffixes)?/, 'number'], - [/0[xX][\da-fA-F][\da-fA-F_]*(@intSuffixes)?/, 'number,hex'], - [/0[oO][0-7][0-7_]*(@intSuffixes)?/, 'number.octal'], - [/0[bB][01][01_]*(@intSuffixes)?/, 'number.binary'], + [/[\d][\d_]*(@integerSuffix)?/, 'number'], + [/0[xX][\da-fA-F][\da-fA-F_]*(@integerSuffix)?/, 'number,hex'], + [/0[oO][0-7][0-7_]*(@integerSuffix)?/, 'number.octal'], + [/0[bB][01][01_]*(@integerSuffix)?/, 'number.binary'], // Floating-point [/[\d][\d_]*(\.[\d_]*)?([eE][+-]?[\d][\d_]*)?/, 'number.float'], [/0[xX][\da-fA-F][\da-fA-F_]*(\.[\da-fA-F_]*)?([pP][+-]?[\d][\d_]*)?/, 'number.float'], From aa6ec19f6c5161cb32aa3fa20f8d352e740029ec Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 00:06:45 -0500 Subject: [PATCH 07/13] Add example and update contribution' --- CONTRIBUTING.md | 12 ++--- .../data/home-samples/sample.ocaml.txt | 46 +++++++++++++++++++ 2 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 website/src/website/data/home-samples/sample.ocaml.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 68998ab78e..d763151866 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,14 +17,14 @@ For a stable release, the commit specified in `vscodeRef` in [package.json](./pa Please understand that we only bundle languages with the monaco editor that have a significant relevance (for example, those that have an article in Wikipedia). -- create `$/src/basic-languages/{myLang}/{myLang}.contribution.ts` -- create `$/src/basic-languages/{myLang}/{myLang}.ts` -- create `$/src/basic-languages/{myLang}/{myLang}.test.ts` -- edit `$/src/basic-languages/monaco.contribution.ts` and register your new language -- create `$/website/index/samples/sample.{myLang}.txt` +- create `$/src/languages/definitions/{myLang}/register.ts` +- create `$/src/languages/definitions/{myLang}/{myLang}.ts` +- create `$/src/languages/definitions/{myLang}/{myLang}.test.ts` +- edit `$/src/languages/definitions/register.all.ts` and register your new language +- create `$/website/src/website/data/home-samples/sample.{myLang}.txt` ```js -import './{myLang}/{myLang}.contribution'; +import './{myLang}/register'; ``` ## Debugging / Developing The Core Editor diff --git a/website/src/website/data/home-samples/sample.ocaml.txt b/website/src/website/data/home-samples/sample.ocaml.txt new file mode 100644 index 0000000000..f254bf1549 --- /dev/null +++ b/website/src/website/data/home-samples/sample.ocaml.txt @@ -0,0 +1,46 @@ +(* + Welcome to the official OCaml Playground! + + You don't need to install anything - just write your code + and see the results appear in the Output panel. + + This playground is powered by OCaml 5 which comes with + support for shared-memory parallelism through domains and effects. + Below is some naive example code that calculates + the Fibonacci sequence in parallel. + + Happy hacking! +*) + +let num_domains = 2 +let n = 20 + +let rec fib n = + if n < 2 then 1 + else fib (n-1) + fib (n-2) + +let rec fib_par n d = + if d <= 1 then fib n + else + let a = fib_par (n-1) (d-1) in + let b = Domain.spawn (fun _ -> fib_par (n-2) (d-1)) in + a + Domain.join b + +let () = + let res = fib_par n num_domains in + Printf.printf "fib(%d) = %d\n" n res + +(* + By the way, a much better, single-threaded implementation that calculates + the Fibonacci sequence is this: + + let rec fib m n i = + if i < 1 then m + else fib n (n + m) (i - 1) + + let fib = fib 0 1 + + For a more in-depth, realistic example of how to use + parallel computation, take a look at + https://v2.ocaml.org/releases/5.0/manual/parallelism.html#s:par_iterators +*) From 3e2021ecd82deb8081f1f5d01e1e2954f12a59d0 Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 00:07:18 -0500 Subject: [PATCH 08/13] Update --- src/languages/definitions/ocaml/ocaml.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index ec3c435808..3be955e8c6 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -126,7 +126,7 @@ export const language = { infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operator)+)/, infixOp: /(\*|\+|-|-.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, prefixSym: /(!(@operatorChar)*|[?~](@operatorChar)+)/, - operators: /((@prefixSym)|(infixOp))/, + operator: /((@prefixSym)|(infixOp))/, escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, integerSuffix: /[lLn]/, @@ -169,7 +169,7 @@ export const language = { [/\|[}\]]/, '@brackets'], [/[{}()\[\]]/, '@brackets'], - [/@operators/, 'operator'] + [/@operator/, 'operator'] ], whitespace: [ From 8c02161ceadc22ae64e578f1d4e30c025a97be2d Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 00:41:29 -0500 Subject: [PATCH 09/13] Fix error --- src/languages/definitions/ocaml/ocaml.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index 3be955e8c6..32b697161e 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -120,13 +120,13 @@ export const language = { '__POS_OF__', ], - // we include these common regular expressions coreOperatorChar: /[$&*+\-\/=>@\^|]/, operatorChar: /((@coreOperatorChar)|[~!?%<:.])/, infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operator)+)/, - infixOp: /(\*|\+|-|-.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, + infixOp: /(\*|\+|-|-\.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, prefixSym: /(!(@operatorChar)*|[?~](@operatorChar)+)/, - operator: /((@prefixSym)|(infixOp))/, + operator: /((@prefixSym)|(@infixOp))/, + escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, integerSuffix: /[lLn]/, @@ -135,7 +135,7 @@ export const language = { root: [ // identifiers and keywords [ - /[a-zA-Z_]\w*/, + /[a-z_]\w*/, { cases: { 'end': { token: 'keyword.bracket', bracket: '@close' }, @@ -147,12 +147,13 @@ export const language = { } } ], + [/[A-Z]\w*/, 'constructor'], { include: '@whitespace' }, { include: '@number' }, - [/[;,.]/, 'delimiter'], + [/[:;,.]/, 'delimiter'], // strings [/"([^"\\]|\\.)*$/, 'string.invalid'], // non-teminated string @@ -186,14 +187,12 @@ export const language = { ], number: [ - // Integer - [/[\d][\d_]*(@integerSuffix)?/, 'number'], + [/0[xX][\da-fA-F][\da-fA-F_]*(\.[\da-fA-F_]*)?([pP][+-]?[\d][\d_]*)?/, 'number.float'], [/0[xX][\da-fA-F][\da-fA-F_]*(@integerSuffix)?/, 'number,hex'], [/0[oO][0-7][0-7_]*(@integerSuffix)?/, 'number.octal'], [/0[bB][01][01_]*(@integerSuffix)?/, 'number.binary'], - // Floating-point [/[\d][\d_]*(\.[\d_]*)?([eE][+-]?[\d][\d_]*)?/, 'number.float'], - [/0[xX][\da-fA-F][\da-fA-F_]*(\.[\da-fA-F_]*)?([pP][+-]?[\d][\d_]*)?/, 'number.float'], + [/[\d][\d_]*(@integerSuffix)?/, 'number'], ], string: [ From ba08d7f4a18eb15b69f4ed4bb6aec1ef17015e1d Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 00:44:06 -0500 Subject: [PATCH 10/13] Add args --- src/languages/definitions/ocaml/ocaml.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index 32b697161e..b5cde9c514 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -135,7 +135,7 @@ export const language = { root: [ // identifiers and keywords [ - /[a-z_]\w*/, + /[~?]?[a-z_]\w*/, { cases: { 'end': { token: 'keyword.bracket', bracket: '@close' }, From 610476980a31891939a30cd37468a865630f8ea8 Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 01:33:03 -0500 Subject: [PATCH 11/13] Add tests --- src/languages/definitions/ocaml/ocaml.test.ts | 111 +++++++++++++++++- src/languages/definitions/ocaml/ocaml.ts | 23 ++-- 2 files changed, 122 insertions(+), 12 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.test.ts b/src/languages/definitions/ocaml/ocaml.test.ts index f1f4c0fad0..17d3a597b2 100644 --- a/src/languages/definitions/ocaml/ocaml.test.ts +++ b/src/languages/definitions/ocaml/ocaml.test.ts @@ -11,7 +11,7 @@ testTokenization('ocaml', [ { line: '(** documentation *)', tokens: [ - { startIndex: 0, type: 'comment.doc.ocaml' }, + { startIndex: 0, type: 'comment.ocaml' }, ] } ], @@ -47,7 +47,112 @@ testTokenization('ocaml', [ }, ], // Integer + [ + { + line: '37', + tokens: [ + { startIndex: 0, type: 'number.ocaml' }, + ] + } + ], + [ + { + line: '1_000_000', + tokens: [ + { startIndex: 0, type: 'number.ocaml' }, + ] + } + ], + [ + { + line: '0x00A9', + tokens: [ + { startIndex: 0, type: 'number.hex.ocaml' }, + ] + } + ], + [ + { + line: '0L', + tokens: [ + { startIndex: 0, type: 'number.ocaml' }, + ] + } + ], + [ + { + line: '3.141_592_653_589_793_12', + tokens: [ + { startIndex: 0, type: 'number.float.ocaml' }, + ] + } + ], + [ + { + line: '-1e-5', + tokens: [ + { startIndex: 0, type: 'number.float.ocaml' }, + ] + } + ], + [ + { + line: '0x1p-52', + tokens: [ + { startIndex: 0, type: 'number.float.ocaml' }, + ] + } + ], + // Character + [ + { + line: '\'a\'', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + ] + } + ], + [ + { + line: '\'\\\'\'', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + { startIndex: 1, type: 'string.escape.ocaml' }, + { startIndex: 3, type: 'string.ocaml' }, + ] + } + ], + [ + { + line: '\'\\xA9\'', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + { startIndex: 1, type: 'string.escape.ocaml' }, + { startIndex: 5, type: 'string.ocaml' }, + ] + } + ], // String + [ + { + line: '"Hello, World!\\n"', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + { startIndex: 14, type: 'string.escape.ocaml' }, + { startIndex: 16, type: 'string.ocaml' }, + ] + } + ], + [ + { + line: '"\\u{207A}"', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + { startIndex: 1, type: 'string.escape.ocaml' }, + { startIndex: 9, type: 'string.ocaml' }, + ] + } + ], [ { line: 'let a = "This is a string"', @@ -58,9 +163,7 @@ testTokenization('ocaml', [ { startIndex: 5, type: '' }, { startIndex: 6, type: 'operator.ocaml' }, { startIndex: 7, type: '' }, - { startIndex: 8, type: 'string.quote.ocaml' }, - { startIndex: 9, type: 'string.ocaml' }, - { startIndex: 25, type: 'string.quote.ocaml' } + { startIndex: 8, type: 'string.ocaml' }, ] } ], diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index b5cde9c514..0c5a05115c 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -128,6 +128,11 @@ export const language = { operator: /((@prefixSym)|(@infixOp))/, escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, + + coreDec: /[\d][\d_]*/, + coreHex: /0[xX][\da-fA-F][\da-fA-F_]*/, + exponent: /[eE][+-]?(@coreDec)/, + hexExponent: /[pP][+-]?(@coreDec)/, integerSuffix: /[lLn]/, // The main tokenizer for our languages @@ -175,8 +180,8 @@ export const language = { whitespace: [ [/[ \t\r\n]+/, ''], - [/\(\*\*/, 'comment.doc', '@comment'], - [/\(\*/, 'comment', '@comment'] + // [/\(\*\*/, 'comment.doc', '@comment'], + [/\(\*\*?/, 'comment', '@comment'] ], comment: [ @@ -187,16 +192,18 @@ export const language = { ], number: [ - [/0[xX][\da-fA-F][\da-fA-F_]*(\.[\da-fA-F_]*)?([pP][+-]?[\d][\d_]*)?/, 'number.float'], - [/0[xX][\da-fA-F][\da-fA-F_]*(@integerSuffix)?/, 'number,hex'], - [/0[oO][0-7][0-7_]*(@integerSuffix)?/, 'number.octal'], - [/0[bB][01][01_]*(@integerSuffix)?/, 'number.binary'], - [/[\d][\d_]*(\.[\d_]*)?([eE][+-]?[\d][\d_]*)?/, 'number.float'], - [/[\d][\d_]*(@integerSuffix)?/, 'number'], + // for float, fractional part and exponent part can be omitted but not both + [/-?(@coreHex)((\.[\da-fA-F_]*)(@hexExponent)?|(@hexExponent))/, 'number.float'], + [/-?(@coreHex)(@integerSuffix)?/, 'number.hex'], + [/-?0[oO][0-7][0-7_]*(@integerSuffix)?/, 'number.octal'], + [/-?0[bB][01][01_]*(@integerSuffix)?/, 'number.binary'], + [/-?(@coreDec)((\.[\d_]*)(@exponent)?|(@exponent))/, 'number.float'], + [/-?(@coreDec)(@integerSuffix)?/, 'number'], ], string: [ [/[^\\"]+/, 'string'], + [/\\u{\w+}/, 'string.escape'], [/@escapes/, 'string.escape'], [/\\./, 'string.escape.invalid'], [ From 3ddbbe2ce234d5d722b207001f879d90dbc5a3b5 Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 02:02:51 -0500 Subject: [PATCH 12/13] Update --- src/languages/definitions/ocaml/ocaml.test.ts | 45 +++++++++++++++++++ src/languages/definitions/ocaml/ocaml.ts | 12 +++-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.test.ts b/src/languages/definitions/ocaml/ocaml.test.ts index 17d3a597b2..26094f470b 100644 --- a/src/languages/definitions/ocaml/ocaml.test.ts +++ b/src/languages/definitions/ocaml/ocaml.test.ts @@ -153,6 +153,31 @@ testTokenization('ocaml', [ ] } ], + [ + { + line: 'let longstr =', + tokens: [ + { startIndex: 0, type: 'keyword.ocaml' }, + { startIndex: 3, type: '' }, + { startIndex: 4, type: 'identifier.ocaml' }, + { startIndex: 11, type: '' }, + { startIndex: 12, type: 'operator.ocaml' }, + ] + }, + { + line: ' "Call me Ishmael. Some years ago — never mind how long \\', + tokens: [ + { startIndex: 0, type: '' }, + { startIndex: 2, type: 'string.ocaml' }, + ] + }, + { + line: ' precisely..."', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + ] + } + ], [ { line: 'let a = "This is a string"', @@ -167,4 +192,24 @@ testTokenization('ocaml', [ ] } ], + [ + { + line: 'let pi = 4.0 *. atan 1.0', + tokens: [ + { startIndex: 0, type: 'keyword.ocaml' }, + { startIndex: 3, type: '' }, + { startIndex: 4, type: 'identifier.ocaml' }, + { startIndex: 6, type: '' }, + { startIndex: 7, type: 'operator.ocaml' }, + { startIndex: 8, type: '' }, + { startIndex: 9, type: 'number.float.ocaml' }, + { startIndex: 12, type: '' }, + { startIndex: 13, type: 'operator.ocaml' }, + { startIndex: 15, type: '' }, + { startIndex: 16, type: 'identifier.ocaml' }, + { startIndex: 20, type: '' }, + { startIndex: 21, type: 'number.float.ocaml' } + ] + } + ], ]); diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index 0c5a05115c..cb5de8c789 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -41,6 +41,9 @@ export const language = { defaultToken: 'invalid', tokenPostfix: '.ocaml', + // Not implemented: + // - Quoted string + keywords: [ 'and', 'as', @@ -121,9 +124,9 @@ export const language = { ], coreOperatorChar: /[$&*+\-\/=>@\^|]/, - operatorChar: /((@coreOperatorChar)|[~!?%<:.])/, - infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operator)+)/, - infixOp: /(\*|\+|-|-\.|=|!=|<|>|\|\||&|&&|:=|(@infixSym))/, + operatorChar: /((@coreOperatorChar)|[~!?%<:\.])/, + infixSym: /(((@coreOperatorChar)|[%<])(@operatorChar)*|#(@operatorChar)+)/, + infixOp: /(!=|<|>|\|\||&&|:=|(@infixSym))/, prefixSym: /(!(@operatorChar)*|[?~](@operatorChar)+)/, operator: /((@prefixSym)|(@infixOp))/, @@ -161,7 +164,6 @@ export const language = { [/[:;,.]/, 'delimiter'], // strings - [/"([^"\\]|\\.)*$/, 'string.invalid'], // non-teminated string [/"""/, 'string', '@string."""'], [/"/, 'string', '@string."'], @@ -203,6 +205,8 @@ export const language = { string: [ [/[^\\"]+/, 'string'], + + [/\\$/, 'string'], // newline sequence [/\\u{\w+}/, 'string.escape'], [/@escapes/, 'string.escape'], [/\\./, 'string.escape.invalid'], From 1cd564356da365e815fe1e7c45efd9d5404a9d57 Mon Sep 17 00:00:00 2001 From: Boning Date: Mon, 2 Feb 2026 03:05:59 -0500 Subject: [PATCH 13/13] Add quoted string --- src/languages/definitions/ocaml/ocaml.test.ts | 8 +++++ src/languages/definitions/ocaml/ocaml.ts | 31 ++++++++++++++----- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/languages/definitions/ocaml/ocaml.test.ts b/src/languages/definitions/ocaml/ocaml.test.ts index 26094f470b..6c7e157b33 100644 --- a/src/languages/definitions/ocaml/ocaml.test.ts +++ b/src/languages/definitions/ocaml/ocaml.test.ts @@ -178,6 +178,14 @@ testTokenization('ocaml', [ ] } ], + [ + { + line: '{|"Hello, World!"|}', + tokens: [ + { startIndex: 0, type: 'string.ocaml' }, + ] + } + ], [ { line: 'let a = "This is a string"', diff --git a/src/languages/definitions/ocaml/ocaml.ts b/src/languages/definitions/ocaml/ocaml.ts index cb5de8c789..55eb193329 100644 --- a/src/languages/definitions/ocaml/ocaml.ts +++ b/src/languages/definitions/ocaml/ocaml.ts @@ -41,9 +41,6 @@ export const language = { defaultToken: 'invalid', tokenPostfix: '.ocaml', - // Not implemented: - // - Quoted string - keywords: [ 'and', 'as', @@ -166,6 +163,7 @@ export const language = { // strings [/"""/, 'string', '@string."""'], [/"/, 'string', '@string."'], + [/{([a-z_]*)\|/, 'string', '@quoted.$1'], // characters [/'[^\\']'B?/, 'string'], @@ -173,8 +171,7 @@ export const language = { [/'/, 'string.invalid'], // brackets - [/[{\[]\|/, '@brackets'], - [/\|[}\]]/, '@brackets'], + [/(\[\||\|\])/, '@brackets'], [/[{}()\[\]]/, '@brackets'], [/@operator/, 'operator'] @@ -204,8 +201,16 @@ export const language = { ], string: [ + [ + /\|([a-z_]*)}/, + { + cases: { + '$S2==quoted-$1': { token: 'string', next: '@pop' }, + '@default': 'string' + } + } + ], [/[^\\"]+/, 'string'], - [/\\$/, 'string'], // newline sequence [/\\u{\w+}/, 'string.escape'], [/@escapes/, 'string.escape'], @@ -219,7 +224,19 @@ export const language = { } } ] - + ], + quoted: [ + [/[^|]+/, 'string'], + [ + /\|([a-z_]*)}/, + { + cases: { + '$S2==$1': { token: 'string', next: '@pop' }, + '@default': 'string' + } + } + ], + [/\|/, 'string'] ] } };