Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ STRIP_NATIVE = strip
CFLAGS_NATIVE = $(CFLAGS)
LDFLAGS_NATIVE = $(LDFLAGS)

native: cap2hccapx.bin cleanup-rules.bin combinator.bin combinator3.bin combinatorX.bin combipow.bin ct3_to_ntlm.bin cutb.bin expander.bin gate.bin generate-rules.bin hcstatgen.bin hcstat2gen.bin keyspace.bin len.bin mli2.bin morph.bin permute.bin permute_exist.bin prepare.bin req-include.bin req-exclude.bin rli.bin rli2.bin rules_optimize.bin splitlen.bin strip-bsr.bin strip-bsn.bin
native: cap2hccapx.bin cleanup-rules.bin combinator.bin combinator3.bin combinatorX.bin combipow.bin ct3_to_ntlm.bin cutb.bin expander.bin gate.bin generate-rules.bin hcstatgen.bin hcstat2gen.bin keyspace.bin len.bin mli2.bin morph.bin ngramX.bin permute.bin permute_exist.bin prepare.bin req-include.bin req-exclude.bin rli.bin rli2.bin rules_optimize.bin splitlen.bin strip-bsr.bin strip-bsn.bin

cap2hccapx.bin: cap2hccapx.c
${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<
Expand Down Expand Up @@ -107,6 +107,9 @@ mli2.bin: mli2.c
morph.bin: morph.c
${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<

# Build the native ngramX binary from its single source file ngramX.c.
# Uses the native toolchain variables (CFLAGS_NATIVE / LDFLAGS_NATIVE mirror
# the user-supplied CFLAGS / LDFLAGS); $@ is the target, $< the C source.
ngramX.bin: ngramX.c
${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<

permute.bin: permute.c
${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<

Expand Down Expand Up @@ -149,7 +152,7 @@ STRIP_WINDOWS = x86_64-w64-mingw32-strip
CFLAGS_WINDOWS = $(CFLAGS) -D_WINDOWS
GLOB_WINDOWS = /usr/x86_64-w64-mingw32/lib/CRT_glob.o

windows: cap2hccapx.exe cleanup-rules.exe combinator.exe combinator3.exe combinatorX.exe combipow.exe ct3_to_ntlm.exe cutb.exe expander.exe gate.exe generate-rules.exe hcstatgen.exe hcstat2gen.exe keyspace.exe len.exe mli2.exe morph.exe permute.exe permute_exist.exe prepare.exe req-include.exe req-exclude.exe rli.exe rli2.exe rules_optimize.exe splitlen.exe strip-bsr.exe strip-bsn.exe
windows: cap2hccapx.exe cleanup-rules.exe combinator.exe combinator3.exe combinatorX.exe combipow.exe ct3_to_ntlm.exe cutb.exe expander.exe gate.exe generate-rules.exe hcstatgen.exe hcstat2gen.exe keyspace.exe len.exe mli2.exe morph.exe ngramX.exe permute.exe permute_exist.exe prepare.exe req-include.exe req-exclude.exe rli.exe rli2.exe rules_optimize.exe splitlen.exe strip-bsr.exe strip-bsn.exe

cap2hccapx.exe: cap2hccapx.c
${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<
Expand Down Expand Up @@ -202,6 +205,9 @@ mli2.exe: mli2.c
morph.exe: morph.c
${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<

# Build the Windows ngramX executable from its single source file ngramX.c.
# Uses CC_WINDOWS with CFLAGS_WINDOWS (CFLAGS plus -D_WINDOWS); no separate
# linker flags are passed. $@ is the target, $< the C source.
ngramX.exe: ngramX.c
${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<

permute.exe: permute.c
${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<

Expand Down
103 changes: 103 additions & 0 deletions src/ngramX.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* Name........: ngramX
* Author......: Gabriele 'matrix' Gristina <gabriele.gristina@gmail.com>
* Version.....: 1.0
* Date........: Sun Sep 7 18:48:41 CEST 2025
* License.....: MIT
*/

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LINE_BUFFER 4096

// Write every run of groupSize consecutive words to stdout, one run per line.
// Words inside a run are separated by a single space; a run starts at each
// index for which groupSize words are still available, so nothing is printed
// when wordCount is smaller than groupSize.
void printGroups (char **words, size_t wordCount, int groupSize)
{
  size_t start = 0;

  while (start + groupSize <= wordCount)
  {
    int offset = 0;

    while (offset < groupSize)
    {
      // separator goes in front of every word except the first one
      if (offset > 0) fputc (' ', stdout);

      fputs (words[start + offset], stdout);

      offset++;
    }

    fputc ('\n', stdout);

    start++;
  }
}

// Append a private copy of word to the dynamic array *words.
//
// words    - address of the array pointer; may point to NULL when empty
// count    - number of stored words, incremented on success
// capacity - number of allocated slots; starts at 1024 and doubles when full
// word     - NUL-terminated string to copy (the caller keeps ownership)
//
// Every stored copy is heap-allocated and must be released with free().
// On any allocation failure a message is written to stderr and the process
// exits with status 1.
void addWord (char ***words, size_t *count, size_t *capacity, const char *word)
{
  if (*count >= *capacity)
  {
    const size_t newCapacity = (*capacity == 0) ? 1024 : (*capacity * 2);

    char **grown = NULL;

    // refuse to grow if doubling or the byte-size computation would wrap around
    if (newCapacity > *capacity && newCapacity <= (size_t) -1 / sizeof (char *))
    {
      grown = realloc (*words, newCapacity * sizeof (char *));
    }

    if (!grown)
    {
      fprintf (stderr, "! Memory allocation failed\n");

      exit (1);
    }

    *words    = grown;
    *capacity = newCapacity;
  }

  // Duplicate the token so it persists after the caller's buffer is reused.
  // malloc + memcpy is used instead of strdup because strdup is not part of
  // ISO C before C23 and its result was previously stored unchecked.
  const size_t size = strlen (word) + 1;

  char *copy = malloc (size);

  if (!copy)
  {
    fprintf (stderr, "! Memory allocation failed\n");

    exit (1);
  }

  memcpy (copy, word, size);

  (*words)[(*count)++] = copy;
}

// Read one complete input line of arbitrary length from fp into *buf.
//
// *buf is a heap buffer owned by the caller (pass NULL on the first call and
// free() it when done); *cap tracks its size. The buffer starts at LINE_BUFFER
// bytes and doubles whenever a line does not fit, so long lines are never
// split in the middle of a word.
//
// Returns 1 when a line (with or without a trailing newline) was stored,
// 0 on end of file or on a read error; the caller distinguishes the two
// with ferror(). Exits with status 1 if memory cannot be allocated.
static int readLine (FILE *fp, char **buf, size_t *cap)
{
  size_t len = 0;

  if (*buf == NULL)
  {
    *buf = malloc (LINE_BUFFER);

    if (!*buf)
    {
      fprintf (stderr, "! Memory allocation failed\n");

      exit (1);
    }

    *cap = LINE_BUFFER;
  }

  for (;;)
  {
    // need room for at least one more character plus the terminating NUL
    if (*cap - len < 2)
    {
      const size_t newCap = *cap * 2;

      char *grown = (newCap > *cap) ? realloc (*buf, newCap) : NULL;

      if (!grown)
      {
        fprintf (stderr, "! Memory allocation failed\n");

        exit (1);
      }

      *buf = grown;
      *cap = newCap;
    }

    size_t room = *cap - len;

    if (room > INT_MAX) room = INT_MAX; // fgets() takes the size as an int

    if (!fgets (*buf + len, (int) room, fp))
    {
      if (ferror (fp)) return 0;

      // end of file: keep whatever earlier chunks of this line were read
      (*buf)[len] = '\0';

      return (len > 0);
    }

    const size_t got = strlen (*buf + len);

    len += got;

    if (got > 0 && (*buf)[len - 1] == '\n') return 1; // complete line
    if (got + 1 < room) return 1;                      // short read: last line without newline

    // otherwise the chunk filled the buffer: loop to grow it and keep reading
  }
}

// Entry point: ngramX <filename> <groupSize>
//
// Collects every space/tab separated word of the input file (across all
// lines) and prints each run of groupSize consecutive words on its own line.
// Returns 0 on success and 1 on usage errors, an invalid groupSize, a file
// that cannot be opened, or a read error.
int main (int argc, char *argv[])
{
  if (argc != 3)
  {
    fprintf (stdout, "> Usage: %s <filename> <groupSize>\n", argv[0]);

    return 1;
  }

  const char *filename = argv[1];

  // strtol instead of atoi: rejects non-numeric input, trailing garbage and
  // values that do not fit in an int instead of silently misparsing them
  char *end = NULL;

  errno = 0;

  const long parsed = strtol (argv[2], &end, 10);

  if (end == argv[2] || *end != '\0' || errno == ERANGE || parsed <= 0 || parsed > INT_MAX)
  {
    fprintf (stderr, "! groupSize must be > 0\n");

    return 1;
  }

  const int groupSize = (int) parsed;

  FILE *file = fopen (filename, "r");

  if (!file)
  {
    fprintf (stderr, "! fopen() failed: %s\n", strerror (errno));

    return 1;
  }

  char  *line         = NULL;
  size_t lineCapacity = 0;

  char **words        = NULL;
  size_t wordCount    = 0;
  size_t wordCapacity = 0;

  while (readLine (file, &line, &lineCapacity))
  {
    line[strcspn (line, "\r\n")] = '\0'; // strip newlines only

    // Tokenize line on spaces/tabs
    for (char *token = strtok (line, " \t"); token; token = strtok (NULL, " \t"))
    {
      addWord (&words, &wordCount, &wordCapacity, token);
    }
  }

  // capture the stream state before fclose() invalidates it
  const int readFailed = ferror (file);
  const int readErrno  = errno;

  fclose (file);

  free (line);

  int rc = 0;

  if (readFailed)
  {
    fprintf (stderr, "! fgets() failed: %s\n", strerror (readErrno));

    rc = 1;
  }
  else
  {
    // Print n-grams
    printGroups (words, wordCount, groupSize);
  }

  // Free allocated memory
  for (size_t i = 0; i < wordCount; i++) free (words[i]);

  free (words);

  return rc;
}
Loading