diff --git a/src/Makefile b/src/Makefile
index e47ba53..d816dd4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -54,7 +54,7 @@ STRIP_NATIVE = strip
 CFLAGS_NATIVE = $(CFLAGS)
 LDFLAGS_NATIVE = $(LDFLAGS)
 
-native: cap2hccapx.bin cleanup-rules.bin combinator.bin combinator3.bin combinatorX.bin combipow.bin ct3_to_ntlm.bin cutb.bin expander.bin gate.bin generate-rules.bin hcstatgen.bin hcstat2gen.bin keyspace.bin len.bin mli2.bin morph.bin permute.bin permute_exist.bin prepare.bin req-include.bin req-exclude.bin rli.bin rli2.bin rules_optimize.bin splitlen.bin strip-bsr.bin strip-bsn.bin
+native: cap2hccapx.bin cleanup-rules.bin combinator.bin combinator3.bin combinatorX.bin combipow.bin ct3_to_ntlm.bin cutb.bin expander.bin gate.bin generate-rules.bin hcstatgen.bin hcstat2gen.bin keyspace.bin len.bin mli2.bin morph.bin ngramX.bin permute.bin permute_exist.bin prepare.bin req-include.bin req-exclude.bin rli.bin rli2.bin rules_optimize.bin splitlen.bin strip-bsr.bin strip-bsn.bin
 
 cap2hccapx.bin: cap2hccapx.c
 	${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<
@@ -107,6 +107,9 @@ mli2.bin: mli2.c
 morph.bin: morph.c
 	${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<
 
+ngramX.bin: ngramX.c
+	${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<
+
 permute.bin: permute.c
 	${CC_NATIVE} ${CFLAGS_NATIVE} ${LDFLAGS_NATIVE} -o $@ $<
 
@@ -149,7 +152,7 @@ STRIP_WINDOWS = x86_64-w64-mingw32-strip
 CFLAGS_WINDOWS = $(CFLAGS) -D_WINDOWS
 GLOB_WINDOWS = /usr/x86_64-w64-mingw32/lib/CRT_glob.o
 
-windows: cap2hccapx.exe cleanup-rules.exe combinator.exe combinator3.exe combinatorX.exe combipow.exe ct3_to_ntlm.exe cutb.exe expander.exe gate.exe generate-rules.exe hcstatgen.exe hcstat2gen.exe keyspace.exe len.exe mli2.exe morph.exe permute.exe permute_exist.exe prepare.exe req-include.exe req-exclude.exe rli.exe rli2.exe rules_optimize.exe splitlen.exe strip-bsr.exe strip-bsn.exe
+windows: cap2hccapx.exe cleanup-rules.exe combinator.exe combinator3.exe combinatorX.exe combipow.exe ct3_to_ntlm.exe cutb.exe expander.exe gate.exe generate-rules.exe hcstatgen.exe hcstat2gen.exe keyspace.exe len.exe mli2.exe morph.exe ngramX.exe permute.exe permute_exist.exe prepare.exe req-include.exe req-exclude.exe rli.exe rli2.exe rules_optimize.exe splitlen.exe strip-bsr.exe strip-bsn.exe
 
 cap2hccapx.exe: cap2hccapx.c
 	${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<
@@ -202,6 +205,9 @@ mli2.exe: mli2.c
 morph.exe: morph.c
 	${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<
 
+ngramX.exe: ngramX.c
+	${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<
+
 permute.exe: permute.c
 	${CC_WINDOWS} ${CFLAGS_WINDOWS} -o $@ $<
 
diff --git a/src/ngramX.c b/src/ngramX.c
new file mode 100644
index 0000000..eddc638
--- /dev/null
+++ b/src/ngramX.c
@@ -0,0 +1,243 @@
+/**
+ * Name........: ngramX
+ * Author......: Gabriele 'matrix' Gristina
+ * Version.....: 1.0
+ * Date........: Sun Sep 7 18:48:41 CEST 2025
+ * License.....: MIT
+ */
+
+/*
+ * Reads a text file, splits it into words on spaces, tabs and line breaks,
+ * and prints every run of groupSize consecutive words (an n-gram) on its
+ * own line, with the words separated by a single space.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+// Initial size of the buffer that collects the characters of one word
+#define TOKEN_BUFFER 4096
+
+// Report an allocation failure and terminate; never returns
+static void memoryFailure (void)
+{
+  fprintf (stderr, "! Memory allocation failed\n");
+
+  exit (1);
+}
+
+/**
+ * Print all possible n-grams of size groupSize, one per line.
+ *
+ * words     : array of wordCount NUL-terminated strings
+ * wordCount : number of entries in words
+ * groupSize : words per n-gram; nothing is printed when it is <= 0 or
+ *             larger than wordCount
+ */
+void printGroups (char **words, size_t wordCount, int groupSize)
+{
+  if (groupSize <= 0) return;
+
+  const size_t n = (size_t) groupSize;
+
+  if (n > wordCount) return;
+
+  // wordCount - n cannot wrap here, whereas i + n could for huge values
+  for (size_t i = 0; i <= wordCount - n; i++)
+  {
+    for (size_t j = 0; j < n; j++)
+    {
+      if (j > 0) fputc (' ', stdout);
+
+      fputs (words[i + j], stdout);
+    }
+
+    fputc ('\n', stdout);
+  }
+}
+
+/**
+ * Append a private copy of word to a growable array of strings.
+ *
+ * words    : in/out, the array; may point to NULL while *capacity is 0
+ * count    : in/out, number of strings stored
+ * capacity : in/out, number of slots allocated
+ * word     : NUL-terminated string to copy; the caller keeps ownership
+ *
+ * Each stored copy and the array itself must be released with free ().
+ * The process exits with status 1 if memory cannot be allocated.
+ */
+void addWord (char ***words, size_t *count, size_t *capacity, const char *word)
+{
+  if (*count >= *capacity)
+  {
+    // Refuse to grow if doubling would overflow the byte count
+    if (*capacity > SIZE_MAX / sizeof (char *) / 2) memoryFailure ();
+
+    const size_t newCapacity = (*capacity == 0) ? 1024 : (*capacity * 2);
+
+    // Keep the old pointer until realloc () has succeeded
+    char **grown = realloc (*words, newCapacity * sizeof (char *));
+
+    if (!grown) memoryFailure ();
+
+    *words    = grown;
+    *capacity = newCapacity;
+  }
+
+  // malloc + memcpy rather than strdup (), which strict ISO C modes hide
+  const size_t size = strlen (word) + 1;
+
+  char *copy = malloc (size);
+
+  if (!copy) memoryFailure ();
+
+  memcpy (copy, word, size);
+
+  (*words)[(*count)++] = copy;
+}
+
+// Append c to the word being collected, keeping room for a terminating NUL
+static void appendChar (char **buf, size_t *len, size_t *capacity, char c)
+{
+  if (*len + 2 > *capacity)
+  {
+    if (*capacity > SIZE_MAX / 2) memoryFailure ();
+
+    const size_t newCapacity = (*capacity == 0) ? TOKEN_BUFFER : (*capacity * 2);
+
+    char *grown = realloc (*buf, newCapacity);
+
+    if (!grown) memoryFailure ();
+
+    *buf      = grown;
+    *capacity = newCapacity;
+  }
+
+  (*buf)[(*len)++] = c;
+}
+
+/**
+ * Split the whole stream into words and store each one with addWord ().
+ *
+ * Space, tab, CR and LF all end a word, so a word is never cut in two no
+ * matter how long its line is. Returns 0 on success or -1 on a read error;
+ * errno is preserved across the cleanup so the caller can report it.
+ */
+static int readWords (FILE *file, char ***words, size_t *count, size_t *capacity)
+{
+  char  *token    = NULL;
+  size_t tokenLen = 0;
+  size_t tokenCap = 0;
+
+  int c;
+
+  do
+  {
+    c = getc (file);
+
+    if (c != EOF && c != ' ' && c != '\t' && c != '\r' && c != '\n')
+    {
+      appendChar (&token, &tokenLen, &tokenCap, (char) c);
+
+      continue;
+    }
+
+    // Delimiter or end of input: flush the word collected so far, if any
+    if (tokenLen > 0)
+    {
+      token[tokenLen] = '\0';
+
+      addWord (words, count, capacity, token);
+
+      tokenLen = 0;
+    }
+  } while (c != EOF);
+
+  const int failed     = ferror (file);
+  const int savedErrno = errno;
+
+  free (token);
+
+  errno = savedErrno;
+
+  return failed ? -1 : 0;
+}
+
+int main (int argc, char *argv[])
+{
+  if (argc != 3)
+  {
+    fprintf (stdout, "> Usage: %s <file> <groupSize>\n", argv[0]);
+
+    return 1;
+  }
+
+  const char *filename = argv[1];
+
+  // strtol () rather than atoi (): rejects trailing junk and out-of-range input
+  char *end = NULL;
+
+  errno = 0;
+
+  const long parsed = strtol (argv[2], &end, 10);
+
+  if (end == argv[2] || *end != '\0' || errno == ERANGE || parsed <= 0 || parsed > INT_MAX)
+  {
+    fprintf (stderr, "! groupSize must be > 0\n");
+
+    return 1;
+  }
+
+  const int groupSize = (int) parsed;
+
+  FILE *file = fopen (filename, "r");
+
+  if (!file)
+  {
+    fprintf (stderr, "! fopen() failed: %s\n", strerror (errno));
+
+    return 1;
+  }
+
+  char **words = NULL;
+
+  size_t wordCount    = 0;
+  size_t wordCapacity = 0;
+
+  int rc = 0;
+
+  if (readWords (file, &words, &wordCount, &wordCapacity) != 0)
+  {
+    fprintf (stderr, "! read failed: %s\n", strerror (errno));
+
+    rc = 1;
+  }
+
+  fclose (file);
+
+  if (rc == 0)
+  {
+    // Print n-grams
+    printGroups (words, wordCount, groupSize);
+
+    // Surface write errors (full disk, closed pipe) through the exit status
+    if (fflush (stdout) != 0 || ferror (stdout))
+    {
+      fprintf (stderr, "! write failed\n");
+
+      rc = 1;
+    }
+  }
+
+  // Free allocated memory
+  for (size_t i = 0; i < wordCount; i++) free (words[i]);
+
+  free (words);
+
+  return rc;
+}