commit ef0b5011ce4226ebbfb1fdb039d7ffc32cd23a9e Author: jacekpoz Date: Sun Oct 20 22:19:05 2024 +0200 init diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ddba8a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.direnv/ +*.pdf +**/target diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..febc559 --- /dev/null +++ b/flake.lock @@ -0,0 +1,81 @@ +{ + "nodes": { + "crane": { + "locked": { + "lastModified": 1729273024, + "narHash": "sha256-Mb5SemVsootkn4Q2IiY0rr9vrXdCCpQ9HnZeD/J3uXs=", + "owner": "ipetkov", + "repo": "crane", + "rev": "fa8b7445ddadc37850ed222718ca86622be01967", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "fenix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "rust-analyzer-src": [] + }, + "locked": { + "lastModified": 1729375822, + "narHash": "sha256-bRo4xVwUhvJ4Gz+OhWMREFMdBOYSw4Yi1Apj01ebbug=", + "owner": "nix-community", + "repo": "fenix", + "rev": "2853e7d9b5c52a148a9fb824bfe4f9f433f557ab", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "fenix", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1729265718, + "narHash": "sha256-4HQI+6LsO3kpWTYuVGIzhJs1cetFcwT7quWCk/6rqeo=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ccc0c2126893dd20963580b6478d1a10a4512185", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "fenix": "fenix", + "nixpkgs": "nixpkgs", + "systems": "systems" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": 
"github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e16e104 --- /dev/null +++ b/flake.nix @@ -0,0 +1,61 @@ +{ + description = "jftt"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + systems.url = "github:nix-systems/default"; + crane.url = "github:ipetkov/crane"; + fenix = { + url = "github:nix-community/fenix"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.rust-analyzer-src.follows = ""; + }; + }; + + outputs = { self, nixpkgs, systems, crane, fenix, ... }: let + name = "jftt"; + + forEachSystem = nixpkgs.lib.genAttrs (import systems); + pkgsForEach = nixpkgs.legacyPackages; + in { + # packages = forEachSystem ( + # system: let + # pkgs = pkgsForEach.${system}; + # craneLib = (crane.mkLib pkgs).overrideToolchain ( + # fenix.packages.${system}.complete.withComponents [ + # "cargo" + # "rustc" + # "rust-src" + # ] + # ); + # in { + # default = craneLib.buildPackage { + # pname = name; + # version = "0.1.0"; + # src = craneLib.cleanCargoSource ./.; + # }; + # } + # ); + + devShells = forEachSystem ( + system: let + pkgs = pkgsForEach.${system}; + in { + default = pkgs.mkShell { + inherit name; + + packages = with pkgs; [ + rust-analyzer + (fenix.packages.${system}.complete.withComponents [ + "cargo" + "rustc" + "rust-src" + ]) + ]; + + # inputsFrom = [ self.packages.${system}.default ]; + }; + } + ); + }; +} diff --git a/l1/fa/Cargo.lock b/l1/fa/Cargo.lock new file mode 100644 index 0000000..328365c --- /dev/null +++ b/l1/fa/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "fa" +version = "0.1.0" +dependencies = [ + "string-matcher-lib", +] + +[[package]] +name = "string-matcher-lib" +version = "0.1.0" diff --git a/l1/fa/Cargo.toml b/l1/fa/Cargo.toml new file mode 100644 index 0000000..20c7763 --- /dev/null +++ b/l1/fa/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "fa" +version = "0.1.0" +edition = "2021" + +[dependencies] +string-matcher-lib = { path = "../string-matcher-lib" } diff --git a/l1/fa/src/lib.rs b/l1/fa/src/lib.rs new file mode 100644 index 0000000..62dcd49 --- /dev/null +++ b/l1/fa/src/lib.rs @@ -0,0 +1,74 @@ +// CLRS 4th edition 2022 +// +// COMPUTE-TRANSITION-FUNCTION(P, Σ, m) +// for q = 0 to m +// for each character a ∈ Σ +// k = min{m, q + 1} +// while P[:k] is not a suffix of P[:q]a +// k = k - 1 +// δ(q, a) = k +// return δ +// +// FINITE-AUTOMATON-MATCHER(T, δ, n, m) +// q = 0 +// for i = 1 to n +// q = δ(q, T[i]) +// if q == m +// print "Pattern occurs with shift" i - m +// +// T - input text +// δ - transition function +// n - input length +// m - pattern length +#![feature(pattern)] + +use std::{collections::HashMap, str::pattern::Pattern}; +use string_matcher_lib::StringMatcherError; + +fn compute_transition_function(pattern: &str, alphabet: &str) -> HashMap<(usize, char), usize> { + let m = pattern.len(); + + let mut f = HashMap::new(); + + for q in 0..=m { + for a in alphabet.chars() { + let mut k = m.min(q + 1); + // I'm sorry I know this is ugly + while !pattern[..k].is_suffix_of(format!("{}{a}", pattern[..q].to_string()).as_str()) && k > 0 { + k -= 1; + } + f.insert((q, a), k); + } + } + + f +} + +pub fn string_matcher(text: &str, pattern: &str) -> Result, StringMatcherError> { + let n = text.len(); + let m = pattern.len(); + + let alphabet = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ĄĆĘŁŃÓŚŻŹąćęłńóśżź"; + let f = compute_transition_function(pattern, alphabet); + + let mut offsets = vec![]; + + let mut q = 0; + 
for i in 0..n { + use StringMatcherError::*; + let ch = match text.chars().nth(i) { + Some(c) => if alphabet.contains(c) { c } else { + return Err(InvalidCharacter(i, c)); + }, + None => return Err(ReadError(i)), + }; + + q = *f.get(&(q, ch)).unwrap(); + + if q == m { + offsets.push(i + 1 - m); + } + } + + Ok(offsets) +} diff --git a/l1/kmp/Cargo.lock b/l1/kmp/Cargo.lock new file mode 100644 index 0000000..4d8f8cd --- /dev/null +++ b/l1/kmp/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "kmp" +version = "0.1.0" +dependencies = [ + "string-matcher-lib", +] + +[[package]] +name = "string-matcher-lib" +version = "0.1.0" diff --git a/l1/kmp/Cargo.toml b/l1/kmp/Cargo.toml new file mode 100644 index 0000000..1d9e378 --- /dev/null +++ b/l1/kmp/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "kmp" +version = "0.1.0" +edition = "2021" + +[dependencies] +string-matcher-lib = { path = "../string-matcher-lib" } diff --git a/l1/kmp/src/lib.rs b/l1/kmp/src/lib.rs new file mode 100644 index 0000000..354b921 --- /dev/null +++ b/l1/kmp/src/lib.rs @@ -0,0 +1,87 @@ +// CLRS 4th edition 2022 +// +// KMP-MATCHER(T, P, n, m) +// π = COMPUTE-PREFIX-FUNCTION(P, m) +// q = 0 // number of characters matched +// for i = 1 to n // scan the text from left to right +// while q > 0 and P[q + 1] ≠ T[i] +// q = π[q] // next character does not match +// if P[q + 1] == T[i] +// q = q + 1 // next character matches +// if q == m // is all of P matched? 
+// print "Pattern occurs with shift" i - m +// q = π[q] // look for the next match +// +// COMPUTE-PREFIX-FUNCTION(P, m) +// let π[1:m] be a new array +// π[1] = 0 +// k = 0 +// for q = 2 to m +// while k > 0 and P[k + 1] ≠ P[q] +// k = π[k] +// if P[k + 1] == P[q] +// k = k + 1 +// π[q] = k +// return π + +use string_matcher_lib::StringMatcherError; + +fn compute_prefix_function(pattern: &str) -> Vec { + let m = pattern.len(); + + let mut pi = vec![0; m]; + let mut k = 0; + for q in 1..m { + let a = pattern.chars().nth(k).unwrap(); + let b = pattern.chars().nth(q).unwrap(); + + while k > 0 && a != b { + k = pi[k]; + } + + let a = pattern.chars().nth(k).unwrap(); + + if a == b { + k += 1; + } + pi[q] = k; + } + + pi +} + +pub fn string_matcher(text: &str, pattern: &str) -> Result, StringMatcherError> { + let n = text.len(); + let m = pattern.len(); + + let mut offsets = vec![]; + + let pi = compute_prefix_function(pattern); + let mut q = 0; + for i in 0..n { + let a = pattern.chars().nth(q).unwrap(); + use StringMatcherError::*; + let b = match text.chars().nth(i) { + Some(c) => c, + None => { + return Err(ReadError(i)) + }, + }; + + while q > 0 && a != b { + q = pi[q - 1]; + } + + let a = pattern.chars().nth(q).unwrap(); + + if a == b { + q += 1; + } + if q == m { + offsets.push(i + 1 - m); + q = pi[q - 1]; + } + } + + Ok(offsets) +} diff --git a/l1/naive/Cargo.lock b/l1/naive/Cargo.lock new file mode 100644 index 0000000..806de29 --- /dev/null +++ b/l1/naive/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "naive" +version = "0.1.0" +dependencies = [ + "string-matcher-lib", +] + +[[package]] +name = "string-matcher-lib" +version = "0.1.0" diff --git a/l1/naive/Cargo.toml b/l1/naive/Cargo.toml new file mode 100644 index 0000000..4bfb95a --- /dev/null +++ b/l1/naive/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "naive" +version = "0.1.0" +edition = "2021" + +[dependencies] +string-matcher-lib = { path = "../string-matcher-lib" } diff --git a/l1/naive/src/lib.rs b/l1/naive/src/lib.rs new file mode 100644 index 0000000..773dc13 --- /dev/null +++ b/l1/naive/src/lib.rs @@ -0,0 +1,28 @@ +// CLRS 4th edition 2022 +// +// NAIVE-STRING_MATCHER(T, P, n, m) +// for s = 0 to n - m +// if P[1:m] == T[s + 1:s + m] +// print "Pattern occurs with shift" s +// +// T - input text +// P - pattern +// n - input length +// m - pattern length + +use string_matcher_lib::StringMatcherError; + +pub fn string_matcher(text: &str, pattern: &str) -> Result, StringMatcherError> { + let n = text.len(); + let m = pattern.len(); + + let mut offsets = vec![]; + + for s in 0..=(n - m) { + if pattern == &text[s..(s + m)] { + offsets.push(s); + } + } + + Ok(offsets) +} diff --git a/l1/string-matcher-lib/Cargo.lock b/l1/string-matcher-lib/Cargo.lock new file mode 100644 index 0000000..2246597 --- /dev/null +++ b/l1/string-matcher-lib/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
/// Errors a string matcher can report while scanning its input text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringMatcherError {
    /// The input contains a character the matcher does not support.
    /// Holds the offset of the character in the text and the character itself.
    InvalidCharacter(usize, char),
    /// The input could not be read at the given offset.
    ReadError(usize),
}

impl std::fmt::Display for StringMatcherError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            StringMatcherError::InvalidCharacter(offset, ch) => {
                write!(f, "invalid character at offset {offset}: {ch}")
            }
            StringMatcherError::ReadError(offset) => {
                write!(f, "failed reading input at offset {offset}")
            }
        }
    }
}

impl std::error::Error for StringMatcherError {}
b/l1/string-matcher/src/main.rs @@ -0,0 +1,83 @@ +use std::{env, process::exit}; +use string_matcher_lib::StringMatcherError::*; + +fn main() { + let usage = format!( + "usage: {program} ", + program = env::args().nth(0).unwrap() + ); + + let algorithm = match env::args().nth(1) { + Some(s) => s, + None => { + eprintln!("{}", usage); + exit(1) + }, + }; + + let algorithm_char = match algorithm.to_lowercase().chars().nth(0) { + Some(s) => s, + None => { + eprintln!("{}", usage); + exit(1) + }, + }; + + let matcher = match algorithm_char { + 'n' => naive::string_matcher, + 'f' => fa::string_matcher, + 'k' => kmp::string_matcher, + _ => { + eprintln!("{}", &usage); + exit(1) + } + }; + + let pattern = match env::args().nth(2) { + Some(s) => s, + None => { + eprintln!("{}", usage); + exit(1) + }, + }; + + let file = match env::args().nth(3) { + Some(s) => s, + None => { + eprintln!("{}", usage); + exit(1) + }, + }; + + let text = match std::fs::read_to_string(&file) { + Ok(s) => s, + Err(e) => { + eprintln!("couldn't read file {}: {e}", &file); + exit(1) + }, + }; + + match matcher(&text, &pattern) { + Ok(offsets) => { + if offsets.is_empty() { + println!("{pattern} not found in {file}"); + } else { + print!("{pattern} found in {file} at the following offsets:"); + + for offset in offsets { + print!(" {offset}"); + } + + print!("\n"); + } + }, + Err(error) => match error { + ReadError(offset) => { + eprintln!("failed reading input at offset {offset}"); + }, + InvalidCharacter(offset, ch) => { + eprintln!("invalid character at offset {offset}: {ch}") + }, + }, + } +}