This commit is contained in:
2025-04-29 22:04:32 +02:00
parent 90478ad477
commit a9dd9bc87c
13 changed files with 446 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Manifest of the `search_and_replace` crate.
[package]
name = "search_and_replace"
version = "0.1.0"
edition = "2021"
# Release profile: `strip = true` asks Cargo to strip symbols from the built binary.
[profile.release]
strip = true
# No external crates are listed; the crate relies on the standard library only.
[dependencies]

View File

@@ -0,0 +1,371 @@
use std::{env::args, io::Read};
/// Failure ("jump") table of a pattern, as used by Knuth-Morris-Pratt matching.
///
/// `jumps[i]` is the length of the longest proper prefix of `pat[..=i]` that is
/// also a suffix of it. After `i + 1` characters have been matched and the next
/// one mismatches, matching can resume at pattern index `jumps[i]`.
#[derive(Debug, PartialEq)]
struct JumpTable {
    jumps: Vec<usize>,
}

impl JumpTable {
    /// Walks the fallback chain starting at `ptr` until `pat[ptr] == needle`
    /// or the start of the pattern is reached, and returns that index.
    ///
    /// `jumps` only has to be filled for indices below `ptr`, which lets
    /// [`JumpTable::new`] use this helper while the table is still being built.
    fn fall_back(jumps: &[usize], pat: &[char], needle: char, mut ptr: usize) -> usize {
        while ptr != 0 && pat[ptr] != needle {
            ptr = jumps[ptr - 1];
        }
        ptr
    }

    /// Builds the jump table for `pat`. An empty pattern yields an empty table.
    pub fn new(pat: &[char]) -> Self {
        let mut jumps = Vec::with_capacity(pat.len());
        let mut ptr = 0_usize;
        for (idx, &cur_char) in pat.iter().enumerate() {
            // The first character never has a proper border, so its entry is 0.
            if idx > 0 {
                // Follow the complete chain of shorter borders; trying only
                // `pat[ptr]` and `pat[0]` misses borders of intermediate length.
                ptr = Self::fall_back(&jumps, pat, cur_char, ptr);
                if pat[ptr] == cur_char {
                    ptr += 1;
                }
            }
            jumps.push(ptr);
        }
        Self { jumps }
    }

    /// Returns the largest fallback position reachable from `ptr` (including
    /// `ptr` itself) at which `pat` holds `needle`, or `0` if there is none.
    ///
    /// A result of `0` is ambiguous: the caller still has to compare `pat[0]`
    /// with `needle` to know whether the character matched.
    ///
    /// # Panics
    /// Panics if `ptr` is non-zero and not a valid index into `pat`.
    pub fn search(&self, pat: &[char], needle: &char, ptr: &usize) -> usize {
        Self::fall_back(&self.jumps, pat, *needle, *ptr)
    }
}

/// A non-empty pattern together with its incremental matching state.
#[derive(Debug, PartialEq)]
struct Pattern {
    /// Characters of the pattern.
    pat: Vec<char>,
    /// Jump table built from `pat`.
    jt: JumpTable,
    /// Number of pattern characters matched so far.
    ptr: usize,
}

impl Pattern {
    /// Creates a matcher for `pat`.
    ///
    /// # Errors
    /// Returns `Err(())` when `pat` is empty.
    pub fn new(pat: String) -> Result<Self, ()> {
        if pat.is_empty() {
            return Err(());
        }
        let pat: Vec<char> = pat.chars().collect();
        let jt = JumpTable::new(&pat);
        Ok(Self { pat, jt, ptr: 0 })
    }

    /// Feeds the next haystack character and returns `true` when this
    /// character completes an occurrence of the pattern.
    ///
    /// After a full match `ptr` equals the pattern length; the next call first
    /// falls back to the longest border, so overlapping occurrences are found
    /// and calling again without `reset_ptr` no longer indexes out of bounds.
    pub fn search_and_update(&mut self, cur_char: &char) -> bool {
        if self.ptr == self.pat.len() {
            self.ptr = self.jt.jumps[self.ptr - 1];
        }
        if self.pat[self.ptr] != *cur_char {
            let found = self.jt.search(&self.pat, cur_char, &self.ptr);
            self.ptr = found;
        }
        // The fallback only yields a candidate position; the current character
        // must still be consumed there, otherwise it is silently dropped and
        // matches such as "test" in "ttest" are missed.
        if self.pat[self.ptr] == *cur_char {
            self.ptr += 1;
        }
        self.ptr == self.pat.len()
    }

    /// Forgets any partial match.
    pub fn reset_ptr(&mut self) {
        self.ptr = 0;
    }

    /// Length of the pattern in characters.
    pub fn len(&self) -> usize {
        self.pat.len()
    }
}
/// A single occurrence of one pattern.
#[derive(Debug, PartialEq, Eq)]
struct Match {
    /// Identifier of the pattern that produced this match.
    pub pat_id: usize,
    /// Position at which the match starts.
    pub start_pos: usize,
    /// Length of the matched span.
    pub len: usize,
}

impl Match {
    /// Bundles the three components of a match into a `Match` value.
    pub fn new(pat_id: usize, start_pos: usize, len: usize) -> Self {
        Match {
            len,
            start_pos,
            pat_id,
        }
    }
}
#[derive(Debug)]
struct MPSearch {
pats: Vec<Pattern>,
}
impl MPSearch {
pub fn new(pats: Vec<Pattern>) -> Self {
Self { pats }
}
fn reset_all_ptrs(&mut self) {
for pat in self.pats.iter_mut() {
pat.reset_ptr();
}
}
fn search_at_pos(&mut self, cur_char: &char, pos: &usize) -> Option<Match> {
let mut cur_match = None;
for (idx, pat) in self.pats.iter_mut().enumerate() {
if pat.search_and_update(cur_char) {
cur_match = Some(Match::new(idx, pos.saturating_sub(pat.len() - 1), pat.len()));
break;
}
}
if let Some(_) = cur_match {
self.reset_all_ptrs();
}
cur_match
}
pub fn search(&mut self, haystack: &String) -> Vec<Match> {
let mut res = Vec::new();
for (cur_idx, cur_char) in haystack.chars().enumerate() {
if let Some(cur_match) = self.search_at_pos(&cur_char, &cur_idx) {
res.push(cur_match);
}
}
res
}
}
/// Advances the iterator `$iter` by `$n` items and discards them.
///
/// Expands to code that uses `?`: if the iterator runs out early, the
/// enclosing function returns an error built from `()`.
macro_rules! skip_n {
    ($iter: ident, $n: expr) => {
        (0..$n).try_for_each(|_| $iter.next().map(drop).ok_or(()))?
    };
}
/// Moves the next `$n` items of the iterator `$iter` into `$target` via `push`.
///
/// Expands to code that uses `?`: if the iterator runs out early, the
/// enclosing function returns an error built from `()`. Items pushed before
/// that point stay in `$target`.
macro_rules! append_n {
    ($iter: ident, $target: ident, $n: expr) => {
        (0..$n).try_for_each(|_| $iter.next().map(|item| $target.push(item)).ok_or(()))?
    };
}
/// Applies a list of matches to a text, substituting each matched span with
/// the replacement registered for its pattern.
#[derive(Debug)]
struct MPReplace {
    /// Replacement texts, indexed by `Match::pat_id`.
    replacements: Vec<String>,
    /// Matches to substitute; expected in ascending, non-overlapping order.
    matches: Vec<Match>,
}

impl MPReplace {
    /// Stores the matches together with the per-pattern replacement texts.
    pub fn new(matches: Vec<Match>, replacements: Vec<String>) -> Self {
        Self { replacements, matches }
    }

    /// Returns a copy of `target` in which every matched span is replaced.
    /// Match positions and lengths are interpreted as character counts.
    ///
    /// # Errors
    /// Returns `Err(())` when a match refers to an unknown pattern id, reaches
    /// past the end of `target`, or starts before the end of the previous
    /// match (overlapping or unsorted matches).
    pub fn replace(&self, target: &str) -> Result<String, ()> {
        // Character index just past the previously replaced span.
        let mut ptr = 0_usize;
        let mut iter = target.chars();
        let mut res = String::with_capacity(target.len());
        for cur_match in &self.matches {
            // A plain subtraction would underflow on overlapping or unsorted
            // matches; report them as an error instead.
            let gap = cur_match.start_pos.checked_sub(ptr).ok_or(())?;
            // Copy the untouched text in front of the match ...
            append_n!(iter, res, gap);
            // ... drop the matched characters ...
            skip_n!(iter, cur_match.len);
            // ... and emit the replacement in their place.
            res.push_str(self.replacements.get(cur_match.pat_id).ok_or(())?);
            ptr = cur_match.start_pos.checked_add(cur_match.len).ok_or(())?;
        }
        // Whatever is left in the iterator is exactly the unmatched tail, so
        // there is no need to count the characters of `target` again.
        res.extend(iter);
        Ok(res)
    }
}
/// Adapter that groups a flat stream of strings into consecutive pairs.
struct PatMatchArgs<I: Iterator<Item = String>> {
    inner: I,
}

impl<I> PatMatchArgs<I>
where
    I: Iterator<Item = String>,
{
    /// Wraps `inner`, which the caller states to hold `count` items.
    ///
    /// # Errors
    /// Returns `Err(())` when `count` is odd. Only `count` is inspected; the
    /// iterator itself is not examined.
    pub fn new(inner: I, count: usize) -> Result<Self, ()> {
        match count % 2 {
            0 => Ok(Self { inner }),
            _ => Err(()),
        }
    }
}

impl<I> Iterator for PatMatchArgs<I>
where
    I: Iterator<Item = String>,
{
    type Item = (String, String);

    /// Yields the next two items as a pair. Iteration ends as soon as either
    /// half is missing; a lone trailing item is consumed and dropped.
    fn next(&mut self) -> Option<Self::Item> {
        let first = self.inner.next()?;
        let second = self.inner.next()?;
        Some((first, second))
    }
}
/// Reads text from standard input, replaces every occurrence of the given
/// patterns and prints the result to standard output.
///
/// The command-line arguments after the program name are read as
/// `pattern replacement` pairs. Any invalid input is reported on standard
/// error and terminates the process with exit code 1 instead of panicking.
fn main() {
    // `read_to_string` also validates that the input is UTF-8.
    let mut contents = String::new();
    if let Err(err) = std::io::stdin().lock().read_to_string(&mut contents) {
        eprintln!("failed to read UTF-8 text from standard input: {err}");
        std::process::exit(1);
    }
    // The argument list may be empty on unusual platforms; do not underflow.
    let arg_count = args().len().saturating_sub(1);
    let args = match PatMatchArgs::new(args().skip(1), arg_count) {
        Ok(val) => val,
        Err(()) => {
            eprintln!("the number of arguments after filepath must be divisible by two");
            std::process::exit(1);
        }
    };
    let mut pats = Vec::new();
    let mut replacements = Vec::new();
    for (cur_pat, cur_replacement) in args {
        let Ok(cur_pat) = Pattern::new(cur_pat) else {
            eprintln!("the patterns can't be empty");
            std::process::exit(1);
        };
        pats.push(cur_pat);
        replacements.push(cur_replacement);
    }
    let matches = MPSearch::new(pats).search(&contents);
    let Ok(replaced) = MPReplace::new(matches, replacements).replace(&contents) else {
        eprintln!("failed to apply the replacements to the input");
        std::process::exit(1);
    };
    println!("{}", replaced);
}
/// Unit tests for the jump table, the single-pattern matcher, the
/// multi-pattern search and the replacement step.
#[cfg(test)]
mod test {
    use crate::{JumpTable, MPReplace, MPSearch, Match, Pattern};

    /// Splits `text` into the `Vec<char>` form the jump table works on.
    fn chars_of(text: &str) -> Vec<char> {
        text.chars().collect()
    }

    /// Asserts that the jump table built for `pat` equals `expected`.
    #[track_caller]
    fn assert_jumps(pat: &str, expected: Vec<usize>) {
        let pat = chars_of(pat);
        let table = JumpTable::new(&pat);
        assert_eq!(table.jumps, expected);
    }

    /// Asserts the result of `JumpTable::search` for `needle` starting at `ptr`.
    #[track_caller]
    fn assert_search(pat: &str, needle: char, ptr: usize, expected: usize) {
        let pat = chars_of(pat);
        let table = JumpTable::new(&pat);
        assert_eq!(table.search(&pat, &needle, &ptr), expected);
    }

    /// Builds a searcher for the words "this", "is", "a" and "test".
    fn word_searcher() -> MPSearch {
        let pats = ["this", "is", "a", "test"]
            .iter()
            .map(|word| Pattern::new(word.to_string()).unwrap())
            .collect();
        MPSearch::new(pats)
    }

    #[test]
    fn jumps_01() {
        assert_jumps("thisthen", vec![0_usize, 0, 0, 0, 1, 2, 0, 0]);
    }

    #[test]
    fn jumps_02() {
        assert_jumps("tthis", vec![0, 1, 0, 0, 0]);
    }

    #[test]
    fn jumps_03() {
        assert_jumps("t", vec![0]);
    }

    #[test]
    fn jumps_04() {
        assert_jumps("tt", vec![0, 1]);
    }

    #[test]
    fn jumps_05() {
        assert_jumps("", vec![]);
    }

    #[test]
    fn search_01() {
        assert_search("tthis", 't', 1, 1);
    }

    #[test]
    fn search_02() {
        assert_search("testtesa", 't', 7, 3);
    }

    #[test]
    fn search_03() {
        assert_search("ahojahoj", 'j', 7, 7);
    }

    #[test]
    fn search_04() {
        assert_search("ahojahoj", 'j', 7, 7);
    }

    #[test]
    fn search_05() {
        assert_search("jojojojojojoja", 'o', 13, 11);
    }

    #[test]
    fn search_and_update_01() {
        let mut pattern = Pattern::new("test".to_string()).unwrap();
        let haystack = String::from("thisisatest");
        // Everything except the final character must not complete the pattern.
        let prefix_len = haystack.len() - 1;
        haystack.chars().take(prefix_len).for_each(|cur_haystack| {
            assert!(!pattern.search_and_update(&cur_haystack));
            println!("{:?}", pattern);
        });
        println!("{:?}", pattern);
        assert!(pattern.search_and_update(&'t'));
    }

    #[test]
    fn empty_pattern() {
        assert_eq!(Pattern::new(String::new()), Err(()));
    }

    #[test]
    fn mpsearch_01() {
        let mut mpsearch = word_searcher();
        println!("{:?}", mpsearch);
        let haystack = String::from("this is a test");
        let expected: Vec<Match> = [(0, 0, 4), (1, 5, 2), (2, 8, 1), (3, 10, 4)]
            .iter()
            .map(|&(pat_id, start_pos, len)| Match::new(pat_id, start_pos, len))
            .collect();
        assert_eq!(mpsearch.search(&haystack), expected);
    }

    #[test]
    fn mpreplace_01() {
        let mut mpsearch = word_searcher();
        let haystack = String::from("this-is.a*test///");
        let matches = mpsearch.search(&haystack);
        let replacements = ["that", "isn't", "the", "final"]
            .iter()
            .map(|text| text.to_string())
            .collect();
        let replaced = MPReplace::new(matches, replacements)
            .replace(&haystack)
            .unwrap();
        assert_eq!(replaced, "that-isn't.the*final///");
    }
}