Create build system (#1)
This commit is contained in:
8
lib/inferium/.gitignore
vendored
8
lib/inferium/.gitignore
vendored
@@ -1,8 +0,0 @@
|
||||
/target
|
||||
|
||||
|
||||
# Added by cargo
|
||||
#
|
||||
# already existing elements were commented out
|
||||
|
||||
#/target
|
@@ -1,60 +0,0 @@
|
||||
#![feature(test)]
|
||||
extern crate test;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use test::Bencher;
|
||||
|
||||
extern crate inferium;
|
||||
use inferium::h1::{SyncClient, Response, ResponseHead, ProtocolVariant};
|
||||
use inferium::{Status, HeaderValue};
|
||||
use inferium::TestSyncStream;
|
||||
|
||||
fn parse_response_sync_inner() {
|
||||
let src = "HTTP/1.1 200 OK\r\nserver: inferium\r\n\r\n".as_bytes().to_vec();
|
||||
let stream = TestSyncStream::<4>::new(&src);
|
||||
let mut client = SyncClient::<TestSyncStream<4>>::new(stream);
|
||||
let target = Response::HeadersOnly(ResponseHead::new(
|
||||
Status::Ok,
|
||||
ProtocolVariant::HTTP1_1,
|
||||
HashMap::from([
|
||||
("server".into(), HeaderValue::new(vec!["inferium".to_string()]))
|
||||
])
|
||||
));
|
||||
assert_eq!(client.receive_response().unwrap(), target);
|
||||
}
|
||||
|
||||
fn parse_response_sync_inner_body() {
|
||||
let mut src = "HTTP/1.1 200 OK\r\ncontent-length: 50\r\n\r\n".as_bytes().to_vec();
|
||||
src.extend_from_slice(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
let stream = TestSyncStream::<4>::new(&src);
|
||||
let mut client = SyncClient::<TestSyncStream<4>>::new(stream);
|
||||
let target_head = ResponseHead::new(
|
||||
Status::Ok,
|
||||
ProtocolVariant::HTTP1_1,
|
||||
HashMap::from([
|
||||
("content-length".into(), HeaderValue::new(vec!["50".to_string()]))
|
||||
])
|
||||
);
|
||||
let Response::WithSizedBody((h, mut b)) = client.receive_response().unwrap() else {
|
||||
panic!();
|
||||
};
|
||||
let b = b.recv_all().unwrap();
|
||||
assert_eq!(h, target_head);
|
||||
assert_eq!(b, b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_response_sync(b: &mut Bencher) {
|
||||
b.bytes = 37;
|
||||
b.iter(|| {
|
||||
parse_response_sync_inner();
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn parse_response_sync_with_body(b: &mut Bencher) {
|
||||
b.bytes = 39 + 13;
|
||||
b.iter(|| {
|
||||
parse_response_sync_inner_body();
|
||||
});
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
#![feature(test)]
|
||||
extern crate test;
|
||||
use test::Bencher;
|
||||
|
||||
extern crate inferium;
|
||||
use inferium::Status;
|
||||
|
||||
#[bench]
|
||||
fn baseline(b: &mut Bencher) {
|
||||
fn status_test_from_slice(raw: &[u8]) -> Result<Status, ()> {
|
||||
match raw {
|
||||
b"200" => Ok(Status::Ok),
|
||||
b"404" => Ok(Status::NotFound),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
|
||||
b.iter(|| {
|
||||
assert_eq!(status_test_from_slice(b"200".as_slice()), Ok(Status::Ok));
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn valid_ok(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
assert_eq!(Status::try_from(b"200".as_slice()), Ok(Status::Ok));
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn valid_internal_server_error(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
assert_eq!(Status::try_from(b"500".as_slice()), Ok(Status::InternalServerError));
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn invalid(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
assert!(Status::try_from(b"690".as_slice()).is_err());
|
||||
})
|
||||
}
|
@@ -1,94 +0,0 @@
|
||||
// This is an async port of `examples/simple_server.rs`. Please see that example first.
|
||||
//
|
||||
// Features `async` and `tokio-net` must be enabled for this example to compile.
|
||||
// It is also possible to enable feature `full` (which will enable all the features).
|
||||
|
||||
use std::{collections::HashMap, net::SocketAddr};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use inferium::{
|
||||
h1::{ProtocolVariant, Request, ResponseHead, ServerSendError, AsyncServer},
|
||||
HeaderKey, Method, Status, TokioInet
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let listener = TcpListener::bind("localhost:8080").await.unwrap();
|
||||
loop {
|
||||
let (conn, addr) = listener.accept().await.unwrap();
|
||||
// Here we are creating a new asynchronous task for every client.
|
||||
// This will fork off in an asynchronous manner and won't block our accept loop.
|
||||
tokio::task::spawn(async move {
|
||||
// We created this new async block, so we need to `.await` on this function to propagate
|
||||
// the future from the function to the top of the spawned task (this async block).
|
||||
handle_client(conn, addr).await;
|
||||
});
|
||||
// You can now handle multiple clients at once... congratulations.
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_client(conn: TcpStream, addr: SocketAddr) {
|
||||
println!("connection from {addr:?}");
|
||||
let mut server_handler = AsyncServer::<TokioInet>::new(TokioInet::new(conn));
|
||||
// When receiving or sending - we call the same functions with `.await` appended (in an async
|
||||
// context). This will automatically poll the returned future from the top of the context.
|
||||
// The polling is handled by tokio here - so we don't need to worry about it.
|
||||
while let Ok(request) = server_handler.receive_request().await {
|
||||
match handle_request(request, addr) {
|
||||
Ok((h, b)) => if let Err(_) = send_response(h, b, &mut server_handler).await { break; },
|
||||
Err(()) => break,
|
||||
}
|
||||
};
|
||||
println!("ended connection for {addr:?}");
|
||||
}
|
||||
|
||||
fn handle_request<T>(
|
||||
req: Request<T>, addr: SocketAddr
|
||||
) -> Result<(ResponseHead, &'static [u8]), ()> {
|
||||
let Request::HeadersOnly(headers) = req else {
|
||||
return Err(());
|
||||
};
|
||||
|
||||
println!("req from {addr:?}: {headers}");
|
||||
|
||||
const OK_RESPONSE: &[u8] = b"<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Hello, world!</h1>
|
||||
<p>Hello from inferium.</p>
|
||||
</body>
|
||||
</html>";
|
||||
const NOT_FOUND_RESPONSE: &[u8] = b"<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Not found</h1>
|
||||
<p>This page was not found</p>
|
||||
</body>
|
||||
</html>";
|
||||
|
||||
Ok(match (headers.method(), headers.uri().path()) {
|
||||
(&Method::GET, "/") => (ResponseHead::new(
|
||||
Status::Ok,
|
||||
ProtocolVariant::HTTP1_0,
|
||||
HashMap::from([
|
||||
(HeaderKey::SERVER, "inferium".parse().unwrap()),
|
||||
(HeaderKey::CONTENT_LENGTH, OK_RESPONSE.len().into()),
|
||||
])
|
||||
), OK_RESPONSE),
|
||||
|
||||
_ => (ResponseHead::new(
|
||||
Status::NotFound,
|
||||
ProtocolVariant::HTTP1_0,
|
||||
HashMap::from([
|
||||
(HeaderKey::SERVER, "inferium".parse().unwrap()),
|
||||
(HeaderKey::CONTENT_LENGTH, NOT_FOUND_RESPONSE.len().into()),
|
||||
])
|
||||
), NOT_FOUND_RESPONSE),
|
||||
})
|
||||
}
|
||||
|
||||
async fn send_response(
|
||||
response: ResponseHead, body: &[u8], conn: &mut AsyncServer<TokioInet>
|
||||
)-> Result<(), ServerSendError> {
|
||||
conn.send_response(&response).await?;
|
||||
conn.send_body_bytes(body).await.map_err(|e| e.try_into().unwrap())
|
||||
}
|
@@ -1,88 +0,0 @@
|
||||
// This is a port of a client from `examples/start_here.rs`. Please see that example first.
|
||||
// Also... maybe brush up on some async tasks since we are going to need them here (there is an
|
||||
// example on async with inferium in `examples/going_async.rs`).
|
||||
//
|
||||
// Features `async`, `tokio-net` and `webpki-roots` dependency must be enabled for this example to
|
||||
// compile. We recommend enabling the `dev` feature when running this example.
|
||||
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio_rustls::{
|
||||
rustls::{
|
||||
pki_types::ServerName,
|
||||
ClientConfig,
|
||||
RootCertStore
|
||||
},
|
||||
TlsConnector,
|
||||
TlsStream
|
||||
};
|
||||
use inferium::{
|
||||
h1::{
|
||||
ProtocolVariant,
|
||||
RequestHead,
|
||||
Response,
|
||||
AsyncClient
|
||||
},
|
||||
HeaderKey,
|
||||
Method,
|
||||
TokioRustls
|
||||
};
|
||||
|
||||
async fn run_tls_handshake(raw_stream: TcpStream) -> TlsStream<TcpStream> {
|
||||
let mut root_certs = RootCertStore::empty();
|
||||
root_certs.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
|
||||
let config = ClientConfig::builder()
|
||||
.with_root_certificates(root_certs)
|
||||
.with_no_client_auth();
|
||||
let connector = TlsConnector::from(Arc::new(config));
|
||||
let verify_server_name = ServerName::try_from("zumepro.cz").unwrap();
|
||||
TlsStream::Client(connector.connect(verify_server_name, raw_stream).await.unwrap())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let stream = TcpStream::connect("zumepro.cz:443").await.unwrap();
|
||||
let stream = run_tls_handshake(stream).await;
|
||||
let conn = TokioRustls::new(stream);
|
||||
let mut client = AsyncClient::<TokioRustls>::new(conn);
|
||||
|
||||
let to_send = RequestHead::new(
|
||||
Method::GET, "/".parse().unwrap(), ProtocolVariant::HTTP1_1,
|
||||
HashMap::from([
|
||||
(HeaderKey::USER_AGENT, "Mozilla/5.0 (inferium)".parse().unwrap()),
|
||||
(HeaderKey::HOST, "zumepro.cz".parse().unwrap()),
|
||||
(HeaderKey::CONNECTION, "close".parse().unwrap())
|
||||
])
|
||||
);
|
||||
println!("----------> Sending\n\n{to_send}\n");
|
||||
client.send_request(&to_send).await.unwrap();
|
||||
let response = client.receive_response().await.unwrap();
|
||||
|
||||
let (header, body) = match response {
|
||||
Response::HeadersOnly(h) => (h, None),
|
||||
Response::WithSizedBody((_, _)) => panic!(),
|
||||
Response::WithChunkedBody((h, b)) => (h, Some(b)),
|
||||
};
|
||||
|
||||
println!("----------< Received\n\n{header}\n");
|
||||
|
||||
if let Some(mut body) = body {
|
||||
|
||||
// Since our zumepro server sends bodies chunked - we will need to handle it.
|
||||
// This simple loop just collects all the chunks into the res vector.
|
||||
let mut res = Vec::new();
|
||||
while let Some(mut chunk) = body.get_chunk_async().await.unwrap() {
|
||||
// Now here is a difference between sync and async.
|
||||
//
|
||||
// For easy body manipulation and no redundant trait pollution, a body within an
|
||||
// asynchronous stream can be sent/received using the same methods as a synchronous one,
|
||||
// but with the suffix `_async`.
|
||||
res.append(&mut chunk.recv_all_async().await.unwrap());
|
||||
}
|
||||
|
||||
println!(
|
||||
"----------< Body\n\n{:?}\n",
|
||||
std::str::from_utf8(&res).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
@@ -1,84 +0,0 @@
|
||||
use std::{collections::HashMap, net::{SocketAddr, TcpListener}};
|
||||
use inferium::{
|
||||
h1::{ProtocolVariant, Request, ResponseHead, ServerSendError, SyncServer},
|
||||
HeaderKey, Method, Status, StdInet
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let listener = TcpListener::bind("localhost:8080").unwrap();
|
||||
loop {
|
||||
let (conn, addr) = listener.accept().unwrap();
|
||||
println!("connection from {addr:?}");
|
||||
let mut server_handler = SyncServer::<StdInet>::new(StdInet::new(conn));
|
||||
// We'll serve the client as long as it sends valid requests.
|
||||
// Note that this will effectively block other clients.
|
||||
while let Ok(request) = server_handler.receive_request() {
|
||||
// This matching is here to provide a way of controlling the while loop.
|
||||
match handle_request(request, addr) {
|
||||
Ok((h, b)) => if let Err(_) = send_response(h, b, &mut server_handler) { break; },
|
||||
Err(()) => break,
|
||||
}
|
||||
};
|
||||
println!("ended connection for {addr:?}");
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_request<T>(
|
||||
req: Request<T>, addr: SocketAddr
|
||||
) -> Result<(ResponseHead, &'static [u8]), ()> {
|
||||
let Request::HeadersOnly(headers) = req else {
|
||||
// We will not handle POST requests with bodies - so let's tell the client to f*ck off.
|
||||
return Err(());
|
||||
};
|
||||
|
||||
println!("req from {addr:?}: {headers}");
|
||||
|
||||
const OK_RESPONSE: &[u8] = b"<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Hello, world!</h1>
|
||||
<p>Hello from inferium.</p>
|
||||
</body>
|
||||
</html>";
|
||||
const NOT_FOUND_RESPONSE: &[u8] = b"<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Not found</h1>
|
||||
<p>This page was not found</p>
|
||||
</body>
|
||||
</html>";
|
||||
|
||||
// The URI can contain both path and parameters - so we're just getting the path here.
|
||||
Ok(match (headers.method(), headers.uri().path()) {
|
||||
// The ok response with our index page
|
||||
(&Method::GET, "/") => (ResponseHead::new(
|
||||
Status::Ok,
|
||||
ProtocolVariant::HTTP1_0,
|
||||
HashMap::from([
|
||||
(HeaderKey::SERVER, "inferium".parse().unwrap()),
|
||||
(HeaderKey::CONTENT_LENGTH, OK_RESPONSE.len().into()),
|
||||
])
|
||||
), OK_RESPONSE),
|
||||
|
||||
// The not found response with an example not found page
|
||||
_ => (ResponseHead::new(
|
||||
Status::NotFound,
|
||||
ProtocolVariant::HTTP1_0,
|
||||
HashMap::from([
|
||||
(HeaderKey::SERVER, "inferium".parse().unwrap()),
|
||||
(HeaderKey::CONTENT_LENGTH, NOT_FOUND_RESPONSE.len().into()),
|
||||
])
|
||||
), NOT_FOUND_RESPONSE),
|
||||
})
|
||||
}
|
||||
|
||||
fn send_response(
|
||||
response: ResponseHead, body: &[u8], conn: &mut SyncServer<StdInet>
|
||||
)-> Result<(), ServerSendError> {
|
||||
conn.send_response(&response)?;
|
||||
// The send body can fail on an I/O error or if the content-length header does not match the
|
||||
// actual sent length in this scenario. But we know that we have the correct length so with
|
||||
// `.try_into().unwrap()` we tell inferium to convert the error and panic on (not so much)
|
||||
// possible body length discrepancy.
|
||||
conn.send_body_bytes(body).map_err(|e| e.try_into().unwrap())
|
||||
}
|
@@ -1,101 +0,0 @@
|
||||
// Hello, and welcome to inferium. A performance-oriented small HTTP library written in Rust that
|
||||
// keeps you (the user) in charge.
|
||||
|
||||
// Let's first import some necessary things.
|
||||
|
||||
// In inferium - HashMaps are used to store uri parameters and headers.
|
||||
use std::collections::HashMap;
|
||||
// TcpStream is needed if we want to connect to the internet.
|
||||
use std::net::TcpStream;
|
||||
|
||||
use inferium::{
|
||||
// The h1 module contains all the protocol specific things for HTTP/1.(0/1).
|
||||
h1::{
|
||||
// ProtocolVariant contains variants with the protocol versions supported in this module:
|
||||
// - HTTP/1.1
|
||||
// - HTTP/1.0
|
||||
ProtocolVariant,
|
||||
// RequestHead contains headers and the HTTP request headline (method, path, protocol).
|
||||
RequestHead,
|
||||
// Response here is a wrapper for a response that can have the following:
|
||||
// - Headers only
|
||||
// - Headers and body (with a known length or chunked)
|
||||
//
|
||||
// ! The body in the response is not yet collected. It is up to you if you wish to discard
|
||||
// the connection or receive and collect the response body into some structure.
|
||||
//
|
||||
// The same goes for the Request object, which is nearly the same except that its headline
// contains the method, path and protocol (instead of the protocol and status).
|
||||
Response,
|
||||
// Sync client is a stream wrapper that helps us keep track of the open connection and
|
||||
// perform request/response operations.
|
||||
//
|
||||
// The server equivalent is SyncServer.
|
||||
SyncClient
|
||||
},
|
||||
// Header key contains various known header keys, but can also store arbitrary (unknown) header
|
||||
// key in the OTHER variant.
|
||||
HeaderKey,
|
||||
Method,
|
||||
// StdInet here is a stream wrapper that allows the TcpStream to be used by inferium. There is
|
||||
// also a unix socket equivalent and some asynchronous io wrappers.
|
||||
StdInet
|
||||
};
|
||||
|
||||
fn main() {
|
||||
// Let's first create a connection... nothing weird here.
|
||||
let conn = StdInet::new(TcpStream::connect("zumepro.cz:80").unwrap());
|
||||
// And a client...
|
||||
let mut client = SyncClient::<StdInet>::new(conn);
|
||||
|
||||
// Now let's create a request to send
|
||||
let to_send = RequestHead::new(
|
||||
// The path here is parsed into an HTTP path object (which also supports parameters)
|
||||
// I'm using HTTP/1.0 in this example as HTTP/1.1 automatically infers a compatibility with
|
||||
// chunked encoding (which I'm not even trying to handle here).
|
||||
Method::GET, "/".parse().unwrap(), ProtocolVariant::HTTP1_0,
|
||||
HashMap::from([
|
||||
// All headers are HeaderKey - HeaderValue pairs. We can parse the header value into
|
||||
// the desired object.
|
||||
//
|
||||
// Constructing arbitrary header key is supported using the OTHER variant - however
|
||||
// it's not recommended as a violation of the HTTP protocol can happen.
|
||||
//
|
||||
// If you really want to construct an arbitrary header key - please carefully check
|
||||
// that all of the symbols are valid.
|
||||
(HeaderKey::USER_AGENT, "Mozilla/5.0 (inferium)".parse().unwrap()),
|
||||
(HeaderKey::HOST, "zumepro.cz".parse().unwrap()),
|
||||
])
|
||||
);
|
||||
println!("----------> Sending\n\n{to_send}\n");
|
||||
// Let's send the request - this is pretty straightforward.
|
||||
client.send_request(&to_send).unwrap();
|
||||
// As is receiving a response.
|
||||
let response = client.receive_response().unwrap();
|
||||
|
||||
// Now (as we discussed earlier) - the response can have a body.
|
||||
// In this example we'll try to handle a basic body with a known size.
|
||||
let (header, body) = match response {
|
||||
// Extracting the headers if no body is present.
|
||||
Response::HeadersOnly(h) => (h, None),
|
||||
// Extracting both the headers and the body if body is present.
|
||||
Response::WithSizedBody((h, b)) => (h, Some(b)),
|
||||
// We will not handle chunked responses in this example.
|
||||
Response::WithChunkedBody((_, _)) => panic!(),
|
||||
};
|
||||
|
||||
// inferium kindly provides a simple way to print the head of a request/response.
|
||||
// It will be formatted pretty close to the actual protocol plaintext representation.
|
||||
println!("----------< Received\n\n{header}\n");
|
||||
|
||||
// And finally... if we have a body, we'll print it.
|
||||
if let Some(mut body) = body {
|
||||
println!(
|
||||
"----------< Body\n\n{:?}\n",
|
||||
// A body is always returned in bytes. It's up to you to decode it however you see fit.
|
||||
std::str::from_utf8(&mut body.recv_all().unwrap()).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
// And you're done. Come on... try to run it.
|
||||
}
|
@@ -158,6 +158,14 @@ impl HeaderValue {
|
||||
self.inner.push(val);
|
||||
}
|
||||
|
||||
/// Create a new unchecked value.
|
||||
/// This is faster, but can cause protocol violation.
|
||||
/// Please avoid putting unchecked content here.
|
||||
#[inline]
|
||||
pub fn new_unchecked(inner: String) -> Self {
|
||||
Self { inner: vec![inner] }
|
||||
}
|
||||
|
||||
/// Query the first entry for this header key.
|
||||
///
|
||||
/// See the documentation of [`HeaderValue`] for more information.
|
||||
|
@@ -1,3 +1,5 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub mod settings;
|
||||
|
||||
mod io;
|
||||
|
@@ -322,6 +322,28 @@ pub enum Request<'a, T> {
|
||||
WithChunkedBody((RequestHead, Incoming<'a, ChunkedIn<'a, PrependableStream<T>>>)),
|
||||
}
|
||||
|
||||
impl<T> Response<'_, T> {
|
||||
#[inline]
|
||||
pub fn head(&self) -> &ResponseHead {
|
||||
match self {
|
||||
Self::HeadersOnly(h) => h,
|
||||
Self::WithSizedBody((h, _)) => h,
|
||||
Self::WithChunkedBody((h, _)) => h,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Request<'_, T> {
|
||||
#[inline]
|
||||
pub fn head(&self) -> &RequestHead {
|
||||
match self {
|
||||
Self::HeadersOnly(h) => h,
|
||||
Self::WithSizedBody((h, _)) => h,
|
||||
Self::WithChunkedBody((h, _)) => h,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_outgoing_req_content_length(head: &RequestHead) -> Result<Option<usize>, ClientSendError> {
|
||||
let Some(l) = head.headers.get(&HeaderKey::CONTENT_LENGTH) else {
|
||||
return Ok(None);
|
||||
|
7
lib/search_and_replace/Cargo.lock
generated
Normal file
7
lib/search_and_replace/Cargo.lock
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "search_and_replace"
|
||||
version = "0.1.0"
|
10
lib/search_and_replace/Cargo.toml
Normal file
10
lib/search_and_replace/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "search_and_replace"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[profile.release]
|
||||
strip = true
|
||||
lto = true
|
||||
|
||||
[dependencies]
|
386
lib/search_and_replace/src/main.rs
Normal file
386
lib/search_and_replace/src/main.rs
Normal file
@@ -0,0 +1,386 @@
|
||||
use std::{env::args, io::Read};
|
||||
|
||||
/// Failure ("jump") table of a pattern, as used by Knuth-Morris-Pratt matching.
///
/// `jumps[i]` is the length of the longest proper prefix of `pat[..=i]` that
/// is also a suffix of it, i.e. the pattern index to continue from after a
/// mismatch that follows `i + 1` matched chars.
#[derive(Debug, PartialEq)]
struct JumpTable {
    jumps: Vec<usize>,
}

impl JumpTable {
    /// Builds the table for `pat`. An empty pattern yields an empty table.
    pub fn new(pat: &[char]) -> Self {
        if pat.is_empty() {
            return Self { jumps: Vec::new() };
        }
        let mut jumps = Vec::with_capacity(pat.len());
        jumps.push(0_usize);
        let mut ptr = 0_usize;
        for cur_char in &pat[1..] {
            // Walk the whole fallback chain. Trying only `ptr` and `0` misses
            // shorter borders, e.g. the "aa" border inside "aabaaa".
            while ptr != 0 && pat[ptr] != *cur_char {
                ptr = jumps[ptr - 1];
            }
            if pat[ptr] == *cur_char {
                ptr += 1;
            }
            jumps.push(ptr);
        }

        Self { jumps }
    }

    /// Starting from pattern index `ptr`, follows the fallback chain until an
    /// index whose pattern char equals `needle` is found.
    ///
    /// Returns that index, or `0` when the chain is exhausted. A result of `0`
    /// does NOT say whether `pat[0]` equals `needle`; the caller checks that.
    ///
    /// # Panics
    /// Panics if `ptr` is not a valid index into `pat`.
    pub fn search(&self, pat: &[char], needle: &char, ptr: &usize) -> usize {
        let mut ptr = *ptr;
        while ptr != 0 && pat[ptr] != *needle {
            ptr = self.jumps[ptr - 1];
        }
        ptr
    }
}

/// A non-empty search pattern together with its incremental matching state.
#[derive(Debug, PartialEq)]
struct Pattern {
    pat: Vec<char>,
    jt: JumpTable,
    /// Number of pattern chars matched so far.
    ptr: usize,
}

impl Pattern {
    /// Creates a pattern from `pat`.
    ///
    /// # Errors
    /// Returns `Err(())` when `pat` is empty.
    pub fn new(pat: String) -> Result<Self, ()> {
        if pat.is_empty() {
            return Err(());
        }
        let pat = pat.chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        Ok(Self {
            pat,
            jt,
            ptr: 0,
        })
    }

    /// Feeds the next haystack char into the matcher.
    ///
    /// Returns `true` exactly when `cur_char` completes an occurrence of the
    /// pattern. Matching may simply continue afterwards (overlapping
    /// occurrences are then reported too) or be restarted with
    /// [`Pattern::reset_ptr`].
    pub fn search_and_update(&mut self, cur_char: &char) -> bool {
        // A previous call completed a match: continue from the longest border
        // instead of indexing one past the end of the pattern.
        if self.ptr == self.pat.len() {
            self.ptr = self.jt.jumps[self.ptr - 1];
        }
        if self.pat[self.ptr] != *cur_char {
            self.ptr = self.jt.search(&self.pat, cur_char, &self.ptr);
        }
        // `search` stops ON the matching pattern char (or at 0), so the state
        // still has to be advanced past it when the chars agree.
        if self.pat[self.ptr] == *cur_char {
            self.ptr += 1;
        }
        self.ptr == self.pat.len()
    }

    /// Forgets any partial match.
    pub fn reset_ptr(&mut self) {
        self.ptr = 0;
    }

    /// Length of the pattern in chars.
    pub fn len(&self) -> usize {
        self.pat.len()
    }
}
|
||||
|
||||
/// One occurrence of a pattern: which pattern matched, where the occurrence
/// starts, and how long it is.
#[derive(Debug, PartialEq, Eq)]
struct Match {
    pub pat_id: usize,
    pub start_pos: usize,
    pub len: usize,
}

impl Match {
    /// Bundles the three values into a [`Match`].
    pub fn new(pat_id: usize, start_pos: usize, len: usize) -> Self {
        Match {
            pat_id: pat_id,
            start_pos: start_pos,
            len: len,
        }
    }
}
|
||||
|
||||
/// [`MPSearch`] assumes that each [`Match`] will be replaced later. So this will match the first
|
||||
/// pattern only and reset all the pointers (including the matched one) to zero.
|
||||
#[derive(Debug)]
|
||||
struct MPSearch {
|
||||
pats: Vec<Pattern>,
|
||||
}
|
||||
|
||||
impl MPSearch {
|
||||
/// The [`Pattern`]s will be searched in the order as they are in the [`Vec`].
|
||||
pub fn new(pats: Vec<Pattern>) -> Self {
|
||||
Self { pats }
|
||||
}
|
||||
|
||||
/// Reset pointers of all patterns
|
||||
fn reset_all_ptrs(&mut self) {
|
||||
for pat in self.pats.iter_mut() {
|
||||
pat.reset_ptr();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if ANY pattern ended a match at this pos
|
||||
///
|
||||
/// This will also clear all ptrs on a match
|
||||
fn search_at_pos(&mut self, cur_char: &char, pos: &usize) -> Option<Match> {
|
||||
let mut cur_match = None;
|
||||
for (idx, pat) in self.pats.iter_mut().enumerate() {
|
||||
if pat.search_and_update(cur_char) {
|
||||
cur_match = Some(Match::new(idx, pos.saturating_sub(pat.len() - 1), pat.len()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(_) = cur_match {
|
||||
self.reset_all_ptrs();
|
||||
}
|
||||
cur_match
|
||||
}
|
||||
|
||||
/// Perform a search on the haystack - returning the ordered [`Match`]es.
|
||||
pub fn search(&mut self, haystack: &String) -> Vec<Match> {
|
||||
let mut res = Vec::new();
|
||||
for (cur_idx, cur_char) in haystack.chars().enumerate() {
|
||||
if let Some(cur_match) = self.search_at_pos(&cur_char, &cur_idx) {
|
||||
res.push(cur_match);
|
||||
}
|
||||
}
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
/// Advances `$iter` by `$n` items, propagating `Err(())` out of the enclosing
/// function (via `?`) as soon as the iterator runs dry.
macro_rules! skip_n {
    ($iter:ident, $n:expr) => {
        for _ in 0..$n {
            if $iter.next().is_none() {
                Err::<(), ()>(())?;
            }
        }
    };
}
|
||||
|
||||
/// Moves the next `$n` items of `$iter` into `$target` with `push`,
/// propagating `Err(())` out of the enclosing function (via `?`) as soon as
/// the iterator runs dry.
macro_rules! append_n {
    ($iter:ident, $target:ident, $n:expr) => {
        for _ in 0..$n {
            match $iter.next() {
                Some(item) => $target.push(item),
                None => Err::<(), ()>(())?,
            }
        }
    };
}
|
||||
|
||||
|
||||
/// Replace the given [`Match`]es with the replacements. The replacements need to be in the order
|
||||
/// of the search patterns as when [`MPSearch`] was constructed.
|
||||
#[derive(Debug)]
|
||||
struct MPReplace {
|
||||
replacements: Vec<String>,
|
||||
matches: Vec<Match>,
|
||||
}
|
||||
|
||||
impl MPReplace {
|
||||
/// The replacement must be given in the same order as the patterns were on the corresponding
|
||||
/// [`MPSearch`] construction.
|
||||
pub fn new(matches: Vec<Match>, replacements: Vec<String>) -> Self {
|
||||
Self { replacements, matches }
|
||||
}
|
||||
|
||||
/// # Errors
|
||||
/// This can fail if the [`Match`]es are invalid relevant to the given target.
|
||||
pub fn replace(&self, target: &String) -> Result<String, ()> {
|
||||
let mut ptr = 0;
|
||||
let mut iter = target.chars();
|
||||
let mut res = String::new();
|
||||
for cur_match in self.matches.iter() {
|
||||
append_n!(iter, res, cur_match.start_pos - ptr);
|
||||
skip_n!(iter, cur_match.len);
|
||||
res.push_str(self.replacements.get(cur_match.pat_id).ok_or(())?);
|
||||
ptr = cur_match.start_pos + cur_match.len;
|
||||
}
|
||||
append_n!(iter, res, target.chars().count() - ptr);
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
/// Adapter that turns a flat stream of strings into consecutive pairs.
struct PatMatchArgs<I: Iterator<Item = String>> {
    inner: I,
}

impl<I: Iterator<Item = String>> PatMatchArgs<I> {
    /// Wraps `inner`; `count` is the number of items the caller expects it to yield.
    ///
    /// # Errors
    /// Returns `Err(())` when `count` is odd.
    pub fn new(inner: I, count: usize) -> Result<Self, ()> {
        match count % 2 {
            0 => Ok(Self { inner }),
            _ => Err(()),
        }
    }
}

impl<I: Iterator<Item = String>> Iterator for PatMatchArgs<I> {
    type Item = (String, String);

    /// Yields the next two items as a pair, or `None` once fewer than two remain.
    fn next(&mut self) -> Option<Self::Item> {
        let first = self.inner.next()?;
        let second = self.inner.next()?;
        Some((first, second))
    }
}
|
||||
|
||||
fn main() {
|
||||
let mut contents = Vec::new();
|
||||
std::io::stdin().lock().read_to_end(&mut contents).unwrap();
|
||||
let contents = std::str::from_utf8(&contents).unwrap().to_string();
|
||||
|
||||
let args = match PatMatchArgs::new(args().skip(1), args().len() - 1) {
|
||||
Ok(val) => val,
|
||||
Err(()) => {
|
||||
eprintln!("the number of arguments after filepath must be divisible by two");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
let mut pats = Vec::new();
|
||||
let mut replacements = Vec::new();
|
||||
for (cur_pat, cur_replacement) in args {
|
||||
let Ok(cur_pat) = Pattern::new(cur_pat) else {
|
||||
eprintln!("the patterns can't be empty");
|
||||
std::process::exit(1);
|
||||
};
|
||||
pats.push(cur_pat);
|
||||
replacements.push(cur_replacement);
|
||||
}
|
||||
|
||||
let matches = MPSearch::new(pats).search(&contents);
|
||||
let replaced = MPReplace::new(matches, replacements)
|
||||
.replace(&contents).unwrap();
|
||||
println!("{}", replaced);
|
||||
}
|
||||
|
||||
/// Unit tests for the jump table, single-pattern matching, multi-pattern
/// search and replacement.
#[cfg(test)]
mod test {
    use crate::{JumpTable, MPReplace, MPSearch, Match, Pattern};

    #[test]
    fn jumps_01() {
        let src = String::from("thisthen").chars().collect::<Vec<_>>();
        let target = vec![0_usize, 0, 0, 0, 1, 2, 0, 0];
        let jt = JumpTable::new(&src);
        assert_eq!(jt.jumps, target);
    }

    #[test]
    fn jumps_02() {
        let src = String::from("tthis").chars().collect::<Vec<_>>();
        let target = vec![0, 1, 0, 0, 0];
        let jt = JumpTable::new(&src);
        assert_eq!(jt.jumps, target);
    }

    #[test]
    fn jumps_03() {
        let src = String::from("t").chars().collect::<Vec<_>>();
        let target = vec![0];
        let jt = JumpTable::new(&src);
        assert_eq!(jt.jumps, target);
    }

    #[test]
    fn jumps_04() {
        let src = String::from("tt").chars().collect::<Vec<_>>();
        let target = vec![0, 1];
        let jt = JumpTable::new(&src);
        assert_eq!(jt.jumps, target);
    }

    #[test]
    fn jumps_05() {
        let src = String::from("").chars().collect::<Vec<_>>();
        let target = vec![];
        let jt = JumpTable::new(&src);
        assert_eq!(jt.jumps, target);
    }

    #[test]
    fn search_01() {
        let pat = String::from("tthis").chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        assert_eq!(jt.search(&pat, &'t', &1), 1);
    }

    #[test]
    fn search_02() {
        let pat = String::from("testtesa").chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        assert_eq!(jt.search(&pat, &'t', &7), 3);
    }

    #[test]
    fn search_03() {
        let pat = String::from("ahojahoj").chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        assert_eq!(jt.search(&pat, &'j', &7), 7);
    }

    // A needle that appears nowhere in the pattern must fall all the way back to zero.
    #[test]
    fn search_04() {
        let pat = String::from("ahojahoj").chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        assert_eq!(jt.search(&pat, &'x', &7), 0);
    }

    #[test]
    fn search_05() {
        let pat = String::from("jojojojojojoja").chars().collect::<Vec<_>>();
        let jt = JumpTable::new(&pat);
        assert_eq!(jt.search(&pat, &'o', &13), 11);
    }

    #[test]
    fn search_and_update_01() {
        let pat = String::from("test");
        let mut pat = Pattern::new(pat).unwrap();
        let haystack = String::from("thisisatest");
        for cur_haystack in haystack.chars().take(haystack.len() - 1) {
            assert_eq!(pat.search_and_update(&cur_haystack), false);
            println!("{:?}", pat);
        }
        println!("{:?}", pat);
        assert_eq!(pat.search_and_update(&'t'), true);
    }

    #[test]
    fn empty_pattern() {
        assert_eq!(Pattern::new("".chars().collect()), Err(()));
    }

    #[test]
    fn mpsearch_01() {
        let mut mpsearch = MPSearch::new(
            vec!["this", "is", "a", "test"]
                .iter()
                .map(|p| Pattern::new(p.to_string()).unwrap())
                .collect()
        );
        println!("{:?}", mpsearch);
        let haystack = String::from("this is a test");
        let target = vec![
            Match::new(0, 0, 4),
            Match::new(1, 5, 2),
            Match::new(2, 8, 1),
            Match::new(3, 10, 4)
        ];
        assert_eq!(mpsearch.search(&haystack), target);
    }

    #[test]
    fn mpreplace_01() {
        let mut mpsearch = MPSearch::new(
            vec!["this", "is", "a", "test"]
                .iter()
                .map(|p| Pattern::new(p.to_string()).unwrap())
                .collect()
        );
        let haystack = String::from("this-is.a*test///");
        let matches = mpsearch.search(&haystack);
        let mpreplace = MPReplace::new(matches, vec![
            "that".to_string(),
            "isn't".to_string(),
            "the".to_string(),
            "final".to_string(),
        ]);
        let replaced = mpreplace.replace(&haystack).unwrap();
        assert_eq!(replaced, "that-isn't.the*final///");
    }
}
|
Reference in New Issue
Block a user