actix · mskorkowski · Dec 3, 2022 · Dec 3, 2022 · Dec 3, 2022 · Dec 3, 2022
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/websockets/autobahn/Cargo.toml b/websockets/autobahn/Cargo.toml
@@ -11,6 +11,9 @@ path = "src/main.rs"
 actix = "0.13"
 actix-web = "4"
 actix-web-actors = "4.1"
+actix-http = "3"
+bytes = "1.3.0"
+bytestring = "1"
 
 env_logger = "0.9"
 log = "0.4"
diff --git a/websockets/autobahn/README.md b/websockets/autobahn/README.md
@@ -11,11 +11,13 @@ cd websockets/autobahn
 cargo run
 ```
 
-### Running Autobahn Test Suite
+### Running autobahn test suite
 
 Running the autobahn test suite is easiest using the docker image as explained on the [autobahn test suite repo](https://github.com/crossbario/autobahn-testsuite#using-the-testsuite-docker-image).
 
-After starting the server, in the same directory, run the test suite in "fuzzing client" mode:
+After starting the server, in the same directory, run the test suite in "fuzzing client" mode.
+
+#### Docker
 
 ```sh
 docker run -it --rm \
@@ -29,4 +31,30 @@ docker run -it --rm \
  --mode fuzzingclient
 ```
 
-Results are written to the `reports/servers` directory for viewing.
+#### Podman
+
+```sh
+podman run -it --rm \
+ -v "${PWD}/config":/config \
+ -v "${PWD}/reports":/reports \
+ --network host \
+ --name autobahn \
+ crossbario/autobahn-testsuite \
+ wstest \
+ --spec /config/fuzzingclient-podman.json \
+ --mode fuzzingclient
+```
+
+If you run it with `selinux` enabled, then
+
+```sh
+podman run -it --rm \
+ -v "${PWD}/config":/config:z \
+ -v "${PWD}/reports":/reports:z \
+ --network host \
+ --name autobahn \
+ crossbario/autobahn-testsuite \
+ wstest \
+ --spec /config/fuzzingclient-podman.json \
+ --mode fuzzingclient
+```
diff --git a/websockets/autobahn/config/fuzzingclient-podman.json b/websockets/autobahn/config/fuzzingclient-podman.json
@@ -0,0 +1,15 @@
+{
+ "outdir": "./reports/servers",
+ "servers": [
+ {
+ "agent": "actix-web-actors",
+ "url": "ws://127.0.0.1:9001"
+ }
+ ],
+ "cases": ["*"],
+ "exclude-cases": [
+ "12.*",
+ "13.*"
+ ],
+ "exclude-agent-cases": {}
+}
diff --git a/websockets/autobahn/src/main.rs b/websockets/autobahn/src/main.rs
@@ -1,36 +1,7 @@
-use actix::prelude::*;
-use actix_web::{middleware, web, App, Error, HttpRequest, HttpResponse, HttpServer};
-use actix_web_actors::ws;
+mod websocket;
+mod utf8;
 
-async fn ws_index(r: HttpRequest, stream: web::Payload) -> Result<HttpResponse, Error> {
- ws::start(AutobahnWebSocket::default(), &r, stream)
-}
-
-#[derive(Debug, Clone, Default)]
-struct AutobahnWebSocket;
-
-impl Actor for AutobahnWebSocket {
- type Context = ws::WebsocketContext<Self>;
-}
-
-impl StreamHandler<Result<ws::Message, ws::ProtocolError>> for AutobahnWebSocket {
- fn handle(&mut self, msg: Result<ws::Message, ws::ProtocolError>, ctx: &mut Self::Context) {
- if let Ok(msg) = msg {
- match msg {
- ws::Message::Text(text) => ctx.text(text),
- ws::Message::Binary(bin) => ctx.binary(bin),
- ws::Message::Ping(bytes) => ctx.pong(&bytes),
- ws::Message::Close(reason) => {
- ctx.close(reason);
- ctx.stop();
- }
- _ => {}
- }
- } else {
- ctx.stop();
- }
- }
-}
+use actix_web::{middleware, web, App, HttpServer};
 
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
@@ -41,7 +12,7 @@ async fn main() -> std::io::Result<()> {
  HttpServer::new(|| {
  App::new()
  .wrap(middleware::Logger::default())
- .service(web::resource("/").route(web::get().to(ws_index)))
+ .service(web::resource("/").route(web::get().to(websocket::index)))
  })
  .workers(2)
  .bind(("127.0.0.1", 9001))?

diff --git a/websockets/autobahn/src/utf8/mod.rs b/websockets/autobahn/src/utf8/mod.rs
@@ -0,0 +1,215 @@
+//! Module contains code related to handling utf8 codepoints split across multiple continuation frames
+//! 
+//! Websocket standard allows sending continuation text frames which are not valid utf8 by themselves. 
+//! 
+//! Example:
+//! > `♩` is `e2 99 a9`
+//! >
+//! > The first frame can end up with (e2) `0b11100010u8` which is a first byte of three byte utf8 sequence and the second
+//! > continuation frame can start with (99) `0b10011001u8` followed by (a9) `0b10101001u8` which only after combining together
+//! > will give the proper utf8 sequence
+//! 
+//! What's more strict 
+
+#[cfg(test)]
+mod tests;
+
+use actix_http::ws::ProtocolError;
+
+use bytes::BufMut;
+use bytes::Bytes;
+use bytes::BytesMut;
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct ValidUtf8 {
+ pub valid: Bytes,
+ pub overflow: Option<Bytes>,
+}
+
+const UTF8_START_2_BYTE_SEQ_MASK: u8 = 0b1110_0000u8;
+const UFT8_START_3_BYTE_SEQ_MASK: u8 = 0b1111_0000u8;
+const UTF8_START_4_BYTE_SEQ_MASK: u8 = 0b1111_1000u8;
+
+const UTF8_2_BYTE_SEQ: u8 = 0b11000000u8;
+const UTF8_3_BYTE_SEQ: u8 = 0b11100000u8;
+const UTF8_4_BYTE_SEQ: u8 = 0b11110000u8;
+
+const MAX_ASCII_VALUE: u8 = 0x7Fu8;
+const MIN_CONTINUATION: u8 = 0x80u8;
+const MAX_CONTINUATION: u8 = 0xBFu8;
+
+const ERROR_INVALID_UTF8_SEQUENCE_MESSAGE: &str = "invalid utf-8 sequence";
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum ByteResult {
+ Continuation,
+ First(usize),
+ Ok,
+ Invalid,
+}
+
+fn protocol_error<T>(error: String, kind: std::io::ErrorKind) -> Result<T, ProtocolError> {
+ Err(ProtocolError::Io(std::io::Error::new(kind, error)))
+}
+
+fn protocol_other_error<T>(error: String) -> Result<T, ProtocolError> {
+ protocol_error(error, std::io::ErrorKind::Other)
+}
+
+fn protocol_data_error<T>(error: String) -> Result<T, ProtocolError> {
+ protocol_error(error, std::io::ErrorKind::InvalidData)
+}
+
+/// This method rebuilds the code point up to the given point
+///
+/// You can invoke this method only for the overflowed ("unfinished") code point.
+/// As the consequence:
+///
+/// 1. `data[0]` is always valid
+/// 2. We don't need to check the last byte since it's not there
+///
+///
+/// > From Unicode 13 spec
+/// >
+/// > | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
+/// > |:----------------------|:----------------|:----------------|:----------------|:----------------|
+/// > | U+0000 ..= U+007f | `0x00 ..= 0x7f` | | | |
+/// > | U+0080 ..= U+07FF | `0xC2 ..= 0xDF | `0x80 ..= 0xBF` | | |
+/// > | U+0800 ..= U+0FFF | `0xE0` | `0xA0 ..= 0xBF` | `0x80 ..= 0xBF` | |
+/// > | U+1000 ..= U+CFFF | `0xE1 ..= 0xEC | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` | |
+/// > | U+D000 ..= U+D7FF | `0xED` | `0x80 ..= 0x9F` | `0x80 ..= 0xBF` | |
+/// > | U+E000 ..= U+FFFF | `0xEE ..= 0xEF` | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` | |
+/// > | U+10000 ..= U+3FFFF | `0xF0` | `0x90 ..= 0xBF` | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` |
+/// > | U+40000 ..= U+FFFFF | `0xF1 ..= 0xF3` | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` |
+/// > | U+100000 ..= U+10FFFF | `0xF4` | `0x80 ..= 0x8F` | `0x80 ..= 0xBF` | `0x80 ..= 0xBF` |
+fn check_overflow(data: &[u8], expected_size: usize) -> bool {
+ let len = data.len();
+
+ let raw_1 = data[0];
+ if expected_size == 2 {
+ (0xC2u8..=0xDFu8).contains(&raw_1)
+ } else if expected_size == 3 {
+ let raw_2: u8 = if len == 2 {
+ data[1]
+ } else if raw_1 == 0xE0 {
+ 0xA0
+ } else {
+ 0x80
+ };
+
+ matches!((raw_1, raw_2), (0xE0, 0xA0..=0xBF) | (0xE1..=0xEC, 0x80..=0xBF) | (0xED, 0x80..=0x9F))
+ } else {
+ let raw_2: u8 = if len >= 2 {
+ data[1]
+ } else if raw_1 == 0xF0 {
+ 0x90
+ } else {
+ 0x80
+ };
+ let raw_3: u8 = if len == 3 { data[2] } else { 0x80 };
+
+ matches!((raw_1, raw_2, raw_3), 
+ (0xF0, 0x90..=0xBF, 0x80..=0xBF) |
+ (0xF1..=0xF3, 0x80..=0xBF, 0x80..=0xBF) |
+ (0xf4, 0x80..=0x8F, 0x80..=0xBF)
+ )
+ }
+}
+
+fn check_byte(byte: u8) -> ByteResult {
+ if byte <= MAX_ASCII_VALUE {
+ ByteResult::Ok
+ } else if (MIN_CONTINUATION..=MAX_CONTINUATION).contains(&byte) {
+ ByteResult::Continuation
+ } else if byte & UTF8_START_2_BYTE_SEQ_MASK == UTF8_2_BYTE_SEQ {
+ ByteResult::First(2)
+ } else if byte & UFT8_START_3_BYTE_SEQ_MASK == UTF8_3_BYTE_SEQ {
+ ByteResult::First(3)
+ } else if byte & UTF8_START_4_BYTE_SEQ_MASK == UTF8_4_BYTE_SEQ {
+ ByteResult::First(4)
+ } else {
+ ByteResult::Invalid
+ }
+}
+
+pub fn validate_utf8_bytes(data: Bytes) -> Result<ValidUtf8, ProtocolError> {
+ let len: usize = data.len();
+ let mut overflow_size: usize = 0;
+ let mut checked: ByteResult;
+
+ if len == 0 {
+ Ok(ValidUtf8 {
+ valid: data,
+ overflow: None,
+ })
+ } else {
+ let mut index = len;
+ let mut expected_overflow_size = 0;
+
+ while index > 0 {
+ index -= 1;
+ let current = match data.get(index) {
+ Some(b) => b,
+ None => return protocol_other_error(ERROR_INVALID_UTF8_SEQUENCE_MESSAGE.to_owned()),
+ };
+
+ checked = check_byte(*current);
+
+ match checked {
+ ByteResult::Continuation => {
+ overflow_size += 1;
+ continue;
+ }
+ ByteResult::First(seq_size) => {
+ overflow_size += 1;
+
+ if overflow_size == seq_size {
+ index = len;
+ overflow_size = 0;
+ expected_overflow_size = 0;
+ break;
+ // we've just checked that whole code point is inside this data frame, so no overflow is required
+ }
+ if overflow_size > seq_size {
+ return protocol_data_error(ERROR_INVALID_UTF8_SEQUENCE_MESSAGE.to_owned());
+ }
+
+ expected_overflow_size = seq_size;
+ break;
+ }
+ ByteResult::Ok => {
+ index += 1;
+ break;
+ }
+ ByteResult::Invalid => {
+ return protocol_data_error(ERROR_INVALID_UTF8_SEQUENCE_MESSAGE.to_owned())
+ }
+ }
+ }
+
+ // index points at first "overflowed" byte
+ if overflow_size > 0 {
+ let (data, overflow) = data.split_at(index);
+
+ if !check_overflow(overflow, expected_overflow_size) {
+ return protocol_data_error(ERROR_INVALID_UTF8_SEQUENCE_MESSAGE.to_owned());
+ }
+
+ let mut bytes_data = BytesMut::with_capacity(data.len());
+ bytes_data.put(data);
+
+ let mut bytes_overflow = BytesMut::with_capacity(overflow.len());
+ bytes_overflow.put(overflow);
+
+ Ok(ValidUtf8 {
+ valid: bytes_data.freeze(),
+ overflow: Some(bytes_overflow.freeze()),
+ })
+ } else {
+ Ok(ValidUtf8 {
+ valid: data,
+ overflow: None,
+ })
+ }
+ }
+}
diff --git a/websockets/autobahn/src/utf8/tests/mod.rs b/websockets/autobahn/src/utf8/tests/mod.rs
@@ -0,0 +1,3 @@
+mod test6_4_2;
+
+use super::*;
diff --git a/websockets/autobahn/src/utf8/tests/test6_4_2.rs b/websockets/autobahn/src/utf8/tests/test6_4_2.rs
@@ -0,0 +1,41 @@
+use super::*;
+
+#[test]
+fn invalid_sequence() {
+ let data: &[u8] = b"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5\xf4\x90\x80\x80edited";
+ let bytes = BytesMut::from(data).freeze();
+ let tested_bytes = bytes.slice(11..14);
+
+ let result = validate_utf8_bytes(tested_bytes);
+
+ match result {
+ Err(ProtocolError::Io(err)) => {
+ assert_eq!(err.kind(), std::io::ErrorKind::InvalidData, "Error kind should be `Other`");
+ assert_eq!(err.to_string(), ERROR_INVALID_UTF8_SEQUENCE_MESSAGE.to_owned());
+ },
+ Err(_) => assert!(false, "Result should be ProtocolError::Io"),
+ Ok(_) => assert!(false, "Result should be an error")
+ }
+}
+
+#[test]
+fn first_byte_type() {
+ let byte: u8 = 0xf4u8;
+
+ let expected = ByteResult::First(4);
+
+ let result = check_byte(byte);
+
+ assert_eq!(result, expected);
+}
+
+#[test]
+fn second_byte_type() {
+ let byte: u8 = 0x90u8;
+
+ let expected = ByteResult::Continuation;
+
+ let result = check_byte(byte);
+
+ assert_eq!(result, expected);
+}