From 1cb5b59763996af29e694dfb16503aa2417c45b1 Mon Sep 17 00:00:00 2001
From: Eryn Wells
Date: Mon, 9 Apr 2018 07:00:59 -0700
Subject: [PATCH] WIP for improving my chi2 algorithm so it works on s1c4

---
Review notes (ignored by `git am`; not part of the commit message):

* EnglishLetterSet::frequency_for looks the letter up through `self.freqs`,
  borrows the upper-cased key, and yields the stored frequency as the tail
  expression (0.0 when the letter is absent).  This assumes FreqMap::get
  takes a borrowed String key and returns Option<&f32>, as the
  `english_freqs.get(&c).map_or(...)` call in chi2_freqs suggests -- confirm.
* Generic arguments that were missing from this copy of the patch have been
  restored (`sum::<f32>()`, `Option<String>`, `collect::<String>()`, and the
  type parameters in src/xor.rs).  The parameter names T/U in src/xor.rs are
  reconstructed -- confirm against the repository.
* The blob hashes on the `index` lines describe the originally committed
  content, not the corrected frequency_for line.

 src/letter_frequency.rs | 31 +++++++++++++++++++++++++------
 src/xor.rs              |  1 +
 tests/cryptopals.rs     |  2 +-
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/letter_frequency.rs b/src/letter_frequency.rs
index 78c1f70..ad7d18a 100644
--- a/src/letter_frequency.rs
+++ b/src/letter_frequency.rs
@@ -24,6 +24,28 @@ pub fn english_letter_freqs() -> FreqMap {
     FreqMap::from_iter(char_strings.zip(ENGLISH_LETTER_FREQS.iter().map(|x| *x)))
 }
 
+struct EnglishLetterSet {
+    freqs: FreqMap,
+}
+
+impl EnglishLetterSet {
+    fn new() -> EnglishLetterSet {
+        EnglishLetterSet { freqs: english_letter_freqs() }
+    }
+
+    fn is_valid(&self, c: char) -> bool {
+        c.is_ascii_alphabetic()
+    }
+
+    fn is_ignored(&self, c: char) -> bool {
+        c.is_ascii_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit()
+    }
+
+    fn frequency_for(&self, c: char) -> f32 {
+        self.freqs.get(&c.to_uppercase().to_string()).map_or(0f32, |f| *f)
+    }
+}
+
 pub trait LetterFreq {
     fn letter_freqs(&self, lang: &str) -> FreqMap;
     fn chi2_freqs(&self, lang: &str) -> f32;
@@ -32,13 +54,10 @@ pub trait LetterFreq {
 impl<'a> LetterFreq for &'a str {
     fn letter_freqs(&self, lang: &str) -> FreqMap {
         assert_eq!(lang, "en", "only 'en' language is supported rn");
-        let english_letters = english_letters();
         let mut freqs = FreqMap::new();
         for c in self.chars() {
             let c_str = c.to_uppercase().to_string();
-            if english_letters.contains(&c_str) {
-                *freqs.entry(c_str).or_insert(0f32) += 1f32;
-            }
+            *freqs.entry(c_str).or_insert(0f32) += 1f32;
         }
         freqs
     }
@@ -51,8 +70,8 @@ impl<'a> LetterFreq for &'a str {
         let freqs = self.letter_freqs(lang);
         let english_freqs = english_letter_freqs();
         let num_letters = freqs.values().sum::<f32>();
-        let score = english_freqs.into_iter()
-            .map(|(c, sc)| (freqs.get(&c).map_or(0f32, |c| *c), sc * num_letters))
+        let score = freqs.into_iter()
+            .map(|(c, f)| (f, english_freqs.get(&c).map_or(1e-8, |c| c * num_letters)))
             .fold(0f32, |acc, (obs, exp)| acc + ((obs - exp).powf(2.0) / exp));
         score
     }
diff --git a/src/xor.rs b/src/xor.rs
index e2bf93c..b2f4400 100644
--- a/src/xor.rs
+++ b/src/xor.rs
@@ -1,4 +1,5 @@
 use std::iter;
+use std::slice;
 
 pub type FixedByteXOR<T, U> = iter::Map<iter::Zip<T, U>, fn((u8, u8)) -> u8>;
 pub type SingleByteXOR<T> = FixedByteXOR<T, iter::Repeat<u8>>;
diff --git a/tests/cryptopals.rs b/tests/cryptopals.rs
index 21e4284..8aca383 100644
--- a/tests/cryptopals.rs
+++ b/tests/cryptopals.rs
@@ -81,7 +81,7 @@ fn s1c4() {
     let mut best_score = f32::INFINITY;
     let mut best_output: Option<String> = None;
     for key in 32u8..127 {
-        let decrypted = decoded.iter().byte_xor(key).map(char::from).collect::<String>();
+        let decrypted = decoded.iter().map(|c| *c).byte_xor(key).map(char::from).collect::<String>();
         let score = decrypted.chi2_freqs("en");
         if !score.is_nan() && score < best_score {
             best_score = score;