WIP for improving my chi2 algorithm so it works on s1c4
This commit is contained in:
		
							parent
							
								
									76bc8895cc
								
							
						
					
					
						commit
						1cb5b59763
					
				
					 3 changed files with 27 additions and 7 deletions
				
			
		| 
						 | 
					@ -24,6 +24,28 @@ pub fn english_letter_freqs() -> FreqMap {
 | 
				
			||||||
    FreqMap::from_iter(char_strings.zip(ENGLISH_LETTER_FREQS.iter().map(|x| *x)))
 | 
					    FreqMap::from_iter(char_strings.zip(ENGLISH_LETTER_FREQS.iter().map(|x| *x)))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct EnglishLetterSet {
 | 
				
			||||||
 | 
					    freqs: FreqMap,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl EnglishLetterSet {
 | 
				
			||||||
 | 
					    fn new() -> EnglishLetterSet {
 | 
				
			||||||
 | 
					        EnglishLetterSet { freqs: english_letter_freqs() }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn is_valid(&self, c: char) -> bool {
 | 
				
			||||||
 | 
					        c.is_ascii_alphabetic()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn is_ignored(&self, c: char) -> bool {
 | 
				
			||||||
 | 
					        c.is_ascii_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn frequency_for(&self, c: char) -> f32 {
 | 
				
			||||||
 | 
					        freqs.get(c.to_uppercase().to_string()).unwrap_or(0f32);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub trait LetterFreq {
 | 
					pub trait LetterFreq {
 | 
				
			||||||
    fn letter_freqs(&self, lang: &str) -> FreqMap;
 | 
					    fn letter_freqs(&self, lang: &str) -> FreqMap;
 | 
				
			||||||
    fn chi2_freqs(&self, lang: &str) -> f32;
 | 
					    fn chi2_freqs(&self, lang: &str) -> f32;
 | 
				
			||||||
| 
						 | 
					@ -32,13 +54,10 @@ pub trait LetterFreq {
 | 
				
			||||||
impl<'a> LetterFreq for &'a str {
 | 
					impl<'a> LetterFreq for &'a str {
 | 
				
			||||||
    fn letter_freqs(&self, lang: &str) -> FreqMap {
 | 
					    fn letter_freqs(&self, lang: &str) -> FreqMap {
 | 
				
			||||||
        assert_eq!(lang, "en", "only 'en' language is supported rn");
 | 
					        assert_eq!(lang, "en", "only 'en' language is supported rn");
 | 
				
			||||||
        let english_letters = english_letters();
 | 
					 | 
				
			||||||
        let mut freqs = FreqMap::new();
 | 
					        let mut freqs = FreqMap::new();
 | 
				
			||||||
        for c in self.chars() {
 | 
					        for c in self.chars() {
 | 
				
			||||||
            let c_str = c.to_uppercase().to_string();
 | 
					            let c_str = c.to_uppercase().to_string();
 | 
				
			||||||
            if english_letters.contains(&c_str) {
 | 
					            *freqs.entry(c_str).or_insert(0f32) += 1f32;
 | 
				
			||||||
                *freqs.entry(c_str).or_insert(0f32) += 1f32;
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        freqs
 | 
					        freqs
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					@ -51,8 +70,8 @@ impl<'a> LetterFreq for &'a str {
 | 
				
			||||||
        let freqs = self.letter_freqs(lang);
 | 
					        let freqs = self.letter_freqs(lang);
 | 
				
			||||||
        let english_freqs = english_letter_freqs();
 | 
					        let english_freqs = english_letter_freqs();
 | 
				
			||||||
        let num_letters = freqs.values().sum::<f32>();
 | 
					        let num_letters = freqs.values().sum::<f32>();
 | 
				
			||||||
        let score = english_freqs.into_iter()
 | 
					        let score = freqs.into_iter()
 | 
				
			||||||
            .map(|(c, sc)| (freqs.get(&c).map_or(0f32, |c| *c), sc * num_letters))
 | 
					            .map(|(c, f)| (f, english_freqs.get(&c).map_or(1e-8, |c| c * num_letters)))
 | 
				
			||||||
            .fold(0f32, |acc, (obs, exp)| acc + ((obs - exp).powf(2.0) / exp));
 | 
					            .fold(0f32, |acc, (obs, exp)| acc + ((obs - exp).powf(2.0) / exp));
 | 
				
			||||||
        score
 | 
					        score
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,5 @@
 | 
				
			||||||
use std::iter;
 | 
					use std::iter;
 | 
				
			||||||
 | 
					use std::slice;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub type FixedByteXOR<T, U> = iter::Map<iter::Zip<T, U>, fn((u8, u8)) -> u8>;
 | 
					pub type FixedByteXOR<T, U> = iter::Map<iter::Zip<T, U>, fn((u8, u8)) -> u8>;
 | 
				
			||||||
pub type SingleByteXOR<T> = FixedByteXOR<T, iter::Repeat<u8>>;
 | 
					pub type SingleByteXOR<T> = FixedByteXOR<T, iter::Repeat<u8>>;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -81,7 +81,7 @@ fn s1c4() {
 | 
				
			||||||
        let mut best_score = f32::INFINITY;
 | 
					        let mut best_score = f32::INFINITY;
 | 
				
			||||||
        let mut best_output: Option<String> = None;
 | 
					        let mut best_output: Option<String> = None;
 | 
				
			||||||
        for key in 32u8..127 {
 | 
					        for key in 32u8..127 {
 | 
				
			||||||
            let decrypted = decoded.iter().byte_xor(key).map(char::from).collect::<String>();
 | 
					            let decrypted = decoded.iter().map(|c| *c).byte_xor(key).map(char::from).collect::<String>();
 | 
				
			||||||
            let score = decrypted.chi2_freqs("en");
 | 
					            let score = decrypted.chi2_freqs("en");
 | 
				
			||||||
            if !score.is_nan() && score < best_score {
 | 
					            if !score.is_nan() && score < best_score {
 | 
				
			||||||
                best_score = score;
 | 
					                best_score = score;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue