The maths, frustrating that java does not have a function to use log in a base you choose however it does have natural log where it is then possible to get the needed result by dividing the natural log of some value by natural log of some base.
Math.log(x) / Math.log(2)
The challenge is about calculating shannon entropy. Which returns the correct results to at least five decimal places and got to try some streaming.
Entropy of "122333444455555666666777777788888888" is 2.7942086837942446
Entropy of "563881467447538846567288767728553786" is 2.7942086837942446
Entropy of "https://www.reddit.com/r/dailyprogrammer" is 4.056198332810095
Entropy of "int main(int argc, char *argv[])" is 3.8667292966721747
Entropy of "563881467447538846567288767728553786" is 2.7942086837942446
Entropy of "https://www.reddit.com/r/dailyprogrammer" is 4.056198332810095
Entropy of "int main(int argc, char *argv[])" is 3.8667292966721747
package com.company;
import java.math.BigDecimal;
import java.math.MathContext;
import java.util.*;
public class Main {
public static void main(String[] args) {
String[] inputSequences = {
"122333444455555666666777777788888888",
"563881467447538846567288767728553786",
"https://www.reddit.com/r/dailyprogrammer",
"int main(int argc, char *argv[])"
};
double result;
int minimumDecimalPlaces = 5;
String resultStringDP;
for (String input : inputSequences) {
result = calculateEntropy(input);
resultStringDP = new String(result + "").split(("\\."))[1];
if( resultStringDP.length() < minimumDecimalPlaces) {
result = new BigDecimal(result).round(new MathContext(minimumDecimalPlaces)).doubleValue();
}
System.out.println("Entropy of \"" + input + "\" is " + result);
}
}
public static double calculateEntropy(String input) {
List<Double> entropyValuesPerChar = new ArrayList<>(input.length());
List<Integer> frequencyOfChars = new ArrayList<>();
// get unique chars
HashSet<Character> uniqueChars = new HashSet<>(input.length());
for ( int i = 0; i < input.length(); i++) {
uniqueChars.add(input.charAt(i));
}
// for each char determine frequency and add to counter
int count;
for ( char uniqueLetter : uniqueChars) {
count = 0;
for ( char letter : input.toCharArray()) {
if (letter == uniqueLetter) {
count++;
}
}
frequencyOfChars.add(count);
}
for ( Integer freq : frequencyOfChars) {
double x = (double)freq / input.length();
entropyValuesPerChar.add(
-(x) * (Math.log(x) / Math.log(2))
);
}
return entropyValuesPerChar.stream()
import java.math.BigDecimal;
import java.math.MathContext;
import java.util.*;
public class Main {
public static void main(String[] args) {
String[] inputSequences = {
"122333444455555666666777777788888888",
"563881467447538846567288767728553786",
"https://www.reddit.com/r/dailyprogrammer",
"int main(int argc, char *argv[])"
};
double result;
int minimumDecimalPlaces = 5;
String resultStringDP;
for (String input : inputSequences) {
result = calculateEntropy(input);
resultStringDP = new String(result + "").split(("\\."))[1];
if( resultStringDP.length() < minimumDecimalPlaces) {
result = new BigDecimal(result).round(new MathContext(minimumDecimalPlaces)).doubleValue();
}
System.out.println("Entropy of \"" + input + "\" is " + result);
}
}
public static double calculateEntropy(String input) {
List<Double> entropyValuesPerChar = new ArrayList<>(input.length());
List<Integer> frequencyOfChars = new ArrayList<>();
// get unique chars
HashSet<Character> uniqueChars = new HashSet<>(input.length());
for ( int i = 0; i < input.length(); i++) {
uniqueChars.add(input.charAt(i));
}
// for each char determine frequency and add to counter
int count;
for ( char uniqueLetter : uniqueChars) {
count = 0;
for ( char letter : input.toCharArray()) {
if (letter == uniqueLetter) {
count++;
}
}
frequencyOfChars.add(count);
}
for ( Integer freq : frequencyOfChars) {
double x = (double)freq / input.length();
entropyValuesPerChar.add(
-(x) * (Math.log(x) / Math.log(2))
);
}
return entropyValuesPerChar.stream()
.mapToDouble(w -> w).sum();
}
}
}
}
No comments:
Post a Comment