Skip to content

Commit

Permalink
Performance: Lazy load Charset in EciMode
Browse files Browse the repository at this point in the history
  • Loading branch information
uwolfer committed Mar 19, 2024
1 parent 699c6cf commit 76871a8
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 57 deletions.
2 changes: 1 addition & 1 deletion src/main/java/uk/org/okapibarcode/backend/Pdf417.java
Original file line number Diff line number Diff line change
Expand Up @@ -1810,7 +1810,7 @@ private int addMacroCodewords() {
codeWords[codeWordCount++] = 923;
codeWords[codeWordCount++] = 000;
EciMode eci = EciMode.chooseFor(structuredAppendFileName, 3, 26);
int[] data2 = toBytes(structuredAppendFileName, eci.charset);
int[] data2 = toBytes(structuredAppendFileName, eci.getCharset());
processEci(eci.mode);
processText(data2, 0, data2.length, true);
}
Expand Down
9 changes: 5 additions & 4 deletions src/main/java/uk/org/okapibarcode/backend/Symbol.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

import uk.org.okapibarcode.graphics.Circle;
import uk.org.okapibarcode.graphics.Hexagon;
Expand Down Expand Up @@ -414,7 +415,7 @@ public void setEciMode(int eciMode) {
if (!supportsEci()) {
throw new IllegalArgumentException("This symbology type does not support ECI");
}
boolean valid = EciMode.ECIS.stream().anyMatch(eci -> eci.mode == eciMode);
boolean valid = Optional.ofNullable(EciMode.ECIS.get(eciMode)).filter(EciMode::isSupported).isPresent();
if (!valid) {
throw new IllegalArgumentException("Unsupported ECI mode: " + eciMode);
}
Expand Down Expand Up @@ -678,7 +679,7 @@ protected void eciProcess() {
EciMode eci;
if (eciMode != -1) {
// user chose the ECI mode explicitly
eci = EciMode.ECIS.stream().filter(e -> e.mode == eciMode).findFirst().orElse(EciMode.NONE);
eci = Optional.ofNullable(EciMode.ECIS.get(eciMode)).filter(EciMode::isSupported).orElse(EciMode.NONE);
} else {
// detect the ECI mode automatically
eci = EciMode.chooseFor(content);
Expand All @@ -689,15 +690,15 @@ protected void eciProcess() {
}

eciMode = eci.mode;
inputData = toBytes(content, eci.charset);
inputData = toBytes(content, eci.getCharset());

if (inputData == null) {
// user chose the ECI mode explicitly and it can't encode the provided data
throw new OkapiInputException("Unable to encode the provided data using the requested ECI mode");
}

infoLine("ECI Mode: " + eci.mode);
infoLine("ECI Charset: " + eci.charset.name());
infoLine("ECI Charset: " + eci.charsetName);
}

protected static int[] toBytes(String s, Charset charset, int... suffix) {
Expand Down
126 changes: 75 additions & 51 deletions src/main/java/uk/org/okapibarcode/util/EciMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
import static uk.org.okapibarcode.util.Arrays.contains;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

/**
* Represents an ECI (Extended Channel Interpretation) mode. Each ECI mode corresponds to a particular
Expand All @@ -36,69 +37,92 @@ public final class EciMode {

/** Represents "no ECI" or "missing ECI". */
public static final EciMode NONE = new EciMode(-1, null);
private static final EciMode ISO_8859_1 = new EciMode(3, "ISO-8859-1");
private static final EciMode UTF_8 = new EciMode(26, "UTF-8");
private static final EciMode US_ASCII = new EciMode(27, "US-ASCII");

/** The available ECI modes, in priority order. */
public static List< EciMode > ECIS = Collections.unmodifiableList(Arrays.asList(
EciMode.of(3, "ISO-8859-1"),
EciMode.of(4, "ISO-8859-2"),
EciMode.of(5, "ISO-8859-3"),
EciMode.of(6, "ISO-8859-4"),
EciMode.of(7, "ISO-8859-5"),
EciMode.of(8, "ISO-8859-6"),
EciMode.of(9, "ISO-8859-7"),
EciMode.of(10, "ISO-8859-8"),
EciMode.of(11, "ISO-8859-9"),
EciMode.of(12, "ISO-8859-10"), // not usually supported by Java
EciMode.of(13, "ISO-8859-11"),
EciMode.of(15, "ISO-8859-13"),
EciMode.of(16, "ISO-8859-14"), // not usually supported by Java
EciMode.of(17, "ISO-8859-15"),
EciMode.of(18, "ISO-8859-16"), // not usually supported by older Java versions
EciMode.of(21, "windows-1250"),
EciMode.of(22, "windows-1251"),
EciMode.of(23, "windows-1252"),
EciMode.of(24, "windows-1256"),
EciMode.of(20, "Shift_JIS"),
EciMode.of(26, "UTF-8"),
public static final LinkedHashMap<Integer, EciMode> ECIS = new LinkedHashMap<>();

static {
ECIS.put(3, ISO_8859_1);
ECIS.put(4, new EciMode(4, "ISO-8859-2"));
ECIS.put(5, new EciMode(5, "ISO-8859-3"));
ECIS.put(6, new EciMode(6, "ISO-8859-4"));
ECIS.put(7, new EciMode(7, "ISO-8859-5"));
ECIS.put(8, new EciMode(8, "ISO-8859-6"));
ECIS.put(9, new EciMode(9, "ISO-8859-7"));
ECIS.put(10, new EciMode(10, "ISO-8859-8"));
ECIS.put(11, new EciMode(11, "ISO-8859-9"));
ECIS.put(12, new EciMode(12, "ISO-8859-10")); // not usually supported by Java
ECIS.put(13, new EciMode(13, "ISO-8859-11"));
ECIS.put(15, new EciMode(15, "ISO-8859-13"));
ECIS.put(16, new EciMode(16, "ISO-8859-14")); // not usually supported by Java
ECIS.put(17, new EciMode(17, "ISO-8859-15"));
ECIS.put(18, new EciMode(18, "ISO-8859-16")); // not usually supported by older Java versions
ECIS.put(21, new EciMode(21, "windows-1250"));
ECIS.put(22, new EciMode(22, "windows-1251"));
ECIS.put(23, new EciMode(23, "windows-1252"));
ECIS.put(24, new EciMode(24, "windows-1256"));
ECIS.put(20, new EciMode(20, "Shift_JIS"));
ECIS.put(26, UTF_8);
// UTF-8 is the final fallback when automatically detecting the ECI, since it can encode anything.
// The ECI modes below are available to be requested explicitly, but are never used automatically.
EciMode.of(0, "IBM437"),
EciMode.of(1, "ISO-8859-1"),
EciMode.of(2, "IBM437"),
EciMode.of(25, "UTF-16BE"),
EciMode.of(27, "US-ASCII"),
EciMode.of(28, "Big5"),
EciMode.of(29, "GB2312"),
EciMode.of(30, "EUC-KR"),
EciMode.of(31, "GBK"),
EciMode.of(32, "GB18030"),
EciMode.of(33, "UTF-16LE"),
EciMode.of(34, "UTF-32BE"),
EciMode.of(35, "UTF-32LE")));
ECIS.put(0, new EciMode(0, "IBM437"));
ECIS.put(1, new EciMode(1, "ISO-8859-1"));
ECIS.put(2, new EciMode(2, "IBM437"));
ECIS.put(25, new EciMode(25, "UTF-16BE"));
ECIS.put(27, US_ASCII);
ECIS.put(28, new EciMode(28, "Big5"));
ECIS.put(29, new EciMode(29, "GB2312"));
ECIS.put(30, new EciMode(30, "EUC-KR"));
ECIS.put(31, new EciMode(31, "GBK"));
ECIS.put(32, new EciMode(32, "GB18030"));
ECIS.put(33, new EciMode(33, "UTF-16LE"));
ECIS.put(34, new EciMode(34, "UTF-32BE"));
ECIS.put(35, new EciMode(35, "UTF-32LE"));
}

public final int mode;
public final Charset charset;
public final String charsetName;

private static final HashMap<EciMode, Charset> ECI_CHARSETS = new HashMap<>(ECIS.size());

static {
ECI_CHARSETS.put(ISO_8859_1, StandardCharsets.ISO_8859_1);
ECI_CHARSETS.put(UTF_8, StandardCharsets.UTF_8);
ECI_CHARSETS.put(US_ASCII, StandardCharsets.US_ASCII);
}

private EciMode(int mode, Charset charset) {
private EciMode(int mode, String charsetName) {
this.mode = mode;
this.charset = charset;
this.charsetName = charsetName;
}

public Charset getCharset() {
return ECI_CHARSETS.computeIfAbsent(this, this::loadCharset);
}

public boolean isSupported() {
return getCharset() != null;
}

private static EciMode of(int mode, String charsetName) {
private Charset loadCharset(EciMode eciMode) {
try {
return new EciMode(mode, Charset.forName(charsetName));
return Charset.forName(eciMode.charsetName);
} catch (UnsupportedCharsetException e) {
return NONE;
return null;
}
}

public static EciMode chooseFor(String data, int... filter) {
for (EciMode eci : ECIS) {
if (eci.charset != null
&& eci.charset.canEncode()
&& eci.charset.newEncoder().canEncode(data)
&& (filter.length == 0 || contains(filter, eci.mode))) {
return eci;
for (Map.Entry<Integer, EciMode> eci : ECIS.entrySet()) {
Charset charset = eci.getValue().getCharset();
if (charset != null
&& charset.canEncode()
&& charset.newEncoder().canEncode(data)
&& (filter.length == 0 || contains(filter, eci.getKey()))) {
return eci.getValue();
}
}
return NONE;
Expand All @@ -116,6 +140,6 @@ public int hashCode() {

@Override
public String toString() {
return "EciMode[mode=" + mode + ", charset=" + charset + "]";
return "EciMode[mode=" + mode + ", charset=" + charsetName + "]";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ content=te\u0E24t
LOG

ECI Mode: 13
ECI Charset: x-iso-8859-11
ECI Charset: ISO-8859-11
Codewords: 8 927 13 829 149 901 196 116
Length Descriptor Codewords: 1
Data Codewords: 7
Expand Down

0 comments on commit 76871a8

Please sign in to comment.