Apache Jackrabbit : ReduceMemOfSharedFieldCache

Note: the following applies to Jackrabbit 2.x only and does not apply to Jackrabbit Oak.

Currently the SharedFieldCache keeps field values almost unchanged, i.e. without any compression. For Date fields in particular, which usually have many distinct values, it could be useful to employ some kind of compression. See the discussion here: http://thread.gmane.org/gmane.comp.apache.jackrabbit.user/11573

Recent rework on the SharedFieldCache also included the conversion of the generic StringIndex into a property type aware ValueIndex of Java Comparables. This probably already has a positive effect on the memory usage.

Another idea is to reuse substrings as flyweight instances and reference them.

Test code:

/**
 * Rough memory experiment for the "flyweight substrings" idea: instead of
 * keeping every date string as its own instance, split each string into
 * fixed-width chunks, keep one shared instance per distinct chunk and
 * represent each value as an array of references to those shared chunks.
 * <p>
 * The test prints three heap deltas (in bytes, as reported by
 * {@link Runtime}): the raw values, the pool of distinct chunks, and the
 * pool plus the per-value reference arrays. The figures are only indicative
 * because {@link System#gc()} is merely a hint to the JVM.
 */
public class DateFieldTest extends TestCase {

    /** Width, in characters, of the chunks each value is split into. */
    private static final int SUB_LEN = 3;

    /** Number of date strings generated for the measurement. */
    private static final int NUM_VALUES = 1000 * 1000;

    /**
     * Generates {@link #NUM_VALUES} date strings one second apart and prints
     * the memory used by the plain values, by the pool of distinct chunks,
     * and by the pool together with the per-value chunk reference arrays.
     */
    public void testField() {
        List<String> values = new ArrayList<String>();
        Map<String, String> subs = new HashMap<String, String>();

        // Phase 1: the values exactly as they would be cached today.
        long mem = currentMemory();
        long time = System.currentTimeMillis();
        for (int i = 0; i < NUM_VALUES; i++) {
            values.add(DateField.timeToString(time));
            time += 1000;
        }

        long diff = currentMemory() - mem;
        System.out.println("values as is: " + diff);

        // Phase 2: build the pool of distinct chunks.
        mem = currentMemory();
        for (String value : values) {
            for (int idx = 0; idx < value.length(); idx += SUB_LEN) {
                String sub = chunkAt(value, idx);
                if (!subs.containsKey(sub)) {
                    // Store a fresh copy so the pooled instance owns its own
                    // character data. NOTE(review): presumably this guards
                    // against JVMs where substring() shares the parent's
                    // backing array - confirm for the target Java version.
                    sub = new String(sub.toCharArray());
                    subs.put(sub, sub);
                }
            }
        }
        diff = currentMemory() - mem;
        System.out.println("value subs: " + diff);

        System.out.println("subs: " + subs.size());

        // Phase 3: represent every value as an array of pooled chunks.
        List<String[]> keys = new ArrayList<String[]>();
        for (String value : values) {
            List<String> key = new ArrayList<String>();
            for (int idx = 0; idx < value.length(); idx += SUB_LEN) {
                key.add(subs.get(chunkAt(value, idx)));
            }
            keys.add(key.toArray(new String[key.size()]));
        }
        // 'mem' is deliberately not reset here: this delta covers the chunk
        // pool from phase 2 plus the reference arrays built in phase 3.
        diff = currentMemory() - mem;
        System.out.println("value subs and keys: " + diff);

        // Touch the collections after the last measurement.
        // NOTE(review): presumably this keeps them reachable until all
        // measurements are done - confirm intent.
        keys.size();
        values.size();
        subs.size();
    }

    /**
     * Returns the chunk of {@code value} that starts at {@code start} and is
     * at most {@link #SUB_LEN} characters long; the last chunk of a value may
     * be shorter.
     *
     * @param value the string to take the chunk from
     * @param start index of the first character of the chunk
     * @return the chunk beginning at {@code start}
     */
    private static String chunkAt(String value, int start) {
        return value.substring(start, Math.min(start + SUB_LEN, value.length()));
    }

    /**
     * Requests a garbage collection and returns the heap currently in use.
     *
     * @return total memory minus free memory, as reported by {@link Runtime}
     */
    private static long currentMemory() {
        System.gc();
        Runtime r = Runtime.getRuntime();
        return r.totalMemory() - r.freeMemory();
    }
}