001/*
002 *  Unit-API - Units of Measurement API for Java
003 *  Copyright (c) 2005-2016, Jean-Marie Dautelle, Werner Keil, V2COM.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-363 nor the names of its contributors may be used to endorse or promote products
017 *    derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package systems.uom.ucum.format;
031
032import static tec.uom.se.AbstractUnit.ONE;
033import si.uom.SI;
034import systems.uom.ucum.internal.format.UCUMFormatParser;
035import tec.uom.se.AbstractConverter;
036import tec.uom.se.AbstractUnit;
037import tec.uom.se.format.AbstractUnitFormat;
038import tec.uom.se.format.SymbolMap;
039import tec.uom.se.function.MultiplyConverter;
040import tec.uom.se.function.RationalConverter;
041import tec.uom.se.internal.format.TokenException;
042import tec.uom.se.internal.format.TokenMgrError;
043import tec.uom.se.unit.AnnotatedUnit;
044import tec.uom.se.unit.MetricPrefix;
045import tec.uom.se.unit.TransformedUnit;
046
047import javax.measure.Quantity;
048import javax.measure.Unit;
049import javax.measure.UnitConverter;
050import javax.measure.format.ParserException;
051
052import java.io.ByteArrayInputStream;
053import java.io.IOException;
054import java.math.BigInteger;
055import java.text.ParsePosition;
056import java.util.*;
057
058/**
059 * <p>
060 * This class provides the interface for formatting and parsing
061 * {@link AbstractUnit units} according to the
 * <a href="http://unitsofmeasure.org/">Unified Code for Units of
 * Measure</a> (UCUM).
064 * </p>
065 *
066 * <p>
067 * For a technical/historical overview of this format please read
 * <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
 * Units of Measure in Clinical Information Systems</a>.
070 * </p>
071 *
072 * <p>
073 * As of revision 1.16, the BNF in the UCUM standard contains an
074 * <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to
075 * work around the problem by modifying the BNF productions for &lt;Term&gt;.
076 * Once the error in the standard is corrected, it may be necessary to modify
077 * the productions in the UCUMFormatParser.jj file to conform to the standard.
078 * </p>
079 *
080 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
081 * @author <a href="mailto:units@catmedia.us">Werner Keil</a>
082 * @version 0.7.2, 24 March 2017
083 */
084public abstract class UCUMFormat extends AbstractUnitFormat {
085    /**
086     * 
087     */
088    // private static final long serialVersionUID = 8586656823290135155L;
089
090    // A helper to declare bundle names for all instances
091    private static final String BUNDLE_BASE = UCUMFormat.class.getName();
092
093    // /////////////////
094    // Class methods //
095    // /////////////////
096
097    /**
098     * Returns the instance for formatting/parsing using the given variant
099     * 
100     * @param variant
101     *            the <strong>UCUM</strong> variant to use
102     */
103    public static UCUMFormat getInstance(Variant variant) {
104        switch (variant) {
105        case CASE_INSENSITIVE:
106            return Parsing.DEFAULT_CI;
107        case CASE_SENSITIVE:
108            return Parsing.DEFAULT_CS;
109        case PRINT:
110            return Print.DEFAULT;
111        default:
112            throw new IllegalArgumentException("Unknown variant: " + variant);
113        }
114    }
115
116    /**
117     * Returns an instance for formatting and parsing using user defined symbols
118     * 
119     * @param variant
120     *            the <strong>UCUM</strong> variant to use
121     * @param symbolMap
122     *            the map of user defined symbols to use
123     */
124    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
125        switch (variant) {
126        case CASE_INSENSITIVE:
127            return new Parsing(symbolMap, false);
128        case CASE_SENSITIVE:
129            return new Parsing(symbolMap, true);
130        case PRINT:
131            return new Print(symbolMap);
132        default:
133            throw new IllegalArgumentException("Unknown variant: " + variant);
134        }
135    }
136
137    /**
138     * The symbol map used by this instance to map between {@link AbstractUnit
139     * Unit}s and <code>String</code>s.
140     */
141    final SymbolMap symbolMap;
142
143    /**
144     * Get the symbol map used by this instance to map between
145     * {@link AbstractUnit Unit}s and <code>String</code>s, etc...
146     * 
147     * @return SymbolMap the current symbol map
148     */
149    @Override
150    protected SymbolMap getSymbols() {
151        return symbolMap;
152    }
153
154    // ////////////////
155    // Constructors //
156    // ////////////////
157    /**
158     * Base constructor.
159     */
160    UCUMFormat(SymbolMap symbolMap) {
161        this.symbolMap = symbolMap;
162    }
163
164    // ///////////
165    // Parsing //
166    // ///////////
167    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException;
168
169    protected Unit<?> parse(CharSequence csq, int index) throws ParserException {
170        return parse(csq, new ParsePosition(index));
171    }
172
173    @Override
174    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException;
175
176    // //////////////
177    // Formatting //
178    // //////////////
179    @SuppressWarnings({ "rawtypes", "unchecked" })
180    public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException {
181        if (!(unknownUnit instanceof AbstractUnit)) {
182            throw new UnsupportedOperationException(
183                    "The UCUM format supports only known units (AbstractUnit instances)");
184        }
185        AbstractUnit unit = (AbstractUnit) unknownUnit;
186        CharSequence symbol;
187        CharSequence annotation = null;
188        if (unit instanceof AnnotatedUnit) {
189            AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit;
190            unit = annotatedUnit.getActualUnit();
191            annotation = annotatedUnit.getAnnotation();
192        }
193        String mapSymbol = symbolMap.getSymbol(unit);
194        if (mapSymbol != null) {
195            symbol = mapSymbol;
196        } else if (unknownUnit instanceof TransformedUnit) {
197        final StringBuilder temp = new StringBuilder();
198        final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit();
199        final UnitConverter converter = unit.getConverterTo(parentUnit);
200        final boolean printSeparator = !parentUnit.equals(ONE);
201
202        format(parentUnit, temp);
203        formatConverter(converter, printSeparator, temp);
204
205        symbol = temp;
206        } else if (unit.getBaseUnits() != null) {
207            Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits();
208            StringBuffer app = new StringBuffer();
209            for (AbstractUnit<?> u : productUnits.keySet()) {
210                StringBuffer temp = new StringBuffer();
211                temp = (StringBuffer) format(u, temp);
212                if ((temp.indexOf(".") >= 0) || (temp.indexOf("/") >= 0)) {
213                    temp.insert(0, '(');
214                    temp.append(')');
215                }
216                int pow = productUnits.get(u);
217                int indexToAppend;
218                if (app.length() > 0) { // Not the first unit.
219
220                    if (pow >= 0) {
221
222                        if (app.indexOf("1/") >= 0) {
223                            indexToAppend = app.indexOf("1/");
224                            app.replace(indexToAppend, indexToAppend + 2, "/");
225                            // this statement make sure that (1/y).x will be
226                            // (x/y)
227
228                        } else if (app.indexOf("/") >= 0) {
229                            indexToAppend = app.indexOf("/");
230                            app.insert(indexToAppend, ".");
231                            indexToAppend++;
232                            // this statement make sure that (x/z).y will be
233                            // (x.y/z)
234
235                        } else {
236                            app.append('.');
237                            indexToAppend = app.length();
238                            // this statement make sure that (x).y will be (x.y)
239                        }
240
241                    } else {
242                        app.append('/');
243                        pow = -pow;
244
245                        indexToAppend = app.length();
246                        // this statement make sure that (x).y^-z will be
247                        // (x/y^z), where z would be added if it has a value
248                        // different than 1.
249                    }
250
251                } else { // First unit.
252
253                    if (pow < 0) {
254                        app.append("1/");
255                        pow = -pow;
256                        // this statement make sure that x^-y will be (1/x^y),
257                        // where z would be added if it has a value different
258                        // than 1.
259                    }
260
261                    indexToAppend = app.length();
262                }
263
264                app.insert(indexToAppend, temp);
265
266                if (pow != 1) {
267                    app.append(Integer.toString(pow));
268                    // this statement make sure that the power will be added if
269                    // it's different than 1.
270                }
271            }
272            symbol = app;
273        } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) {
274            final StringBuilder temp = new StringBuilder();
275            UnitConverter converter;
276            boolean printSeparator;
277            if (unit.equals(SI.KILOGRAM)) {
278                // A special case because KILOGRAM is a BaseUnit instead of
279                // a transformed unit, for compatibility with existing SI
280                // unit system.
281                format(SI.GRAM, temp);
282                converter = MetricPrefix.KILO.getConverter();
283                printSeparator = true;
284            } else {
285                Unit<?> parentUnit = unit.getSystemUnit();
286                converter = unit.getConverterTo(parentUnit);
287                if (parentUnit.equals(SI.KILOGRAM)) {
288                    // More special-case hackery to work around gram/kilogram
289                    // inconsistency
290                    parentUnit = SI.GRAM;
291                    converter = converter.concatenate(MetricPrefix.KILO.getConverter());
292                }
293                format(parentUnit, temp);
294                printSeparator = !parentUnit.equals(ONE);
295            }
296            formatConverter(converter, printSeparator, temp);
297            symbol = temp;
298        } else if (unit.getSymbol() != null) {
299            symbol = unit.getSymbol();
300        } else {
301            throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit "
302                    + unit.getClass().getName() + "). "
303                    + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
304        }
305        
306        appendable.append(symbol);
307        if (annotation != null && annotation.length() > 0) {
308            appendAnnotation(symbol, annotation, appendable);
309        }
310
311        return appendable;
312    }
313
314    public void label(Unit<?> unit, String label) {
315    }
316
317    public boolean isLocaleSensitive() {
318        return false;
319    }
320
321    void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
322        appendable.append('{');
323        appendable.append(annotation);
324        appendable.append('}');
325    }
326
327    /**
328     * Formats the given converter to the given StringBuffer. This is similar to
329     * what {@link ConverterFormat} does, but there's no need to worry about
330     * operator precedence here, since UCUM only supports multiplication,
331     * division, and exponentiation and expressions are always evaluated left-
332     * to-right.
333     * 
334     * @param converter
335     *            the converter to be formatted
336     * @param continued
337     *            <code>true</code> if the converter expression should begin
338     *            with an operator, otherwise <code>false</code>. This will
339     *            always be true unless the unit being modified is equal to
340     *            Unit.ONE.
341     * @param buffer
342     *            the <code>StringBuffer</code> to append to. Contains the
343     *            already-formatted unit being modified by the given converter.
344     */
345    void formatConverter(UnitConverter converter, boolean continued, StringBuilder buffer) {
346        boolean unitIsExpression = ((buffer.indexOf(".") >= 0) || (buffer.indexOf("/") >= 0));
347        MetricPrefix prefix = symbolMap.getPrefix(converter);
348        if ((prefix != null) && (!unitIsExpression)) {
349            buffer.insert(0, symbolMap.getSymbol(prefix));
350        } else if (converter == AbstractConverter.IDENTITY) {
351            // do nothing
352        } else if (converter instanceof MultiplyConverter) {
353            if (unitIsExpression) {
354                buffer.insert(0, '(');
355                buffer.append(')');
356            }
357            MultiplyConverter multiplyConverter = (MultiplyConverter) converter;
358            double factor = multiplyConverter.getFactor();
359            long lFactor = (long) factor;
360            if ((lFactor != factor) || (lFactor < -9007199254740992L) || (lFactor > 9007199254740992L)) {
361                throw new IllegalArgumentException("Only integer factors are supported in UCUM");
362            }
363            if (continued) {
364                buffer.append('.');
365            }
366            buffer.append(lFactor);
367        } else if (converter instanceof RationalConverter) {
368            if (unitIsExpression) {
369                buffer.insert(0, '(');
370                buffer.append(')');
371            }
372            RationalConverter rationalConverter = (RationalConverter) converter;
373            if (!rationalConverter.getDividend().equals(BigInteger.ONE)) {
374                if (continued) {
375                    buffer.append('.');
376                }
377                buffer.append(rationalConverter.getDividend());
378            }
379            if (!rationalConverter.getDivisor().equals(BigInteger.ONE)) {
380                buffer.append('/');
381                buffer.append(rationalConverter.getDivisor());
382            }
383        } else { // All other converter type (e.g. exponential) we use the
384                 // string representation.
385            buffer.insert(0, converter.toString() + "(");
386            buffer.append(")");
387        }
388    }
389
390    // static final ResourceBundle.Control getControl(final String key) {
391    // return new ResourceBundle.Control() {
392    // @Override
393    // public List<Locale> getCandidateLocales(String baseName, Locale locale) {
394    // if (baseName == null)
395    // throw new NullPointerException();
396    // if (locale.equals(new Locale(key))) {
397    // return Arrays.asList(
398    // locale,
399    // Locale.GERMANY,
400    // // no Locale.GERMAN here
401    // Locale.ROOT);
402    // } else if (locale.equals(Locale.GERMANY)) {
403    // return Arrays.asList(
404    // locale,
405    // // no Locale.GERMAN here
406    // Locale.ROOT);
407    // }
408    // return super.getCandidateLocales(baseName, locale);
409    // }
410    // };
411    // }
412
413    // /////////////////
414    // Inner classes //
415    // /////////////////
416
417    /**
418     * Variant of unit representation in the UCUM standard
419     * 
420     * @see <a href=
421     *      "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules">
422     *      UCUM - Character Set and Lexical Rules</a>
423     */
424    public static enum Variant {
425        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
426    }
427
428    /**
429     * The Print format is used to output units according to the "print" column
430     * in the UCUM standard. Because "print" symbols in UCUM are not unique,
431     * this class of UCUMFormat may not be used for parsing, only for
432     * formatting.
433     */
434    private static final class Print extends UCUMFormat {
435
436        /**
437         *
438         */
439        // private static final long serialVersionUID = 2990875526976721414L;
440        private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
441        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);
442
443        public Print(SymbolMap symbols) {
444            super(symbols);
445        }
446
447        @Override
448        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException {
449            throw new UnsupportedOperationException(
450                    "The print format is for pretty-printing of units only. Parsing is not supported.");
451        }
452
453        @Override
454        void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
455            if (symbol != null && symbol.length() > 0) {
456                appendable.append('(');
457                appendable.append(annotation);
458                appendable.append(')');
459            } else {
460                appendable.append(annotation);
461            }
462        }
463
464        @Override
465        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException {
466            return parse(csq, new ParsePosition(0));
467
468        }
469    }
470
471    /**
472     * The Parsing format outputs formats and parses units according to the
473     * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
474     * is passed to its constructor.
475     */
476    private static final class Parsing extends UCUMFormat {
477        // private static final long serialVersionUID = -922531801940132715L;
478        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
479                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() {
480                    @Override
481                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
482                        if (baseName == null)
483                            throw new NullPointerException();
484                        if (locale.equals(new Locale("", "CS"))) {
485                            return Arrays.asList(locale, Locale.ROOT);
486                        }
487                        return super.getCandidateLocales(baseName, locale);
488                    }
489                }));
490        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
491                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() {
492                    @Override
493                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
494                        if (baseName == null)
495                            throw new NullPointerException();
496                        if (locale.equals(new Locale("", "CI"))) {
497                            return Arrays.asList(locale, Locale.ROOT);
498                        } else if (locale.equals(Locale.GERMANY)) { // TODO
499                                                                    // why
500                                                                    // GERMANY?
501                            return Arrays.asList(locale,
502                                    // no Locale.GERMAN here
503                                    Locale.ROOT);
504                        }
505                        return super.getCandidateLocales(baseName, locale);
506                    }
507                }));
508        private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true);
509        private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false);
510        private final boolean caseSensitive;
511
512        public Parsing(SymbolMap symbols, boolean caseSensitive) {
513            super(symbols);
514            this.caseSensitive = caseSensitive;
515        }
516
517        @Override
518        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException {
519            // Parsing reads the whole character sequence from the parse
520            // position.
521            int start = cursor.getIndex();
522            int end = csq.length();
523            if (end <= start) {
524                return ONE;
525            }
526            String source = csq.subSequence(start, end).toString().trim();
527            if (source.length() == 0) {
528                return ONE;
529            }
530            if (!caseSensitive) {
531                source = source.toUpperCase();
532            }
533            UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes()));
534            try {
535                Unit<?> result = parser.parseUnit();
536                cursor.setIndex(end);
537                return result;
538            } catch (TokenException e) {
539                if (e.currentToken != null) {
540                    cursor.setErrorIndex(start + e.currentToken.endColumn);
541                } else {
542                    cursor.setErrorIndex(start);
543                }
544                throw new ParserException(e);
545            } catch (TokenMgrError e) {
546                cursor.setErrorIndex(start);
547                throw new IllegalArgumentException(e.getMessage());
548            }
549        }
550
551        @Override
552        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException {
553            return parse(csq, new ParsePosition(0));
554        }
555    }
556}