import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * This class is an example parser for the proposed time series format. Methods are provided for
 * reading ({@link #parse(String)}) and writing ({@link #toString()}) the format. When run, this
 * class will perform a series of tests to ensure the parser is working as expected. Additionally,
 * command line parameters are parsed according to the time series format and then output.
 *
 * <p>Data is stored simplistically by the put, get, and remove methods of the TimeSeriesFormat
 * class. A scalar is stored at position 0 and list elements are stored at positions 1 through n.
 * This form of data storage is not intended for actual use. A real user of this parser would store
 * data directly into their own internal format. This avoids double, or even triple, copying of
 * data that would otherwise be required.
 *
 * <p>This code is licensed under the DARPA BioCOMP Open Source License. See LICENSE for more
 * details.
 *
 * @author Nicholas Allen
 */
public final class TimeSeriesFormat {

    // -- CONSTANTS USED IN THE EXAMPLE PARSER -----------------------------------------------------

    private static final String PATTERN_FALSE = "false";          // Boolean false
    private static final String PATTERN_TRUE = "true";            // Boolean true
    private static final char PATTERN_FP = '.';                   // Floating point number
    private static final String PATTERN_NAN = "nan";              // Floating point NaN
    private static final String PATTERN_NINFINITY = "-infinity";  // Floating point negative infinity
    private static final String PATTERN_NINFINITY2 = "-inf";      // Floating point negative infinity
    private static final String PATTERN_PINFINITY = "infinity";   // Floating point positive infinity
    private static final String PATTERN_PINFINITY2 = "inf";       // Floating point positive infinity
    private static final String PATTERN_VOID = "no value";        // Empty

    private static final Double DOUBLE_NAN = Double.valueOf(Double.NaN);                     // Value for NaN
    private static final Double DOUBLE_NINFINITY = Double.valueOf(Double.NEGATIVE_INFINITY); // Value for negative infinity
    private static final Double DOUBLE_PINFINITY = Double.valueOf(Double.POSITIVE_INFINITY); // Value for positive infinity

    private static final String TOKEN_ALL = "(,)\\\"";        // All tokens used
    private static final String TOKEN_BLOCKELEMENT = ",";     // Separator between block elements
    private static final String TOKEN_BLOCKEND = ")";         // End of block marker
    private static final String TOKEN_BLOCKSTART = "(";       // Start of block marker
    private static final String TOKEN_ESCAPE = "\\";          // Character escape
    private static final String TOKEN_LITERAL = "\"";         // Literal separator

    /** Parser position before anything has been read; also the temporary slot for a first value. */
    private static final int POS_START = -1;

    /** Parser position after a single top-level value has been read (the element looks like a scalar). */
    private static final int POS_SCALAR = 0;

    // -- METHODS FOR THE EXAMPLE PARSER -----------------------------------------------------------

    /**
     * Converts a string in the time series format to an element.
     *
     * @param input Input string
     * @return the parsed element; a scalar has length 0, a list has its entries at positions 1..n
     * @throws NullPointerException if {@code input} is null
     * @throws IllegalArgumentException if the input is malformed or contains an unparsable value
     */
    public static TimeSeriesFormat parse(String input) {
        if (input == null) {
            throw new NullPointerException("input");
        }
        return parse(new TimeSeriesFormat(), new StringTokenizer(input, TOKEN_ALL, true), POS_START);
    }

    /**
     * Converts a string in the time series format to an element.
     *
     * @param element Element to store data into
     * @param tokenizer Provides the input stream broken into pieces at the structural boundaries of the language
     * @param pos First position to store data at in the element
     * @return {@code element}, populated and with its length set
     * @throws IllegalArgumentException if a value or '(' appears where none is expected
     */
    private static TimeSeriesFormat parse(TimeSeriesFormat element, StringTokenizer tokenizer, int pos) {
        boolean waiting = true; // Whether the parser is expecting another value to be present

        while (tokenizer.hasMoreTokens()) {
            // Locale.ROOT keeps keyword matching independent of the default locale
            // (e.g. Turkish would lowercase "Infinity" with a dotless i).
            String text = tokenizer.nextToken().trim().toLowerCase(Locale.ROOT);
            if (text.length() == 0) {
                continue; // Skip white space
            }

            if (TOKEN_BLOCKELEMENT.equals(text)) { // Element in a list
                switch (pos) {
                    case POS_SCALAR: // Element is a list but ( was left off
                        element.promotePendingToFirstEntry();
                        waiting = true;
                        pos = 2;
                        break;
                    case POS_START: // Element is a list but ( was left off and the first entry is empty
                        pos = 2;
                        break;
                    default:
                        if (waiting) {
                            pos++; // Handle empty entry
                        } else {
                            waiting = true; // Expect upcoming entry
                        }
                }
                continue;
            }

            if (TOKEN_BLOCKSTART.equals(text)) { // Start of a list
                if (pos == POS_START) { // This element is the list
                    pos = 1;
                    waiting = true;
                } else { // This element contains the list
                    if (pos == POS_SCALAR || !waiting) {
                        throw new IllegalArgumentException("Unexpected '('"); // Uh oh, this element was a scalar
                    }
                    element.put(pos++, parse(new TimeSeriesFormat(), tokenizer, 1));
                    waiting = false;
                }
                continue;
            }

            if (TOKEN_BLOCKEND.equals(text)) { // End of the list
                // A ')' with no matching '(' at the top level: treat the element as a list whose
                // '(' was left off, mirroring the ',' handling, instead of producing a negative
                // length and dropping the value that was already read.
                if (pos == POS_SCALAR) {
                    element.promotePendingToFirstEntry();
                    pos = 2; // waiting is false here, so the length below becomes 1
                } else if (pos == POS_START) {
                    pos = 1; // waiting is still true, so the length below becomes 1 (empty entry)
                }
                element.length = waiting ? pos : pos - 1;
                return element; // Ignore the rest of the input
            }

            if (pos == POS_SCALAR || !waiting) {
                throw new IllegalArgumentException("Unexpected input following expression"); // Uh oh, wasn't expecting actual data
            }
            waiting = false;
            element.put(pos++, parseValue(text, tokenizer));
        }

        switch (pos) {
            case POS_SCALAR: // Element is a scalar: move it from the temporary slot to position 0
                element.put(0, element.get(POS_START));
                element.remove(POS_START);
                element.length = 0;
                break;
            case POS_START: // Element is an empty scalar
                element.length = 0;
                break;
            default: // Element is a list, but )'s were left off
                element.length = waiting ? pos : pos - 1;
        }
        return element;
    }

    /**
     * Parses a single value.
     *
     * @param text Text for value (already trimmed and lowercased by the caller)
     * @param tokenizer Provides further input broken into pieces at the structural boundaries of the language
     * @return a String for a quoted literal, a Boolean, a Double, a Long, or null for "no value"
     * @throws IllegalArgumentException if a literal is unterminated or a number cannot be parsed
     */
    private static Object parseValue(String text, StringTokenizer tokenizer) {
        if (TOKEN_LITERAL.equals(text)) { // Start of a literal
            StringBuilder value = new StringBuilder();
            while (true) { // Pull text until a " is found
                if (!tokenizer.hasMoreTokens()) {
                    throw new IllegalArgumentException("Unable to find closing '\"'"); // Uh oh, ran out of text
                }
                String piece = tokenizer.nextToken(); // Raw token: literal content is not trimmed or lowercased
                if (TOKEN_LITERAL.equals(piece)) {
                    break; // Found end of literal
                }
                if (TOKEN_ESCAPE.equals(piece)) {
                    if (!tokenizer.hasMoreTokens()) {
                        throw new IllegalArgumentException("Input ended in a character escape"); // Uh oh, ran out of text
                    }
                    value.append(tokenizer.nextToken()); // An escape just passes the next token through unchanged
                } else {
                    value.append(piece);
                }
            }
            return value.toString();
        }

        if (PATTERN_TRUE.equals(text)) {
            return Boolean.TRUE;
        }
        if (PATTERN_FALSE.equals(text)) {
            return Boolean.FALSE;
        }
        if (PATTERN_VOID.equals(text)) {
            return null;
        }
        if (PATTERN_PINFINITY.equals(text) || PATTERN_PINFINITY2.equals(text)) {
            return DOUBLE_PINFINITY;
        }
        if (PATTERN_NINFINITY.equals(text) || PATTERN_NINFINITY2.equals(text)) {
            return DOUBLE_NINFINITY;
        }
        if (PATTERN_NAN.equals(text)) {
            return DOUBLE_NAN;
        }
        try {
            if (text.indexOf(PATTERN_FP) != -1) {
                return Double.valueOf(text); // Handle real value
            }
            return Long.valueOf(text); // Handle integers
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Unable to parse value", e);
        }
    }

    /**
     * Converts an element to a string in the time series format.
     *
     * @return the scalar value (quoted if it is a string, empty if absent) when the length is 0,
     *         otherwise a parenthesized, comma separated list of the entries
     */
    @Override
    public String toString() {
        if (length == 0) { // Handle scalar value
            Object value = get(0);
            if (value == null) {
                return "";
            }
            return (value instanceof String) ? quote((String) value) : value.toString();
        }

        StringBuilder text = new StringBuilder(TOKEN_BLOCKSTART);
        for (int i = 1; i <= length; i++) { // Handle list value
            if (i > 1) {
                text.append(TOKEN_BLOCKELEMENT);
            }
            Object value = get(i);
            if (value == null) {
                continue; // Empty entries are written as nothing between separators
            }
            if (value instanceof String) {
                text.append(quote((String) value));
            } else {
                text.append(value.toString());
            }
        }
        return text.append(TOKEN_BLOCKEND).toString();
    }

    /**
     * Converts a string value to support escaped characters.
     *
     * @param literal Literal to convert
     * @return the literal wrapped in quotes, with every quote and escape character preceded by an escape
     */
    private static String quote(String literal) {
        StringBuilder value = new StringBuilder(TOKEN_LITERAL);
        StringTokenizer tokenizer = new StringTokenizer(literal, TOKEN_LITERAL + TOKEN_ESCAPE, true);
        while (tokenizer.hasMoreTokens()) {
            String text = tokenizer.nextToken();
            if (TOKEN_ESCAPE.equals(text) || TOKEN_LITERAL.equals(text)) {
                value.append(TOKEN_ESCAPE);
            }
            value.append(text);
        }
        return value.append(TOKEN_LITERAL).toString();
    }

    // -- DATA STORAGE FOR THE EXAMPLE PARSER ------------------------------------------------------

    /** 0 for a scalar; otherwise the number of list entries stored at positions 1..length. */
    private int length;

    /** Values keyed by position. */
    private final Map<Long, Object> data = new HashMap<Long, Object>();

    private TimeSeriesFormat() {
    }

    private Object get(int pos) {
        return data.get(Long.valueOf(pos));
    }

    private void put(int pos, Object value) {
        data.put(Long.valueOf(pos), value);
    }

    private void remove(int pos) {
        data.remove(Long.valueOf(pos));
    }

    /** Moves the value held in the temporary start slot to list position 1. */
    private void promotePendingToFirstEntry() {
        put(1, get(POS_START));
        remove(POS_START);
    }

    // -- METHODS FOR TESTING ----------------------------------------------------------------------

    private static final String[] TESTS = {
        "",
        "0.0",
        "NaN",
        "Infinity",
        "-Infinity",
        "true",
        "false",
        "\"\"",
        "\"test\"",
        "\"test, (-Ifninity, 0..7, :\"",
        "37",
        "()",
        "(3.141592654)",
        "(true,false,true)",
        "((0.0,0.0,0.0),(0.1,0.2,0.3),(0.2,0.4,0.3))",
        "((\"Time\",\"X\",\"MPF\"),(0.0,0.0,0.0),(0.1,0.2,0.3),(0.2,0.4,0.3))",
        "((0,0.0),(1,,0.1),(2,,,0.2),(3,,,,0.3))",
        "(,(0.0,NaN,Infinity,-Infinity,(true,false,(\"\",\"test\",(37,())))))",
        "\"This is a \\\"test\\\"\"",
        "\"\\\\\\\",\"",
        "(\"\",\"\\\"!\\\\\\\",!\\\\\\\"\\\\\")"
    };

    /**
     * Runs tests on the example parser.
     *
     * @param args User expressions to run through the parser
     */
    public static void main(String[] args) {
        // Each built-in test must survive a parse/toString round trip unchanged.
        for (int i = 0; i < TESTS.length; i++) {
            try {
                String output = parse(TESTS[i]).toString();
                if (!TESTS[i].equals(output)) {
                    System.out.println("TEST FAILED: " + TESTS[i] + " " + output);
                } else {
                    System.out.println("TEST PASSED: " + TESTS[i]);
                }
            } catch (Exception e) {
                System.out.println("TEST ERROR: " + TESTS[i] + " " + e.getMessage());
            }
        }

        for (int i = 0; i < args.length; i++) {
            try {
                System.out.println(args[i] + " BECAME " + parse(args[i]).toString());
            } catch (Exception e) {
                System.out.println("ERROR: " + e.getMessage());
            }
        }
    }
}