001 package jigcell.sbml2.math; 002 003 import java.io.PrintWriter; 004 import java.util.ArrayList; 005 import java.util.HashMap; 006 import java.util.Hashtable; 007 import java.util.Iterator; 008 import java.util.Map; 009 010 /** 011 * SymbolTable keeps track of all names and strings in a model and their 012 * translated names in Fortran. The names are translated by making the strings 013 * Fortran safe. See makeFortranIdSafe. The symbol table can make strings safe 014 * as Fortran identifiers or safe as Fortran character strings. 015 * <p> 016 * This code is licensed under the DARPA BioCOMP Open Source License. See 017 * LICENSE for more details. 018 * 019 * @author Jason Zwolak 020 * @author Nicholas Allen 021 */ 022 023 public class SymbolTable { 024 025 /* 026 * The following constants deal with mutating strings when there already 027 * exists a string of the same name. See the routine findAFortranName for 028 * more documentation. 029 */ 030 public static final int ID_START = 1; 031 public static final int ID_END = 9999; 032 public static final int ID_WIDTH = 4; 033 034 /** 035 * These constants describe the type of symbol table. A FORTRAN_IDENTIFIER 036 * symbol table will create symbols suitable as Fortran identifiers. A 037 * FORTRAN_CHARACTER_STRING symbol table will create symbols suitable as 038 * Fortran character strings. IDENTITY will not modify the name at all. It 039 * will simply copy it to "fortranName". It will not modify the name even 040 * if it is identical to another fortranName. 041 */ 042 public static final class Mode { 043 public static final Mode FORTRAN_IDENTIFIER = 044 new Mode( "FORTRAN_INDENTIFIER" , 31 ) 045 ; 046 /** 047 * The mode for Fortran character strings. This mode limits characters 048 * to the length of 65000. This is an empirically determined number for 049 * the the Lahey-Fujitsu Fortran 95 compiler v6.1 Express for Linux. 050 */ 051 public static final Mode FORTRAN_CHARACTER_STRING = 052 new Mode( "FORTRAN_CHARACTER_STRING", 65000 ) 053 ; 054 public static final Mode IDENTITY = 055 new Mode( "IDENTITY", Integer.MAX_VALUE ) 056 //new Mode( "IDENTITY", (1<<31) - 1 ) 057 ; 058 private String name; 059 private int maxLength; 060 public Mode( String name, int maxLength) { 061 this.name = name; 062 this.maxLength = maxLength; 063 } 064 public int maxLength() { return maxLength; } 065 public String toString() { return name; } 066 } 067 /* 068 * forbiddenSymbols ares symbols already defined in the Fortran Code and 069 * cannot be used by the modeller. 070 * 071 * Note this isn't used yet, it is just a place holder and reminder. 072 */ 073 private static Hashtable forbiddenSymbols; 074 075 /* 076 * nameCol - list of names in the symbol table. 077 * fortranNameCol - list of fortran names in the symbol table. 078 * idCol - list of ids in the symbol table 079 * nameColRev - list of names in the symbol table. 080 * fortranNameColRev - list of fortran names in the symbol table. 081 * 082 * nameCol, fortranNameCol, and idCol form the symbol table with nameCol 083 * containing the names that are keys. 084 * 085 * nameColRev and fortranNameColRev provide reverse lookup of names based on 086 * fortran names. 087 * 088 * nameCol and fortranNameColRev are alphabetically sorted. fortranNameCol, 089 * idCol, and nameColRev are ordered so they match up with their 090 * counterpart names in nameCol and fortranNameColRev. 091 * 092 * fortranNameColRev has all its entries capitolized. Fortran is case 093 * insensitive. Therefore, the names must be unique regardless of case. 094 * Capitolizing all entries in fortranNameColRev ensures this. 095 * 096 * fortranNameCol contains the names that will be written to the Fortran 097 * file. 098 * 099 * idNameMap contains a hash of id-name pairs. This is for quickly looking 100 * up names given an id. Note that the ids are assumed to be unique (an 101 * exception is thrown otherwise, see code below to ensure an exception is 102 * really thrown). 103 */ 104 private ArrayList nameCol; 105 private ArrayList fortranNameCol; 106 private ArrayList idCol; 107 private ArrayList nameColRev; 108 private ArrayList fortranNameColRev; 109 private Hashtable idNameMap; 110 111 /* 112 * maxLength specifies the maximum length of symbols in this table. After 113 * a symbol is mangled it is guaranteed to be this length or smaller. 114 * 115 * mode specifies the mode of this symbol table. The mod can be 116 * TYPE_IDENTIFIER or TYPE_CHARACTER_STRING. See TYPE_IDENTIFIER and 117 * TYPE_CHARACTER_STRING for more info. 118 * 119 * If padToLength is true then FORTRAN_CHARACTER_STRINGs will be padded with 120 * the padCharacter to the maxLength. 121 */ 122 private int maxLength; 123 private Mode mode = Mode.FORTRAN_IDENTIFIER; 124 private boolean padToLength = false; 125 private char padCharacter = ' '; 126 127 /** 128 * Create a new empty symbol table. 129 */ 130 public SymbolTable() { 131 maxLength = mode.maxLength(); 132 nameCol = new ArrayList(); 133 fortranNameCol = new ArrayList(); 134 idCol = new ArrayList(); 135 nameColRev = new ArrayList(); 136 fortranNameColRev = new ArrayList(); 137 idNameMap = new Hashtable(); 138 } 139 140 /** 141 * Create a new SymbolTable object with specified maximum ID length. The 142 * user may wish to limit the maximum ID length to smaller than Fortran 143 * 95's limit on the length. This is useful if the user wishes to prepend 144 * or append strings with the '__' (double underscore) separator for a new 145 * unique ID based on an old ID. This is also useful if Fortran is not the 146 * target for code generation. The maxLength cannot be greater than the 147 * default for the given mode. 148 */ 149 public SymbolTable( int maxLength ) { 150 this(); 151 this.maxLength = 152 maxLength > mode.maxLength() ? 153 mode.maxLength() : maxLength 154 ; 155 } 156 157 /** 158 * Creates a new empty SymbolTable object with the specified mode. The mode 159 * will control the nature in which the symbol table mangles symbols. See 160 * the {@link Mode} documentation for more information. 161 */ 162 public SymbolTable( Mode mode ) { 163 this(); 164 this.mode = mode; 165 this.maxLength = mode.maxLength(); 166 } 167 168 /** 169 * Creates a new empty SymbolTable with the specified mode and maximum id 170 * length. 171 */ 172 public SymbolTable( Mode mode, int maxLength ) { 173 this(); 174 this.mode = mode; 175 this.maxLength = maxLength; 176 } 177 178 /** 179 * Creates a new empty SymbolTable with the specified mode and maximum id 180 * length and request to pad. 181 */ 182 public SymbolTable( Mode mode, int maxLength, boolean padToLength ) { 183 this(); 184 this.mode = mode; 185 this.maxLength = maxLength; 186 this.padToLength = padToLength; 187 } 188 189 /** 190 * Write the symbol table to out. 191 */ 192 public void dumpSymbolTable( PrintWriter out ) { 193 Iterator n = nameCol.iterator(); 194 Iterator f = fortranNameCol.iterator(); 195 while ( n.hasNext() && f.hasNext() ) { 196 String name = (String)n.next(); 197 out.println(name+" ("+lookupId(name)+")"+" => "+f.next()); 198 } 199 out.flush(); 200 } 201 public void dumpSymbolTable( ) { 202 dumpSymbolTable(new PrintWriter(System.out)); 203 } 204 205 public void emptySymbolTable(){ 206 nameCol.clear(); 207 fortranNameCol.clear(); 208 idCol.clear(); 209 nameColRev.clear(); 210 fortranNameColRev.clear(); 211 idNameMap.clear(); 212 } 213 214 public Map getSymbolToIdMap() { 215 Map map = new HashMap(); 216 for (int i=0; i<fortranNameCol.size(); i++) { 217 map.put(fortranNameCol.get(i),idCol.get(i)); 218 } 219 return map; 220 } 221 222 /** 223 * Find the fortranName for name. If name doesn't exist in the current 224 * symbol table then return null otherwise return the fortran name. 225 * 226 * @return Fortran name on success, null on error. 227 */ 228 public String lookupSymbol( String name ) { 229 int index = lookupSymbolIndex( name, nameCol ); 230 if ( index < 0 ) return null; 231 return (String)fortranNameCol.get(index); 232 } 233 /** 234 * Returns the Fortran name for id. 235 */ 236 public String lookupSymbolById( String id ) { 237 String name = (String)idNameMap.get(id); 238 if ( name == null ) 239 return null; 240 String ret = lookupSymbol(name); 241 return ret; 242 } 243 /** 244 * Returns the id for the JigCell name. 245 */ 246 public String lookupId( String name ) { 247 int index = lookupSymbolIndex( name, nameCol ); 248 if ( index < 0 ) return null; 249 return (String)idCol.get(index); 250 } 251 /** 252 * Returns the JigCell name for id. 253 */ 254 public String lookupName( String id ) { 255 return (String)idNameMap.get(id); 256 } 257 /** 258 * Returns the JigCell id for fortranName. 259 */ 260 public String reverseLookupId( String fortranName ) { 261 int index = lookupFortranNameIndex( fortranName ); 262 if ( index < 0 ) return null; 263 return lookupId( (String)nameColRev.get(index) ); 264 } 265 266 267 /** 268 * Finds fname in fortranNameColRev. Converts fname to upper case first 269 * because all entries in fortranNameColRev are upper case. Calls 270 * lookupSymbolIndex to perform the search. 271 */ 272 private int lookupFortranNameIndex( String fname ) { 273 if ( mode == Mode.IDENTITY ) 274 return lookupSymbolIndex( fname, fortranNameColRev ); 275 return lookupSymbolIndex( fname.toUpperCase(), fortranNameColRev ); 276 } 277 278 /** 279 * Finds the index of name in the internal symbol table. If name doesn't 280 * exist in the table then the index returned is the negative of the place 281 * in the table where this symbol should be placed -1; the minus one makes 282 * the code work in the case where the symbol should be inserted at the 0th 283 * position. 284 * 285 * @return Index of name if name is in the symbol table, negative position 286 * in the table otherwise. 287 */ 288 private int lookupSymbolIndex( String name, ArrayList array ) { 289 int left = 0, right = array.size(); 290 int next = ( left + right ) / 2; 291 if ( right == left ) return -1; 292 int compare = 0; 293 if ( next < array.size() ) 294 compare = name.compareTo( array.get(next) ); 295 while ( right > left && compare != 0 ) { 296 if ( compare > 0 ) left = next + 1; 297 else right = next; 298 next = ( left + right ) / 2; 299 if ( next < array.size() ) 300 compare = name.compareTo( array.get(next) ); 301 } 302 if ( compare == 0 ) return next; 303 return -next-1; 304 } 305 306 /** 307 * Add <i>id</i> to the symbol table. If name already exists then throw an 308 * exception. This routine adds a symbol with the id <i>id</i> and the 309 * name <i>id</i>. In other words, the id is copied to the name since the 310 * name isn't present and symbols are added by name. 311 * 312 * @param id The ID of the symbol table entry. 313 * @return The Fortran name of name. 314 */ 315 public String addSymbol( String id ) throws Exception { 316 return addSymbol( id, id ); 317 } 318 /** 319 * Add a symbol with name and id. All ids entered here must be unique or 320 * an exception is thrown. 321 */ 322 public String addSymbol( String name, String id ) throws Exception { 323 if ( name == null ) { 324 System.err.println( 325 "WARNING: symbol name is null, setting name = id" 326 ); 327 name = id; 328 } 329 if ( id == null ) throw new Exception("id is null"); 330 if ( nameCol.size() == Integer.MAX_VALUE ) 331 throw new Exception( "Symbol table is full." ); 332 String fortranName = makeFortranSafe( name, mode, maxLength ); 333 int index = lookupSymbolIndex( name, nameCol ); 334 int fortranIndex = lookupFortranNameIndex( fortranName ); 335 if ( index >= 0 ) 336 throw new Exception( name+" already exists in the symbol table." ); 337 if ( fortranIndex >= 0 && mode != Mode.IDENTITY ) 338 fortranName = findAFortranName( fortranName ); 339 int forIndex = lookupFortranNameIndex( fortranName ); 340 nameCol.add( -index-1, name ); 341 fortranNameCol.add( -index-1, fortranName ); 342 idCol.add( -index-1, id ); 343 nameColRev.add( -forIndex-1, name ); 344 if ( mode == Mode.IDENTITY ) 345 fortranNameColRev.add( -forIndex-1, fortranName ); 346 else 347 fortranNameColRev.add( -forIndex-1, fortranName.toUpperCase() ); 348 if ( idNameMap.get(id) != null ) 349 throw new Exception("Id "+id+" isn't unique."); 350 idNameMap.put( id, name ); 351 return fortranName; 352 } 353 354 /** 355 * Takes a valid Fortran identifier name or string and finds a similar 356 * identifier name 357 * that is not already used in fortranNameCol. If name is already in 358 * fortranNameCol then findAFortranName mutates name in a recognizable way 359 * until it is unique among all the fortranNameCol rows or findAFortranName 360 * has exhausted its mutations. If the mutations are exhausted then an 361 * exception is thrown. 362 */ 363 public String findAFortranName( String name ) throws Exception { 364 int index = 0; 365 int id = ID_START; 366 String suffix = ""; 367 String newName = ""; 368 if ( lookupFortranNameIndex( name ) < 0 ) return name; 369 // Append or replace end with _id. 370 suffix = Integer.toString(id); 371 while ( suffix.length() < ID_WIDTH ) { suffix = "0" + suffix; } 372 suffix = "_" + suffix; 373 if ( name.length() < maxLength - suffix.length() ) { 374 newName = name + suffix; 375 } else { 376 newName = 377 name.substring( 0, maxLength - suffix.length() ) 378 + suffix 379 ; 380 } 381 // If name still in fortranNameCol then increment id and replace old id 382 // in name with new. Only repeat until id = ID_END. 383 while ( 384 ( index = lookupFortranNameIndex( newName ) ) >= 0 385 && id < ID_END 386 ) { 387 id++; 388 suffix = Integer.toString(id); 389 while ( suffix.length() < ID_WIDTH ) { suffix = "0" + suffix; } 390 suffix = "_" + suffix; 391 if ( name.length() < maxLength - suffix.length() ) { 392 newName = name + suffix; 393 } else { 394 newName = 395 name.substring( 0, maxLength - suffix.length() ) 396 + suffix 397 ; 398 } 399 } 400 if ( index >= 0 ) 401 throw new Exception( "A unique Fortran name couldn't be found." ); 402 return newName; 403 } 404 405 /** 406 * Returns a Fortran safe string according to <i>mode</i> 407 */ 408 public String makeFortranSafe( String string ) 409 throws Exception 410 { 411 return makeFortranSafe( 412 string, mode, maxLength, padToLength, padCharacter 413 ); 414 } 415 416 /** 417 * Makes string Fortran safe according to <i>mode</i>. 418 */ 419 public String makeFortranSafe( String string, Mode mode ) 420 throws Exception 421 { 422 return makeFortranSafe( 423 string, mode, maxLength, padToLength, padCharacter 424 ); 425 } 426 427 public String makeFortranSafe( 428 String string, Mode mode, int maxLength 429 ) throws Exception { 430 return makeFortranSafe( 431 string, mode, maxLength, padToLength, padCharacter 432 ); 433 } 434 435 /** 436 * Makes string Fortran safe according to <i>mode</i> and <i>maxLength</i>. 437 */ 438 public static String makeFortranSafe( 439 String string, Mode mode, int maxLength, boolean padToLength, 440 char padCharacter 441 ) throws Exception { 442 if ( mode == Mode.IDENTITY ) { 443 return string; 444 } else if ( mode == Mode.FORTRAN_IDENTIFIER ) { 445 return makeFortranIdSafe_( string, maxLength ); 446 } else if ( mode == Mode.FORTRAN_CHARACTER_STRING ) { 447 return makeFortranCharacterSafe_( 448 string, maxLength, padToLength, padCharacter ) 449 ; 450 } else throw new Exception("Unrecognized mode."); 451 } 452 453 /** 454 * Makes a string safe as a Fortran character string. Uses the same 455 * algorithm as {@link #makeFortranIdSafe_(String,int)} with the following 456 * differences: 457 * <ul> 458 * <li>No string is ever prepended. 459 * <li>The characters [0-9A-Za-z~`!#$%&*()_+-=[]\{};':",./<>? ] 460 * are permitted anywhere in the string. 461 * <li>The characters [@^|] are converted to _u??, u??_, or _u??_ depending 462 * on whether the character is at the end, beginning, or middle of the 463 * string, respectively. 464 * <li>All other characters are converted to _u??, u??_, or _u??_ depending 465 * on whether the character is at the end, beginning, or middle of the 466 * string, respectively. 467 * </ul> 468 */ 469 public static String makeFortranCharacterSafe_( 470 String string, int maxLength, boolean padToLength, char padCharacter 471 ) throws Exception { 472 StringBuffer out = new StringBuffer(); 473 StringBuffer in = new StringBuffer(string); 474 while ( in.length() > 0 && out.length() < maxLength ) { 475 if ( in.toString().matches( 476 "^[0-9A-Za-z~`!#$%&*()_+\\-=\\[\\]\\\\{};':\",./<>? ].*") 477 ) { 478 out.append(in.charAt(0)); 479 } else { 480 if ( out.length() > 1 ) out.append("_"); 481 out.append("u"+Integer.toHexString((int)in.charAt(0))); 482 if ( in.length() > 1 ) out.append("_"); 483 } 484 in.deleteCharAt(0); 485 } 486 if ( out.length() > maxLength ) { 487 return out.substring(0,maxLength); 488 } 489 if ( padToLength ) 490 while ( out.length() < maxLength ) 491 out.append(padCharacter); 492 return out.toString(); 493 } 494 495 /** 496 * Convert a string to a Fortran safe identifier. 497 * <p><b>Documentation out of date.</b><p> 498 * Fortran 95 only accepts 499 * [A-Za-z0-9_] in identifier names. Addtionally an identifier must begin 500 * with a letter and be less than 31 characters. Identifiers can be 501 * Fortran keywords. 502 * <p> 503 * The algorithm works as follows: 504 <pre> 505 If the first character is not a letter prepend PREFIX. 506 Convert series of ['"]* to _ppp_ if in the middle of the string and _ppp if 507 at the end, where the number of ps is the number of 508 apostrophes (each ' counts as one p and each " counts as 2). 509 Convert all other non alphanumeric characters to a number as they are 510 defined in Java. Surround the number with "_" if in the middle of the 511 string and prepend with "_" if at the end. Underscores are also 512 converted to numbers. This allows symbols from the symbol table to 513 take suffixes of the form "__MYUSERSUFFIX" and still guarantee 514 uniqueness. The user must take care not to use suffixes that might be 515 generated by this routine (i.e. _p, and _u34). Also, the user must 516 take care not to exceed the Fortran limit on variable names. This 517 routine allows the user to restrict var name length to ensure suffixes 518 can be appended. 519 </pre> 520 */ 521 public String makeFortranIdSafe( String string ) throws Exception { 522 return makeFortranIdSafe_( string, maxLength ); 523 } 524 525 private static String makeFortranIdSafe_ (String in, int maxLength) { 526 int length = in.length (); 527 if (length > maxLength) 528 length = maxLength; 529 if (length == 0) 530 return ""; 531 StringBuffer out = new StringBuffer (); 532 char c = in.charAt (0); 533 int count = 0; 534 int pos = 1; 535 536 // Avoid identifiers starting with non-alphabetic characters 537 if ((c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) { 538 out.append ("A_"); 539 count += 2; 540 } 541 542 outer: 543 while (count < maxLength) { 544 if (c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') { 545 out.append (c); 546 count++; 547 } else if (c == '\'' || c == '"') { 548 out.append ("_"); 549 count++; 550 while (true) { 551 if (c == '\'') { 552 out.append ("p"); 553 count++; 554 } else if (c == '"') { 555 out.append ("pp"); 556 count += 2; 557 } else 558 continue outer; 559 if (pos == length) 560 break outer; 561 c = in.charAt (pos++); 562 } 563 } else { 564 out.append ("_u"); 565 String hexCode = Integer.toHexString ((int) c); 566 out.append (hexCode); 567 count += 2 + hexCode.length (); 568 } 569 if (pos == length) 570 break; 571 c = in.charAt (pos++); 572 } 573 return count > maxLength ? out.substring (0, maxLength) : out.toString (); 574 } 575 }