1 package org.rosuda.REngine.Rserve.protocol;
3 // JRclient library - client interface to Rserve, see http://www.rosuda.org/Rserve/
4 // Copyright (C) 2004-8 Simon Urbanek
5 // --- for licensing information see LICENSE file in the original JRclient distribution ---
9 import org.rosuda.REngine.*;
10 import org.rosuda.REngine.Rserve.*;
12 /** representation of R-eXpressions in Java
16 public class REXPFactory {
17 /** xpression type: NULL */
18 public static final int XT_NULL=0;
19 /** xpression type: integer */
20 public static final int XT_INT=1;
21 /** xpression type: double */
22 public static final int XT_DOUBLE=2;
23 /** xpression type: String */
24 public static final int XT_STR=3;
25 /** xpression type: language construct (currently content is same as list) */
26 public static final int XT_LANG=4;
27 /** xpression type: symbol (content is symbol name: String) */
28 public static final int XT_SYM=5;
29 /** xpression type: RBool */
30 public static final int XT_BOOL=6;
31 /** xpression type: S4 object
33 public static final int XT_S4=7;
34 /** xpression type: generic vector (RList) */
35 public static final int XT_VECTOR=16;
36 /** xpression type: dotted-pair list (RList) */
37 public static final int XT_LIST=17;
38 /** xpression type: closure (there is no java class for that type (yet?). currently the body of the closure is stored in the content part of the REXP. Please note that this may change in the future!) */
39 public static final int XT_CLOS=18;
40 /** xpression type: symbol name
42 public static final int XT_SYMNAME=19;
43 /** xpression type: dotted-pair list (w/o tags)
45 public static final int XT_LIST_NOTAG=20;
46 /** xpression type: dotted-pair list (w tags)
48 public static final int XT_LIST_TAG=21;
49 /** xpression type: language list (w/o tags)
51 public static final int XT_LANG_NOTAG=22;
52 /** xpression type: language list (w tags)
54 public static final int XT_LANG_TAG=23;
55 /** xpression type: expression vector */
56 public static final int XT_VECTOR_EXP=26;
57 /** xpression type: string vector */
58 public static final int XT_VECTOR_STR=27;
59 /** xpression type: int[] */
60 public static final int XT_ARRAY_INT=32;
61 /** xpression type: double[] */
62 public static final int XT_ARRAY_DOUBLE=33;
63 /** xpression type: String[] (currently not used, Vector is used instead) */
64 public static final int XT_ARRAY_STR=34;
65 /** internal use only! this constant should never appear in a REXP */
66 public static final int XT_ARRAY_BOOL_UA=35;
67 /** xpression type: RBool[] */
68 public static final int XT_ARRAY_BOOL=36;
69 /** xpression type: raw (byte[])
70 @since Rserve 0.4-? */
71 public static final int XT_RAW=37;
72 /** xpression type: Complex[]
74 public static final int XT_ARRAY_CPLX=38;
75 /** xpression type: unknown; no assumptions can be made about the content */
76 public static final int XT_UNKNOWN=48;
78 /** xpression type: RFactor; this XT is internally generated (ergo it does not come from Rsrv.h) to support the RFactor class, which is built from XT_ARRAY_INT */
79 public static final int XT_FACTOR=127;
81 /** used for transport only - has attribute */
82 private static final int XT_HAS_ATTR=128;
89 public REXP getREXP() { return cont; }
90 public REXPList getAttr() { return (attr==null)?null:(REXPList)attr.cont; }
92 public REXPFactory() {
95 public REXPFactory(REXP r) throws REXPMismatchException {
96 if (r == null) r=new REXPNull();
97 REXPList a = r._attr();
99 if (a != null) attr = new REXPFactory(a);
100 if (r instanceof REXPNull) {
102 } else if (r instanceof REXPList) {
103 RList l = r.asList();
104 type = l.isNamed()?XT_LIST_TAG:XT_LIST_NOTAG;
105 if (r instanceof REXPLanguage)
106 type = (type==XT_LIST_TAG)?XT_LANG_TAG:XT_LANG_NOTAG;
107 } else if (r instanceof REXPGenericVector) {
108 type = XT_VECTOR; // FIXME: may have to adjust names attr
109 } else if (r instanceof REXPS4) {
111 } else if (r instanceof REXPInteger) { // this includes factor - FIXME: do we need speacial handling?
113 } else if (r instanceof REXPDouble) {
114 type = XT_ARRAY_DOUBLE;
115 } else if (r instanceof REXPString) {
117 } else if (r instanceof REXPSymbol) {
119 } else if (r instanceof REXPRaw) {
121 } else if (r instanceof REXPLogical) {
122 type = XT_ARRAY_BOOL;
124 // throw new REXPMismatchException(r, "decode");
125 System.err.println("*** REXPFactory unable to interpret "+r);
129 /** parses byte buffer for binary representation of xpressions - read one xpression slot (descends recursively for aggregated xpressions such as lists, vectors etc.)
130 @param buf buffer containing the binary representation
131 @param o offset in the buffer to start at
132 @return position just behind the parsed xpression. Can be used for successive calls to {@link #parseREXP} if more than one expression is stored in the binary array. */
133 public int parseREXP(byte[] buf, int o) throws REXPMismatchException {
134 int xl = RTalk.getLen(buf,o);
135 boolean hasAtt = ((buf[o]&128)!=0);
136 boolean isLong = ((buf[o]&64)!=0);
137 int xt = (int)(buf[o]&63);
138 //System.out.println("parseREXP: type="+xt+", len="+xl+", hasAtt="+hasAtt+", isLong="+isLong);
143 type=xt; attr=new REXPFactory(); cont=null;
144 if (hasAtt) o = attr.parseREXP(buf, o);
146 cont = new REXPNull(getAttr());
150 long lr = RTalk.getLong(buf,o);
151 double[] d = new double[] { Double.longBitsToDouble(lr) };
154 System.err.println("Warning: double SEXP size mismatch\n");
157 cont = new REXPDouble(d, getAttr());
160 if (xt==XT_ARRAY_DOUBLE) {
161 int as=(eox-o)/8,i=0;
162 double[] d=new double[as];
164 d[i]=Double.longBitsToDouble(RTalk.getLong(buf,o));
169 System.err.println("Warning: double array SEXP size mismatch\n");
172 cont = new REXPDouble(d, getAttr());
176 byte b[] = new byte[] { buf[o] };
177 if (b[0] != 0 && b[0] != 1) b[0] = REXPLogical.NA;
178 cont = new REXPLogical(b, getAttr());
181 if (eox!=o+3) // o+3 could happen if the result was aligned (1 byte data + 3 bytes padding)
182 System.err.println("Warning: bool SEXP size mismatch\n");
187 if (xt==XT_ARRAY_BOOL_UA) {
189 byte[] d=new byte[as];
190 System.arraycopy(buf,o,d,0,eox-o);
192 for (int j = 0; j < d.length; j++) if (d[j] != 0 && d[j] != 1) d[j] = REXPLogical.NA;
193 cont = new REXPLogical(d, getAttr());
196 if (xt==XT_ARRAY_BOOL) {
197 int as=RTalk.getInt(buf,o);
199 byte[] d=new byte[as];
200 System.arraycopy(buf,o,d,0,as);
201 for (int j = 0; j < d.length; j++) if (d[j] != 0 && d[j] != 1) d[j] = REXPLogical.NA;
203 cont = new REXPLogical(d, getAttr());
207 int i[] = new int[] { RTalk.getInt(buf,o) };
208 cont = new REXPInteger(i, getAttr());
211 System.err.println("Warning: int SEXP size mismatch\n");
216 if (xt==XT_ARRAY_INT) {
217 int as=(eox-o)/4,i=0;
220 d[i]=RTalk.getInt(buf,o);
225 System.err.println("Warning: int array SEXP size mismatch\n");
229 // hack for lists - special lists attached to int are factors
231 if (getAttr()!=null) {
232 REXP ca = getAttr().asList().at("class");
233 REXP ls = getAttr().asList().at("levels");
234 if (ca != null && ls != null && ca.asString().equals("factor")) {
235 // R uses 1-based index, Java uses 0-based one
236 cont = new REXPFactor(d, ls.asStrings(), getAttr());
240 } catch (Exception e) {
242 if (cont == null) cont = new REXPInteger(d, getAttr());
246 int as=RTalk.getInt(buf,o);
248 byte[] d=new byte[as];
249 System.arraycopy(buf,o,d,0,as);
251 cont = new REXPRaw(d, getAttr());
254 if (xt==XT_LIST_NOTAG || xt==XT_LIST_TAG ||
255 xt==XT_LANG_NOTAG || xt==XT_LANG_TAG) {
256 REXPFactory lc = new REXPFactory();
257 REXPFactory nf = new REXPFactory();
258 RList l = new RList();
261 o = lc.parseREXP(buf, o);
262 if (xt==XT_LIST_TAG || xt==XT_LANG_TAG) {
263 o = nf.parseREXP(buf, o);
264 if (nf.cont.isSymbol() || nf.cont.isString()) name = nf.cont.asString();
266 if (name==null) l.add(lc.cont);
267 else l.put(name, lc.cont);
269 cont = (xt==XT_LANG_NOTAG || xt==XT_LANG_TAG)?
270 new REXPLanguage(l, getAttr()):
271 new REXPList(l, getAttr());
273 System.err.println("Warning: int list SEXP size mismatch\n");
278 if (xt==XT_LIST || xt==XT_LANG) { //old-style lists, for comaptibility with older Rserve versions - rather inefficient since we have to convert the recusively stored structures into a flat structure
279 boolean isRoot = false;
280 if (rootList == null) {
281 rootList = new RList();
284 REXPFactory headf = new REXPFactory();
285 REXPFactory tagf = new REXPFactory();
286 o = headf.parseREXP(buf, o);
287 int elIndex = rootList.size();
288 rootList.add(headf.cont);
289 //System.out.println("HEAD="+headf.cont);
290 o = parseREXP(buf, o); // we use ourselves recursively for the body
292 o = tagf.parseREXP(buf, o);
293 //System.out.println("TAG="+tagf.cont);
294 if (tagf.cont != null && (tagf.cont.isString() || tagf.cont.isSymbol()))
295 rootList.setKeyAt(elIndex, tagf.cont.asString());
298 cont = (xt==XT_LIST)?
299 new REXPList(rootList, getAttr()):
300 new REXPLanguage(rootList, getAttr());
302 //System.out.println("result="+cont);
306 if (xt==XT_VECTOR || xt==XT_VECTOR_EXP) {
307 Vector v=new Vector(); //FIXME: could we use RList?
309 REXPFactory xx=new REXPFactory();
310 o = xx.parseREXP(buf,o);
311 v.addElement(xx.cont);
314 System.err.println("Warning: int vector SEXP size mismatch\n");
317 // fixup for lists since they're stored as attributes of vectors
318 if (getAttr()!=null && getAttr().asList().at("names") != null) {
319 REXP nam = getAttr().asList().at("names");
320 String names[] = null;
321 if (nam.isString()) names = nam.asStrings();
322 else if (nam.isVector()) { // names could be a vector if supplied by old Rserve
323 RList l = nam.asList();
324 Object oa[] = l.toArray();
325 names = new String[oa.length];
326 for(int i = 0; i < oa.length; i++) names[i] = ((REXP)oa[i]).asString();
328 RList l = new RList(v, names);
329 cont = (xt==XT_VECTOR_EXP)?
330 new REXPExpressionVector(l, getAttr()):
331 new REXPGenericVector(l, getAttr());
333 cont = (xt==XT_VECTOR_EXP)?
334 new REXPExpressionVector(new RList(v), getAttr()):
335 new REXPGenericVector(new RList(v), getAttr());
338 if (xt==XT_ARRAY_STR) {
340 /* count the entries */
341 while (i < eox) if (buf[i++] == 0) c++;
342 String s[] = new String[c];
348 if (buf[i] == -1) { /* if the first byte is 0xff (-1 in signed char) then it either needs to be skipped (doubling) or there is an NA value */
350 s[c] = null; /* NA */
352 s[c] = new String(buf, i + 1, o - i - 1, RConnection.transferCharset);
354 s[c] = new String(buf, i, o - i, RConnection.transferCharset);
355 } catch (java.io.UnsupportedEncodingException ex) {
364 cont = new REXPString(s, getAttr());
367 if (xt==XT_VECTOR_STR) {
368 Vector v=new Vector();
370 REXPFactory xx=new REXPFactory();
371 o = xx.parseREXP(buf,o);
372 v.addElement(xx.cont.asString());
375 System.err.println("Warning: int vector SEXP size mismatch\n");
378 String sa[] = new String[v.size()];
379 int i = 0; while (i < sa.length) { sa[i]=(String)v.get(i); i++; }
380 cont = new REXPString(sa, getAttr());
383 if (xt==XT_STR||xt==XT_SYMNAME) {
385 while (buf[i]!=0 && i<eox) i++;
388 cont = new REXPString(new String[] { new String(buf, o, i-o, RConnection.transferCharset) }, getAttr());
390 cont = new REXPSymbol(new String(buf, o, i-o, RConnection.transferCharset));
391 } catch(Exception e) {
392 System.err.println("unable to convert string\n");
399 REXPFactory sym = new REXPFactory();
400 o = sym.parseREXP(buf, o); // PRINTNAME that's all we will use
401 cont = new REXPSymbol(sym.getREXP().asString()); // content of a symbol is its printname string (so far)
408 REXP form=new REXP();
409 REXP body=new REXP();
410 o=parseREXP(form,buf,o);
411 o=parseREXP(body,buf,o);
413 System.err.println("Warning: closure SEXP size mismatch\n");
422 if (xt==XT_UNKNOWN) {
423 cont = new REXPUnknown(RTalk.getInt(buf,o), getAttr());
429 cont = new REXPS4(getAttr());
436 System.err.println("unhandled type: "+xt);
440 /** Calculates the length of the binary representation of the REXP including all headers. This is the amount of memory necessary to store the REXP via {@link #getBinaryRepresentation}.
441 <p>Please note that currently only XT_[ARRAY_]INT, XT_[ARRAY_]DOUBLE and XT_[ARRAY_]STR are supported! All other types will return 4 which is the size of the header.
442 @return length of the REXP including headers (4 or 8 bytes)*/
443 public int getBinaryLength() throws REXPMismatchException {
446 if (type==XT_LIST || type==XT_LIST_TAG || type==XT_LIST_NOTAG)
447 rxt=(cont.asList()!=null && cont.asList().isNamed())?XT_LIST_TAG:XT_LIST_NOTAG;
448 //System.out.print("len["+xtName(type)+"/"+xtName(rxt)+"] ");
449 if (type==XT_VECTOR_STR) rxt=XT_ARRAY_STR; // VECTOR_STR is broken right now
452 if (type==XT_VECTOR && cont.asList()!=null && cont.asList().isNamed())
453 setAttribute("names",new REXPString(cont.asList().keys()));
456 boolean hasAttr = false;
457 REXPList a = getAttr();
459 if (a!=null) al = a.asList();
460 if (al != null && al.size()>0) hasAttr=true;
462 l+=attr.getBinaryLength();
467 case XT_INT: l+=4; break;
468 case XT_DOUBLE: l+=8; break;
469 case XT_RAW: l+=4 + cont.asBytes().length; if ((l&3)>0) l=l-(l&3)+4; break;
472 l+=(cont==null)?1:(cont.asString().length()+1);
473 if ((l&3)>0) l=l-(l&3)+4;
475 case XT_ARRAY_INT: l+=cont.asIntegers().length*4; break;
476 case XT_ARRAY_DOUBLE: l+=cont.asDoubles().length*8; break;
477 case XT_ARRAY_CPLX: l+=cont.asDoubles().length*8; break;
478 case XT_ARRAY_BOOL: l += cont.asBytes().length + 4; if ((l & 3) > 0) l = l - (l & 3) + 4; break;
486 final RList lst = cont.asList();
488 while (i<lst.size()) {
490 l += (x==null)?4:(new REXPFactory(x).getBinaryLength());
491 if (rxt==XT_LIST_TAG) {
493 String s = lst.keyAt(i);
494 l+=4; // header for a symbol
495 l+=(s==null)?1:(s.length()+1);
496 if ((l&3)>0) l=l-(l&3)+4;
497 // System.out.println("TAG length: "+(l-pl));
501 if ((l&3)>0) l=l-(l&3)+4;
506 String sa[] = cont.asStrings();
508 while (i < sa.length) {
511 byte b[] = sa[i].getBytes(RConnection.transferCharset);
517 } catch (java.io.UnsupportedEncodingException uex) {
518 // FIXME: we should do something ... so far we hope no one is going to mess with the encoding
520 } else l++; // NA = -1
524 if ((l&3)>0) l=l-(l&3)+4;
528 if (l>0xfffff0) l+=4; // large data need 4 more bytes
529 // System.out.println("len:"+(l+4)+" "+xtName(rxt)+"/"+xtName(type)+" "+cont);
530 return l+4; // add the header
533 /** Stores the REXP in its binary (ready-to-send) representation including header into a buffer and returns the index of the byte behind the REXP.
534 <p>Please note that currently only XT_[ARRAY_]INT, XT_[ARRAY_]DOUBLE and XT_[ARRAY_]STR are supported! All other types will be stored as SEXP of the length 0 without any contents.
535 @param buf buffer to store the REXP binary into
536 @param off offset of the first byte where to store the REXP
537 @return the offset of the first byte behind the stored REXP */
538 public int getBinaryRepresentation(byte[] buf, int off) throws REXPMismatchException {
539 int myl=getBinaryLength();
540 boolean isLarge=(myl>0xfffff0);
541 boolean hasAttr = false;
542 final REXPList a = getAttr();
544 if (a != null) al = a.asList();
545 if (al != null && al.size()>0) hasAttr=true;
546 int rxt=type, ooff=off;
547 if (type==XT_VECTOR_STR) rxt=XT_ARRAY_STR; // VECTOR_STR is broken right now
548 if (type==XT_LIST || type==XT_LIST_TAG || type==XT_LIST_NOTAG)
549 rxt=(cont.asList()!=null && cont.asList().isNamed())?XT_LIST_TAG:XT_LIST_NOTAG;
550 // System.out.println("@"+off+": "+xtName(rxt)+"/"+xtName(type)+" "+cont+" ("+myl+"/"+buf.length+") att="+hasAttr);
551 RTalk.setHdr(rxt|(hasAttr?XT_HAS_ATTR:0),myl-(isLarge?8:4),buf,off);
553 if (hasAttr) off=attr.getBinaryRepresentation(buf, off);
558 case XT_INT: RTalk.setInt(cont.asInteger(),buf,off); break;
559 case XT_DOUBLE: RTalk.setLong(Double.doubleToRawLongBits(cont.asDouble()),buf,off); break;
562 int ia[]=cont.asIntegers();
565 RTalk.setInt(ia[i++],buf,io); io+=4;
571 byte ba[] = cont.asBytes();
573 RTalk.setInt(ba.length, buf, io);
576 for(int i =0; i < ba.length; i++)
577 buf[io++] = (byte) ( (ba[i] == REXPLogical.NA) ? 2 : ((ba[i] == REXPLogical.FALSE) ? 0 : 1) );
578 while ((io & 3) != 0) buf[io++] = 3;
582 case XT_ARRAY_DOUBLE:
584 double da[]=cont.asDoubles();
587 RTalk.setLong(Double.doubleToRawLongBits(da[i++]),buf,io); io+=8;
593 byte by[] = cont.asBytes();
594 RTalk.setInt(by.length, buf, off); off+=4;
595 System.arraycopy(by, 0, buf, off, by.length);
600 String sa[] = cont.asStrings();
602 while (i < sa.length) {
605 byte b[] = sa[i].getBytes(RConnection.transferCharset);
607 if (b[0] == -1) /* if the first entry happens to be -1 then we need to double it so it doesn't get confused with NAs */
609 System.arraycopy(b, 0, buf, io, b.length);
613 } catch (java.io.UnsupportedEncodingException uex) {
614 // FIXME: we should do something ... so far we hope no one is going to mess with the encoding
617 buf[io++] = -1; /* NAs are stored as 0xff (-1 in signed bytes) */
622 while ((i & 3) != 0) { buf[io++] = 1; i++; } // padding if necessary..
634 final RList lst = cont.asList();
637 while (i<lst.size()) {
639 if (x == null) x=new REXPNull();
640 io = new REXPFactory(x).getBinaryRepresentation(buf, io);
641 if (rxt == XT_LIST_TAG || rxt == XT_LANG_TAG)
642 io = new REXPFactory(new REXPSymbol(lst.keyAt(i))).getBinaryRepresentation(buf, io);
646 // System.out.println("io="+io+", expected: "+(ooff+myl));
652 getStringBinaryRepresentation(buf, off, cont.asString());
658 public static int getStringBinaryRepresentation(byte[] buf, int off, String s) {
662 byte b[]=s.getBytes(RConnection.transferCharset);
663 // System.out.println("<str> @"+off+", len "+b.length+" (cont "+buf.length+") \""+s+"\"");
664 System.arraycopy(b,0,buf,io,b.length);
667 } catch (java.io.UnsupportedEncodingException uex) {
668 // FIXME: we should do something ... so far we hope no one is going to mess with the encoding
671 while ((io&3)!=0) buf[io++]=0; // padding if necessary..
675 /** Returns a short human-readable name for an xpression type code. Array types are denoted by a trailing asterisk (*), e.g. "INT*"; string and expression vectors are shown as "STRING[]" and "EXPR[]"; tagged and untagged dotted-pair/language lists are shown as "LIST+T"/"LIST/T" and "LANG+T"/"LANG/T". The code is compared as-is against the XT_ constants: the XT_HAS_ATTR bit is not masked off here, so a code with that bit set is reported as unknown, and XT_ARRAY_BOOL_UA has no name of its own.
676 @param xt xpression type code (one of the XT_ constants)
677 @return name of the xpression type, or a string of the form "&lt;unknown N&gt;" (N being the numeric code) if <code>xt</code> matches none of the constants checked */
678 public static String xtName(int xt) {
679 if (xt==XT_NULL) return "NULL";
680 if (xt==XT_INT) return "INT";
681 if (xt==XT_STR) return "STRING";
682 if (xt==XT_DOUBLE) return "REAL";
683 if (xt==XT_BOOL) return "BOOL";
684 if (xt==XT_ARRAY_INT) return "INT*";
685 if (xt==XT_ARRAY_STR) return "STRING*";
686 if (xt==XT_ARRAY_DOUBLE) return "REAL*";
687 if (xt==XT_ARRAY_BOOL) return "BOOL*";
688 if (xt==XT_ARRAY_CPLX) return "COMPLEX*";
689 if (xt==XT_SYM) return "SYMBOL";
690 if (xt==XT_SYMNAME) return "SYMNAME";
691 if (xt==XT_LANG) return "LANG";
692 if (xt==XT_LIST) return "LIST";
693 if (xt==XT_LIST_TAG) return "LIST+T";
694 if (xt==XT_LIST_NOTAG) return "LIST/T";
695 if (xt==XT_LANG_TAG) return "LANG+T";
696 if (xt==XT_LANG_NOTAG) return "LANG/T";
697 if (xt==XT_CLOS) return "CLOS";
698 if (xt==XT_RAW) return "RAW";
699 if (xt==XT_S4) return "S4";
700 if (xt==XT_VECTOR) return "VECTOR";
701 if (xt==XT_VECTOR_STR) return "STRING[]";
702 if (xt==XT_VECTOR_EXP) return "EXPR[]";
703 if (xt==XT_FACTOR) return "FACTOR";
704 if (xt==XT_UNKNOWN) return "UNKNOWN";
705 return "<unknown "+xt+">"; // fallback: no constant matched, so report the raw numeric code