Send As SMS

Friday, November 02, 2007

Repeating group values in PROC REPORT

I was working on a summary report with several group variables. As you know, when you display group (or order) variables in PROC REPORT, only the first row containing a particular value with display that value; following rows will show a blank. This sample program and PROC REPORT show the problem (the sample data are somewhat complex; ignore that and take my word for it that the test data set will contain repetitions of the group variables HasMemberships, ProdLineMatch, ProdLine, and Mbsh_MCProd):

data test;
   drop i;
   do HasMemberships = 'y', 'n', 'y';
      do ProdLineMatch = 'y', 'n', 'y';
         if ranuni(92345) > .4 then 
            do ProdLine = 'M', 'C', 'R', ' ';
               do Mbsh_MCProd = 'C', 'R', 'M', ' ';
                  if ranuni(87890) > .6 then 
                     do i = 1 to round(ranuni(95605)*5-1);
                        _freq_ = round(ranuni(94612)*100);
                        if hasmemberships = 'n' then 
                           do;
                           mbsh_mcprod = ' ';
                           prodlinematch = 'n';
                           end;
                        output;
                     end;
               end;
            end;
      end;
   end;
run;

title 'Default Display of Repeated Group Values';

proc report data=test missing nofs;
   column HasMemberships ProdLineMatch ProdLine Mbsh_MCProd
          _freq_;

   define HasMemberships / group width=14;
   define ProdLineMatch  / group width=13;
   define ProdLine       / group width=8;
   define Mbsh_MCProd    / group width=11;

   define _freq_ / sum format=comma12.0 width=12 'Freq';

run;

displays:

HasMemberships ProdLineMatch ProdLine Mbsh_MCProd Freq
n n     281
    C   479
    M   252
    R   689
y n   M 175
      R 151
    C R 123
    M   143
      M 7
    R   243
  y C C 61
      M 191
    M   175
    R C 235

As you can see, it can be difficult to tell which real values are behind each cell. Having the group values repeat in each cell would make the table much easier to read. Unfortunately, in SAS 9.1 there's no way to make that happen automatically.

You can, however, do it by using compute blocks. I'm not going to explain the code, but here's an example:

proc report data=test missing nofs;
   column HasMemberships ProdLineMatch ProdLine Mbsh_MCProd
          _freq_;

   define HasMemberships / group width=14;

   define ProdLineMatch  / group width=13;
   compute before ProdLinematch;
      hold_ProdLinematch = ProdLinematch;
   endcomp;
   compute ProdlineMatch;
      if _break_ = ' ' then
         ProdLinematch = hold_ProdLinematch;
   endcomp;

   define ProdLine       / group width=8;
   compute before ProdLine;
      hold_ProdLine = ProdLine;
   endcomp;
   compute Prodline;
      if _break_ = ' ' then
         do;
         ProdLine = hold_ProdLine;
         end;
   endcomp;

   define Mbsh_MCProd    / group width=11;

   define _freq_ / sum format=comma12.0 width=12 'Freq';

run;

displays:

HasMemberships ProdLineMatch ProdLine Mbsh_MCProd Freq
n n     281
  n C   479
  n M   252
  n R   689
y n   M 175
  n   R 151
  n C R 123
  n M   143
  n M M 7
  n R   243
  y C C 61
  y C M 191
  y M   175
  y R C 235

Saturday, May 05, 2007

Mixing Means and PctNs in a column in PROC REPORT

In a posting to SAS-L (), Janina Nowles asked how to create a report where a column contains PctN at the detail level but the Mean at the summary level. The code below comes pretty close; the data required some preprocessing to make it suitable.

dm 'clear log; clear out;';

proc format; 
   value ldfmt 
      9 ='Like Extremely' 8 ='Like very much' 
      7 ='Like moderately' 6 ='Like slightly' 
      5 ='Neither like nor dislike' 4 ='Dislike slightly' 
      3 ='Dislike moderately' 2 ='Dislike very much' 
      1 ='Dislike extremely'
      ;
run;

data raw; 
   infile cards; 
   input id sample q1 q2; 
   label q1='constr' q2='extrus'; 
cards; 
1 558 6 8 
1 911 5 6 
2 911 8 8 
2 558 7 7 
3 558 7 8 
3 911 8 8 
4 911 7 7 
4 558 7 7 
5 558 7 6 
5 911 8 9 
6 911 6 8 
6 558 6 8
;;;; 

data normal;
   set raw;
   array qnames{2} $ _temporary_ ('q1' 'q2');
   array qvals{2} q1 q2;
   do i = 1 to 2;
      q = qnames{i};
      value = qvals{i};
      select (sample);
         when (558) sample_558 = value;
         when (911) sample_911 = value;
         otherwise  error 'Unknown sample type';
      end;
      output;
   end;
   drop q1 q2 i;
run;

proc report data=normal missing nowindows nocenter split='!';

   column q value ('- Sample -' 
                   sample_558 sample_558=sample_558_n show_558 
                   sample_911 sample_911=sample_911_n show_911);

   define q / group;
   define value        / group format=ldfmt. order=internal descending;
   define sample_558   / analysis mean format=10.0 '558 Mean' noprint;
   define sample_911   / analysis mean format=10.0 '911 Mean' noprint;
   define sample_558_n / analysis n format=10.1 '558 N' noprint;
   define sample_911_n / analysis n format=10.1 '911 N' noprint;
   define show_558     / computed format=10.0 '558!%';
   define show_911     / computed format=10.0 '911!%';
   
   compute before q;
      n_558 = sample_558_n;
      n_911 = sample_911_n;
   endcomp;

   break after q / summarize skip dol;

   compute show_558;
      if _break_ = ' ' then 
         do;
         if min(sample_558_n, n_558) = 0 then 
            show_558 = .;
         else 
            show_558 = 100 * sample_558_n / n_558;         
         end;
      else 
         do;
         show_558 = sample_558.mean;
         call define(_col_, 'format', '10.1');
         end;
   endcomp;

   compute show_911;
      if _break_ = ' ' then 
         do;
         if min(sample_911_n, n_911) = 0 then 
            show_911 = .;
         else 
            show_911 = 100 * sample_911_n / n_911;         
         end;
      else 
         do;
         show_911 = sample_911.mean;
         call define(_col_, 'format', '10.1');
         end;
   endcomp;

   compute after q;
      q = 'Mean';
   endcomp;

run;

Here is the output:

                                      ------- Sample -------
                                             558         911
  q                            value           %           %
  q1        Like very much                     .          50
            Like moderately                   67          17
            Like slightly                     33          17
            Neither like nor dislike           .          17
  ========                            ==========  ==========
  Mean                                       6.7         7.0

  q2        Like Extremely                     .          17
            Like very much                    50          50
            Like moderately                   33          17
            Like slightly                     17          17
  ========                            ==========  ==========
  Mean                                       7.3         7.7

Saturday, April 28, 2007

Dynamically generated PIPEs

data _null_;
   command = 'nslookup 207.255.105.92';
   infile ns1 pipe filevar=command end=eod1;
   do until (eod1);
      input;
      put '(1) ' _infile_;
   end;
   command = 'nslookup 207.255.105.93';
   infile ns2 pipe filevar=command end=eod2;
   do until (eod2);
      input;
      put '(2) ' _infile_;
   end;
   stop;
run;

data _null_;
   do command = 'nslookup 207.255.105.92', 'nslookup 207.255.105.93';
      eod = 0;
      infile nslookup pipe filevar=command end=eod;
      do until (eod);
         input;
         put command ':' _infile_;
      end;
   end;
   stop;
run;

prints

332  data _null_;
333     command = 'nslookup 207.255.105.92';
334     infile ns1 pipe filevar=command end=eod1;
335     do until (eod1);
336        input;
337        put '(1) ' _infile_;
338     end;
339     command = 'nslookup 207.255.105.93';
340     infile ns2 pipe filevar=command end=eod2;
341     do until (eod2);
342        input;
343        put '(2) ' _infile_;
344     end;
345     stop;
346  run;

NOTE: The infile NS1 is:
      Unnamed Pipe Access Device,
      PROCESS=nslookup 207.255.105.92,RECFM=V,
      LRECL=256

(1) Server:  spoke.dcn.davis.ca.us
(1) Address:  168.150.253.2
(1)
(1) Name:    207-255-105-092-dhcp.unt.pa.atlanticbb.net
(1) Address:  207.255.105.92
(1)
NOTE: The infile NS2 is:
      Unnamed Pipe Access Device,
      PROCESS=nslookup 207.255.105.93,RECFM=V,
      LRECL=256

(2) Server:  spoke.dcn.davis.ca.us
(2) Address:  168.150.253.2
(2)
(2) Name:    207-255-105-093-dhcp.unt.pa.atlanticbb.net
(2) Address:  207.255.105.93
(2)
NOTE: 6 records were read from the infile NS1.
      The minimum record length was 0.
      The maximum record length was 51.
NOTE: 6 records were read from the infile NS2.
      The minimum record length was 0.
      The maximum record length was 51.
NOTE: DATA statement used (Total process time):
      real time           0.75 seconds
      cpu time            0.01 seconds


347
348  data _null_;
349     do command = 'nslookup 207.255.105.92', 'nslookup 207.255.105.93';
350        eod = 0;
351        infile nslookup pipe filevar=command end=eod;
352        do until (eod);
353           input;
354           put command ':' _infile_;
355        end;
356     end;
357     stop;
358  run;

NOTE: The infile NSLOOKUP is:
      Unnamed Pipe Access Device,
      PROCESS=nslookup 207.255.105.92,RECFM=V,
      LRECL=256

nslookup 207.255.105.92 :Server:  spoke.dcn.davis.ca.us
nslookup 207.255.105.92 :Address:  168.150.253.2
nslookup 207.255.105.92 :
nslookup 207.255.105.92 :Name:    207-255-105-092-dhcp.unt.pa.atlanticbb.net
nslookup 207.255.105.92 :Address:  207.255.105.92
nslookup 207.255.105.92 :
NOTE: The infile NSLOOKUP is:
      Unnamed Pipe Access Device,
      PROCESS=nslookup 207.255.105.93,RECFM=V,
      LRECL=256

nslookup 207.255.105.93 :Server:  spoke.dcn.davis.ca.us
nslookup 207.255.105.93 :Address:  168.150.253.2
nslookup 207.255.105.93 :
nslookup 207.255.105.93 :Name:    207-255-105-093-dhcp.unt.pa.atlanticbb.net
nslookup 207.255.105.93 :Address:  207.255.105.93
nslookup 207.255.105.93 :
NOTE: 6 records were read from the infile NSLOOKUP.
      The minimum record length was 0.
      The maximum record length was 51.
NOTE: 6 records were read from the infile NSLOOKUP.
      The minimum record length was 0.
      The maximum record length was 51.
NOTE: DATA statement used (Total process time):
      real time           0.75 seconds
      cpu time            0.03 seconds

Friday, April 27, 2007

Dynamic formats in PROC REPORT

This code shows how to apply a dynamically defined format in PROC REPORT. In this simplified example, the format will be 10.0 if the variable DECIMALS has a value of 0, 10.1 if DECIMALS has a value of 1, and so forth.

data test (keep=decimals value displayed);
   do i = 1 to 5;
      do decimals = 0 to 3;
         value = ranuni(95605) * 10;
         displayed = value;
         output;
      end;
   end;
   stop;
run;
 
proc report data=test nowindows missing nocenter;
   columns value decimals displayed;
   define _all_ / display;
   compute displayed;
      call define(_col_, 'format', '10.' || put(decimals, z1.0));
   endcomp;
run;

prints:

      value   decimals  displayed
  3.4648784          0          3
  3.3165982          1        3.3
  2.0278367          2       2.03
  8.0749122          3      8.075
  8.1445687          0          8
  7.6102304          1        7.6
  5.2384895          2       5.24
  3.8716015          3      3.872
  5.5787562          0          6
   0.951658          1        1.0
  9.5194012          2       9.52
  6.4120504          3      6.412
  7.6308912          0          8
  1.8146458          1        1.8
  2.6590652          2       2.66
   2.289426          3      2.289
  0.5009372          0          1
  7.2236493          1        7.2
  6.0467117          2       6.05
  2.5131059          3      2.513

Thursday, March 22, 2007

Simple example using "hash" object (associative array)

This is a simple example of the use of the "hash" or associative array object in the SAS version 9 data step. This example doesn't accomplish anything except demonstrate the syntax.

data test;
   length outname $5.;
   do outname = 'one', 'two', 'three';
      do i = 1 to 5;
         random = ranuni(95605);
         output;
      end;
   end;
run;

data _null_;

   if _n_ = 1 then 
      do;
      length outname $5.;
      length i random 8.;
      /* Read TEST data set */
      declare hash test(dataset: 'test');
      /* Define OUTNAME as key and I, RANDOM as data for hash object */
      test.defineKey('outname', 'i', 'random');
      test.defineData('outname', 'i', 'random');
      test.defineDone();
      /* avoid uninitialized variable notes */
      call missing(outname, i, random);
      end;

   /* Create output data set from hash object */
   rc = test.output(dataset: 'work.out');
   stop;

*****; run;

proc print data=work.out;
run;

prints

Obs    outname    i     random

  1     three     2    0.64121
  2     one       4    0.80749
  3     three     1    0.95194
  4     one       2    0.33166
  5     three     3    0.76309
  6     one       5    0.81446
  7     two       5    0.09517
  8     one       3    0.20278
  9     three     4    0.18146
 10     two       1    0.76102
 11     two       3    0.38716
 12     three     5    0.26591
 13     two       4    0.55788
 14     one       1    0.34649
 15     two       2    0.52385

Saturday, March 17, 2007

Writing to an in-memory array with PUT

This code writes to an in-memory array using the PUT statement. Although there ought to be a builtin way to do this (there was in the data step language's model, PL/I), there's not. This provides a workaround.

The general idea is create a memory buffer using FILE and INFILE with SHAREBUFFERS; when the file is written to, the value of the automatic variable _INFILE_ is set. You can then copy _INFILE_ to a member of your array. Only one actual I/O operation takes place, because everything is buffered.

The same technique works for a single variable; this example uses an array because that's what I needed when I created the original code.

filename buffer catalog 'work.io.buffer.source' lrecl=100 recfm=f;

data _null_;
   file buffer lrecl=100 recfm=f;
   put ' ';
run;

options stimer fullstimer;

data _null_;

   array claims (1000) $100. _temporary_ (1000 * ' ');

   infile buffer sharebuffers lrecl=100 recfm=f;
   file buffer;

   input @@;  /* Make sure buffer exists */

   do i = 1 to 1000;
      put @1 'a b c ' i comma6.0 @@;
      claims(i) = _infile_;
   end;

   file log; 

   do i = 1 to 5, 996 to 1000;
      put i= claims{i}=;      
   end;

   stop;

run;

See also the documentation for the _INFILE_ and _FILE_ variables. On some cases, _FILE_ will be all you need.

Case-insensitive Sorting

If you want to sort a field in a case-insensitive way (for use by humans, not computers), there are several ways you can do it.

options nocenter nodate nonumber;
data test;
infile cards;
input sometext $8.;
cards;
abcd
ABCD
aBcD
efgh
EFGH
111
a
Z
A
z
;;;; run;

proc sort data=test out=test_default;
   by sometext;
run;

title 'Default Sort Order';
proc print data=test_default;
run;

proc sort data=test out=test_lat1_lcs
          sortseq=lat1_lcs; 
   by sometext;
run;

title 'lat1_lcs Sort Order';
proc print data=test_lat1_lcs;
run;

proc sql;
   create table test_sql as
      select   *
      from     test
      order    by upcase(sometext);
quit;

title 'SQL Sort Order';
proc print data=test_sql;
run;

The result:

Default Sort Order

Obs    sometext

  1      111
  2      A
  3      ABCD
  4      EFGH
  5      Z
  6      a
  7      aBcD
  8      abcd
  9      efgh
 10      z

lat1_lcs Sort Order Obs sometext 1 111 2 a 3 A 4 abcd 5 ABCD 6 aBcD 7 efgh 8 EFGH 9 Z 10 z
SQL Sort Order Obs sometext 1 111 2 A 3 a 4 aBcD 5 abcd 6 ABCD 7 EFGH 8 efgh 9 z 10 Z

In this case, the last two sorts produce the same result. On an EBCDIC system, however, they would be different because of differences between ASCII encoding and EBCDIC encoding. Also, the first sort would produce different results on the two systems.

Writing a file with PROC SQL

It's hard to think of a good practical application for this code, but maybe one will come up. It shows the use of the file information functions in PROC SQL to write a flat file.

data test;
   input @1 value $8.;
cards;
abc
defg
hijklmno
;;;;

proc sql;

   select   F.filename_rc, 
            F.file_id, 
            D.value,
            fput  (f.file_id, 'value=' || value) as fput_rc, 
            fwrite(f.file_id) as fwrite_rc
   from
           (select   filename('myfile', 'c:\temp\myfile.txt') as filename_rc,
                     fopen('myfile', 'O', 256, 'v') as file_id
            from     test (obs=1)
            ) as F
   right join
            test as D
   on       1=1
   ;

quit;      

data _null_;
   infile 'c:\temp\myfile.txt';
   input;
   put 'INFO: myfile.txt: ' _infile_;
run;

prints in the log:

NOTE: The infile 'c:\temp\myfile.txt' is:
      File Name=c:\temp\myfile.txt,
      RECFM=V,LRECL=256

INFO: myfile.txt: value=abc
INFO: myfile.txt: value=defg
INFO: myfile.txt: value=hijklmno
NOTE: 3 records were read from the infile 'c:\temp\myfile.txt'.
      The minimum record length was 14.
      The maximum record length was 14.

Monday, March 12, 2007

Four methods of performing a look-ahead read

A question about determining whether the dates in two records are adjacent was posed to SAS-L.

As is frequently the case, the discussion quickly morphed to a related topic; in this case, how to do a look-ahead read.

I suggested the use of a second SET statement on the original data set, using the data set option FIRSTOBS=2. Richard DeVenezia submitted sample data and code using the BY statement; I submitted sample code that doesn't use a BY statement.

Here's the code to create the sample data:

data demo;
   input k $ val @@ ;
cards;
a 11 a 12 a 13
b 21
c 31 c 32
d 41 d 42 d 43 d 44
;

Here's my code:

data lookahead1 (drop=nextk);

   set demo end=endmain;

   if not endnext then 
      set demo (firstobs=2 
                keep=k val
                rename=(k=nextk val=nextval))
          end=endnext;

   if (k ne nextk) or endmain then 
      nextval = .;

run;

Here's Richard's code:

data lookahead2 (drop=i);

   set demo;
   by k; 

   if (first.k or not last.k) then
      do i = 1 to 1 + (first.k and not last.k);
         set demo(keep=val rename=(val=nextval) );
      end;
   if last.k then 
      nextval = .;

run;

Both methods produce the same output:

Obs    k    val    nextval

  1    a     11       12
  2    a     12       13
  3    a     13        .
  4    b     21        .
  5    c     31       32
  6    c     32        .
  7    d     41       42
  8    d     42       43
  9    d     43       44
 10    d     44        .

Ed Heaton later posted a method, credited to Mike Rhoads, of using MERGE for the same effect, without the need to do a special check for the last observation:

Options mergeNoBy=noWarn ;
Data lookahead1( drop=nextk ) ;
   Merge
      demo
      demo(
         firstObs=2
         rename=( k=nextK val=nextVal )
      )
   ;
   ...
Run ;
Options mergeNoBy=error ;

Howard Schreier posted generalized code which looks ahead an arbitrary number of observations:

%let lookdepth = 2;

data lookahead&LOOKDEPTH(drop = many count close_to_last i);

   do many = 1 by 1 until (last.k);
      set demo(keep=k);
      by k;
      end;
   do count = 1 to many;
      set demo;
      by k;
      close_to_last = (many - count) < &LOOKDEPTH ;
      if (first.k or not close_to_last) then
       do i = 1 to 1 + first.k * min(&LOOKDEPTH,many-1);
         set demo(keep=val rename=(val=ahead&LOOKDEPTH.val) );
         end;
      if close_to_last then do;
         ahead&LOOKDEPTH.val = .;
         end;
      output;
      end;

run;

Thursday, February 22, 2007

Counting across variables

On SAS-L, 2/22/2005, Laughing Beggar asked about counting the occurence of strings in variables:

These 3 must be easy, but I am forced to defer to the wisdom of the group. My data set has, say, ten character variables. Each variable can have a text value that starts with a letter of the aplhabet and is then followed by some numbers (eg A24.6). What I want to do is count the number of times a string starting with eg "A" appears (1) in the dataset, (2) for each record and (3) in each variable. I also need to do this for all the other letters of the alphabet. So for (2), for example, I'd like a new variable created for each letter of the alphabet that has the count of the number of times that letter appears as the leading character in the string across the ten variables. All hints gratefully accepted. SAS V8.02, WIN-XP.

Arrays are one solution to this problem.

The code can probably be made simpler, but something like this will do it:

data test;
   infile cards;
   input (var1-var5) ($);
   cards;
A1 B1 A1 Z2 C1
A1 A1 A1 A1 A1
B1 B2 A1 T2 D1
B1 B2 B3 B4 Z5
;;;;

data colcounts (keep= c1_c1-c1_c26  c2_c1-c2_c26  
                      c3_c1-c3_c26  c4_c1-c4_c26  
                      c5_c1-c5_c26)
     rowcounts (keep= r1-r26)
     dscounts  (keep=d1-d26);
   retain alpha 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';  
   drop alpha;

   set test end=end;

   array vars var1-var5;
   array letters_in_dataset{26} d1-d26;
   array letters_in_columns{5, 26} c1_c1-c1_c26  
                                   c2_c1-c2_c26
                                   c3_c1-c3_c26
                                   c4_c1-c4_c26
                                   c5_c1-c5_c26;
   array letters_in_row{26} r1-r26;

   do p = 1 to 26;
      letters_in_row{p} = 0;
   end;

   do v = 1 to 5;
      do p = 1 to 26;
         letters_in_dataset(p) + (vars{v} =: substr(alpha, p, 1));
         letters_in_row(p) + (vars{v} =: substr(alpha, p, 1));
         letters_in_columns(v, p) + (vars{v} =: substr(alpha, p, 1));
      end;
   end;

   output rowcounts;

   if end then 
      output colcounts dscounts;

run;
;

Saturday, February 17, 2007

PROC REPORT with numeric calculated total title

This is very similar to PROC REPORT and PROC TABULATE with ratios and total titles, except that it uses a numeric variable for Sex and shows only PROC REPORT code.

options nocenter;

data test;
   do Sex = 1, 2, 2;
      do i = 1 to 847;
         Savings = round(ranuni(12345)*50, .01);
         Chgs = savings + round(ranuni(12345)*40, .01) +
(Sex=1)*Savings*.2;
         output;
      end;
   end;
   drop i;
   format chgs savings comma10.2;
run;

proc format;
   value sex
      1  = 'Male'
      2  = 'Female'
      ._ = 'Total'
      other = 'Unknown'
      ;
   picture tabpct
      0-100 = '009.9%'
      ;
run;

title 'Output from PROC REPORT';

proc report data=test 
     missing nowindows;

   column sex csex savings=nobs savings savings=meansavings chgs
chgs=meanchg SaveRate;

   define sex         / noprint group width=8;
   define csex        / computed 'Sex' format=sex.;
   define nobs        / n format=comma5.0 'N';
   define chgs        / sum;
   define meanchg     / mean 'Mean Chrgs';
   define savings     / sum;
   define meansavings / mean 'Mean Savings';
   define SaveRate    / computed format=percent7.1 'Savings Rate';

   compute saverate;
      saverate = savings.sum / chgs.sum;
   endcomp;

   compute csex;
      if _break_ = '_RBREAK_' then
         csex = ._;
      else
         csex = sex;
   endcomp;

   rbreak after / summarize;

run;

The output:

Output from PROC REPORT

SexNSavingsMean SavingsChgsMean ChrgsSavings Rate
Male 847 21,193.54 25.02 42,496.27 50.17 49.9%
Female 1,694 43,075.18 25.43 77,354.14 45.66 55.7%
Total 2,541 64,268.72 25.29119,850.41 47.17 53.6%

An example of using multiple ACROSS variables in PROC TABULATE

From Richard de Venezia, slightly modified:

data abc;

   group = 1;

   idx = '1'; x = 1.1;
   idy = 'a'; y = 10.10;
   output;

   idx = '2'; x = 2.2;
   idy = 'b'; y = 11.11;
   output;

   idx = '3'; x = 3.3;
   idy = 'c'; y = 13.13;
   output;

run;

proc report nowindows data=abc;
  columns group (idx, x) (idy, y);
  define group / display group;
  define idx / across ;
  define idy / across ;
  define x / sum;
  define y / sum;
run;

Tuesday, February 13, 2007

Using PROC TRANSPOSE to get a list of variable names

I'm not a fan of PROC TRANSPOSE; there's almost nothing it can do that I can't do in a data step or PROC SQL. But back in August of 2004 on SAS-L, Michael Murff proposed a use of PROC TRANSPOSE which even I would support:
proc transpose data=sasuser.iris (obs=0)
               out=xnames (keep=_name_);
    var _all_;
run;
I added "obs=0" to the original proposal to speed processing, and Ya Huang added "var _all_" to get all variables, not just numeric ones.

Wednesday, January 24, 2007

Sending a page or text message from SAS

You might have occasion to send a page or a text message from a SAS program. That's easy to do if you know the email address used by the paging carrier. You can usually get this information on the carrier's web page. For example, the address used for Verizon Wireless pagers and cell phones is number@vtext.com. There's a list of the major carriers and their gateway addresses at . You have to set some system options before starting SAS if you want to use the email engine (I'm not sure why this has to be done at invocation, but it does). Here's a fragment of my sasv9.cfg file:
-emailsys smtp
-emailhost smtp.mail.yahoo.com
-emailid kd6ttl@yahoo.com
-emailpw xxxxx
-emailauthprotocol login
Here's code I could put in my SAS program:
filename textmsg email '9999999999@vtext.com' 
         subject='Program notification';                               
                           
                                                                       
                                                                
data _null_;                                                           
                                                                
   file textmsg;                                                       
                                                                
   put 'Program ABC completed';                                        
                                                                
run; 
A few seconds after I run this program (with a real phone number and email password, of course), the message appears on my cell phone. I've used a similar technique to send messages to a pager. There are some cautions, of course. A logical place to put this code would be in the TERMSTMT system option under version 9, but if something sufficiently bad happens the termination code will not be executed. It's always possible that the mail system or your internet connection is out of service. And not all pagers handle lower case letters correctly, so this is one of the very rare cases when it's ok to put text in ALL CAPS.

Using a dynamic style directive in PROC REPORT

This example was written in response in a question on SAS-L. Someone wanted to know how to force a line split within an cell. As it happens, the mechanism to do that is the same mechanism used to make part of the output bold or in a different font - ODS.

This should work for RTF and PDF output in SAS 8.2 and later (maybe earlier, but I can't check), and for HTML in 9.1.3 (but HTML in 8.2 doesn't work right).

data x;                                                                                                                                 
  text1='hello';                                                                                                                        
  text2='everyone';                                                                                                                     
  output;                                                                                                                               
run;                                                                                                                                    
                                                                                                                                        
ods listing close;                                                                                                                      
ods rtf file='h:\temp\test.rtf';                                                                                                        
ods pdf file='h:\temp\test.pdf';                                                                                                        
ods escapechar='^';                                                                                                                     
                                                                                                                                        
proc report data=x nowindows missing;                                                                                                   
  column text1 text2 text;                                                                                                              
  define text1-text2 / noprint;                                                                                                         
  define  text / computed;                                                                                                              
  compute text / character length=200;                                                                                                  
     text = '^S={font_face=courier font_weight=bold}' || text1
            || ' ^n^S={font_face=times font_style=italic}' || text2;                 
  endcomp;                                                                                                                              
run;                                                                                                                                    
                                                                                                                                        
ods rtf close;                                                                                                                          
ods pdf close;                                                                                                                          
ods listing;                                                                                                                            

The key here is the use of style directives. ^S tells SAS that style information will follow, and ^n means "insert a line break".

Using htmSQL in a batch SAS job

One of my interests lately has been in figuring out ways to create master/detail (invoice/lineitem, etc.) reports in SAS. I've concentrated on what can be done in base SAS.

Although it's not a base product, SAS/Intrnet also has a way to produce master/detail reports - the htmSQL processor. It takes an HTML page marked up with special tags and expands the tags using data from SAS. This is typically done though a web server, but if you have SAS/Intrnet licensed you can use the module outside of the web. You can produce only HTML (or other text-based output), but even so this has the potential to be a powerful tool for certain uses. This usage is documented, but I haven't seen a usage in the wild, so I'm posting an example.

I haven't done anything at all fancy with the HTML; it's just the bare minimum. Also, this doesn't show the hoops I had to jump through to get all the file permissions set right, but they would be site-dependent anyway.

%let htmsqlpgm = /opt/nethome1/datawebapps/sas-cgi/htmSQL;

/**** Set up data ****/
options compress=no nocenter nodate nonumber;

libname me '/usr/users/hamiltja';

data me.Invoices;

  infile cards;

  input @1  Invoice_Num   3.
        @5  Customer      $14.
        @20 Invoice_Date  date9.;

  format Invoice_Date worddatx12.;

cards;
101 Hugo Furst     01Jan2004
102 Freida Peeples 15Jan2004
103 Al E. Loohah   30Jan2004
;;;;

data items;

  infile cards;

  length Item $17.;

  input @1  Item  $10.
        @12 Cost  10.2;

  format cost comma10.2;

cards;
Widget     12.00
Gadget     10.00
Frammet    36.00
Gizmo      50.37
Whatsit    100.00
ItsIts     1.00
Thingie    37.56
Sprocket   13.00
Geehaws    5.76
HooHaas    22.22
DingDing   37.54
Thing      79.43
HoopyDoopy 40.01
Mairsy     67.66
Doat       77.21
Doesy      85.31
Divy       49.37
;;;;

data me.LineItems;

  keep Invoice_Num Line_Num Item Cost;

  set me.invoices (keep=invoice_num);

  length size1-size5 $6.;
  length item $30.;

  array sizes{5} size1-size5 (' ' 'Small' 'Medium' 'Large' 'Jumbo');

  Line_Num = 0;

  do i = 1 to 6;
     if ranuni(95819) > .2 then
        do;
        set items (rename=(Item=ItemName Cost=ItemCost))point=i;
        do j = 1 to 5;
           if ranuni(40324) > .2 then
              do;
              size = sizes(j);
              item = catx(' ', size, itemname);
              cost = itemcost * j;
              line_num + 1;
              output;
              end;
        end;
        end;
  end;

run;

proc datasets library=me nolist;
  modify LineItems;
     index create Invoice_Num;
run; quit;

/**** The Real Work ****/

/* Create a file containing the htmSQL we want to use. */

filename hsql '/usr/users/hamiltja/batchweb.hsql';

data _null_;
  infile cards;
  file hsql;
  input;
  put _infile_;
cards4;
<html>
  <body>
  <h1>Invoices and Items</h1>

  {query server="mtshrtst"}
  {library sqlname="invoice" path="/usr/users/hamiltja"}

  {sql}
  select   Invoice_Num,
           Customer,
           Invoice_Date
  from     invoice.invoices
  order    by invoice_num
  {/sql}

  {eachrow}
     <h2>Invoice {&INVOICE_NUM} for {&CUSTOMER} dated {&INVOICE_DATE}</h2>

     {sql}
     select   Line_Num,
              Item,
              Cost
     from     invoice.lineitems
     where    invoice_num = {&INVOICE_NUM}
     order    by line_num
     {/sql}

     <table border="1">
        <tr>{label var="{&sys.colname[*]}" 
                         before="<th>" 
                         between="</th><th>" 
                         after="</th>"}</tr>
        {eachrow}
        <tr>{&{&sys.colname[*]}
                      before="<td>" 
                      between="</td><td>" 
                      after="</td>"}</tr> {/eachrow}
     <table>
  {/eachrow}

  {/query}

  </body>
</html>
;;;;

filename htmsql pipe 
         "&HTMSQLPGM. /usr/users/hamiltja/batchweb.hsql '_debug=0'";
filename webout '/usr/users/hamiltja/batchweb.html';


/* Run program through pipe.  Throw away first four lines of output, */
/* which contain header information used by browsers.                                                                         */
data _null_;
  file webout;
  infile htmsql lrecl=1024 firstobs=4;
  input;
  put _infile_;
run;

Here's the first part of the output, with extraneous blank lines deleted:

<html>

  <body>

  <h1>Invoices and Items</h1>

     <h2>Invoice      101 for Hugo Furst dated 01JAN2004  </h2>
     <table border="1">
        <tr><th>Line_Num</th><th>item</th><th>cost</th></tr>
        <tr><td>       1</td><td>Widget</td><td>      12</td></tr>
        <tr><td>       2</td><td>Small Widget</td><td>      24</td></tr>
        <tr><td>       3</td><td>Medium Widget</td><td>      36</td></tr>
        <tr><td>       4</td><td>Small Gadget</td><td>      20</td></tr>

Friday, January 19, 2007

Using the mtPrivatizeVars macro in htmSQL

Here's how I would use the macro in an htmSQL page. The page is designed to display arbitrary data sets (for a particular data set, I could just hard code everything), and assumes that variables LIBRARY and TABLE have been passed as parameters. I'm not showing the queery statement or surrounding HTML here:

{sql}
select   resolve('%mtprivatizesqlvars(data={&LIBRARY}.{&TABLE})') 
            as selectstring length=32000
from     dictionary.columns
where    libname = upcase('{&LIBRARY}') 
         and upcase(memname) = upcase('{&TABLE}') 
         and memtype = 'DATA'
         and varnum = 1
{/sql}

{sql}
select    {&SELECTSTRING}
from     {&LIBRARY}.{&TABLE}
{/sql}

<table border="1">
   <tr><th>Name</th>{&SYS.COLNAME[*] before="<th>" between="</th><th>" after="</th>"}</tr>
   <tr><th>Label</th>{label var="{&SYS.COLNAME[*]}" before="<th>" between="</th><th>" after="</th>"}</tr>
  {eachrow}
   <tr><th>{&SYS.QROW}</th>{&{&SYS.COLNAME[*]} before="<td>" between="</td><td>" after="</td>"}</tr>
  {/eachrow}
</table>

The first block of code captures the output of the mtprivatizesql macro into an htmSQL variable. The macro must have been stored in a macro search path known to the htmSQL processor. The key here is the use of the RESOLVE function, which returns the results of evaluating a string. I'm assuming a reasonable number of variables (something which is under my control), so the results will fit into the selectstring length of 32,000 bytes. By the way, this doesn't work in SAS 8.2, where you'll get only the first 200 characters of the result.

The second block of code selects the records to display.

The third block of code displays the records. I've chosen to display two header lines, the first showing the variable names and the second showing the variable labels.

By the way, there are other checks, not shown, which prevent the end user from changing the URL to display an arbitrary table anywhere on the system.

Making Variable Values Private in SQL

Here's another usage of the file info functions. I wrote this because I wanted to display claims records on a web page, but without any personal information. Various data sets might be displayed, and they might not all have all of the variables I wanted to make private. I didn't want to write a separate SQL statement for each of the dozens of data sets, so I wrote a utility macro.

Task: Given a data set which contains soc_sec_num and other variables, create a select clause which masks soc_sec num but displays the other variables normally. Extend that to an arbitrary number of privatized variables and a large number of data set variables.

Here's my solution:

%macro mtPrivatizeSqlVars(
      data=,
      privatevars=soc_sec_num prime_prsn_ssn last_nm first_nm,
      newvalues=0 0 Lastname Firstname);
/* 10-Dec-2004 Jack Hamilton                                   */
/* Macro:      mtPrivatizeSqlVars                              */
/*                                                             */
/* Purpose:    Create a list of variables in a data set,       */
/*    separated by commas and suitable for use in an SQL       */
/*    SELECT statement, where values of specified vars are     */
/*    replaced by a fixed string.                              */
/*                                                             */
/* Parameters:                                                 */
/*    Data - the name of the dataset (libref.member)           */
/*    PrivateVars - list of variables to be privatized,        */
/*             separated by spaces                             */
/*    NewValues - values to be used instead of private values  */
/*                                                             */

   %local i j dsid nvars varlist thisvarname lowvarname varlabel vartype privatewordpos newtext thisword;

   %let privatevars = %lowcase(&PRIVATEVARS.);

   %let varlist = ERROR IN mtPrivatizeSqlVars MACRO;

   /* Open the dataset.  */

   %let dsid = %SYSFUNC(open(&DATA., I));
   %if &DSID = 0 %then
      %do;
      %let varlist = ERROR IN mtPrivatizeSqlVars MACRO: %sysfunc(sysmsg());
      %put ERROR: &VARLIST.;
      %goto EXIT;
      %end;

   %let nvars = %SYSFUNC(attrn(&DSID., nvars));
   %let varlist = ;

   %do i = 1 %to %EVAL(&NVARS. - 1);
      %let thisvarname = %SYSFUNC(varname(&DSID. ,&I.));
      %let lowvarname = %LOWCASE(&thisvarname);
      %let privatewordpos = 0;

      /* Find location of varname in private list. */
      %do j = 1 %to 999;
         %let thisword = %lowcase(%scan(&PRIVATEVARS., &J., %str( )));
         %if "&THISWORD." = "" %then %goto nomorewords;
         %if "&THISWORD." = "&LOWVARNAME." %then
            %do;
            %let privatewordpos = &J.;
            %goto nomorewords;
            %end;
      %end;
      %NOMOREWORDS:

      %if &PRIVATEWORDPOS. = 0 %then
         %let varlist = &VARLIST. &THISVARNAME. ,;
      %else
         %do;
         %let varlabel = %SYSFUNC(varlabel(&DSID. ,&I.));
         %let vartype = %SYSFUNC(vartype(&DSID. ,&I.));
         %let newtext = %scan(&NEWVALUES., &PRIVATEWORDPOS., %str( ));
         %if "&VARLABEL." = "" %then %let varlabel = &THISVARNAME.;
         %if &VARTYPE. = N %then
            %let varlist = &VARLIST. &NEWTEXT. as &THISVARNAME. label="&VARLABEL.", ;
         %else
            %let varlist = &VARLIST. "&NEWTEXT." as &THISVARNAME. label="&VARLABEL.", ;
         %end;
   %end;

   %let thisvarname = %SYSFUNC(varname(&DSID. ,&I.));
   %let lowvarname = %LOWCASE(&thisvarname);
      %if %sysfunc(indexw(&PRIVATEVARS., &LOWVARNAME.)) = 0 %then
         %let varlist = &VARLIST. &THISVARNAME.;
      %else
         %do;
         %let varlabel = %SYSFUNC(varlabel(&DSID. ,&I.));
         %let vartype = %SYSFUNC(vartype(&DSID. ,&I.));
         %let newtext = %scan(&NEWVALUES., &PRIVATEWORDPOS., %str( ));
         %if "&VARLABEL." = "" %then %let varlabel = &THISVARNAME.;
         %if &VARTYPE. = N %then
            %let varlist = &VARLIST. &NEWTEXT. as &THISVARNAME. label="&VARLABEL." ;
         %else
            %let varlist = &VARLIST. "&NEWTEXT." as &THISVARNAME. label="&VARLABEL." ;
         %end;

   /* Close the dataset.                                          */

%EXIT:

   %let rc = %SYSFUNC(close(&DSID.));

   &VARLIST.

%mend mtPrivatizeSqlVars;

data test;
   last_nm = 'Hamilton'; soc_sec_num = 123456789; 
   incrd_da = '12dec2004'd; paid_amnt = 75.00; 
   proc_cd = '99213';
   format incrd_da worddatx12.;
   output;
run;

%let changedvars = %mtprivatizesqlvars(data=test);
%put &CHANGEDVARS.;

proc sql;

   select   &CHANGEDVARS.
   from     test
   ;

quit;

Relevant log:

2023
2024  %let changedvars = %mtprivatizesqlvars(data=test);
2025  %put &CHANGEDVARS.;
"Lastname" as last_nm label="last_nm", 0 as soc_sec_num label="soc_sec_num", incrd_da , paid_amnt , proc_cd

Output:

last_nm   soc_sec_num      incrd_da  paid_amnt  proc_cd
Lastname            0   12 Dec 2004         75  99213

I then use the macro in an htmSQL page. htmSQL works better in this case than a SAS program in the broker would, because it's easy to generate a table with two header rows, one for the variable name and one for the variable label.

Thursday, January 18, 2007

PROC COMPUTAB Example

A Simple Example of PROC COMPUTAB
data example;
input year sales cost;
cards;
1988 83 52
1989 106 85
1990 120 114
;;;; run;

ods listing;
proc computab data=example; columns y1988 y1989 y1990 total; rows sales cost gprofit pctprofit; gprofit = sales - cost; y1988 = (year = 1988); y1989 = (year = 1989); y1990 = (year = 1990); col: total = y1988 + y1989 + y1990; row: pctprofit = gprofit / cost * 100; *****; run;

Drug Days

This program calculates the number days after a discharge date that a patient was taking (or rather, had a prescription for) a drug.

data patients;
   input @1  patient_id 2.
         @4  discharge_da date9.;
   format discharge_da date9.;
cards;
1  01feb2001
2  01jun2001
3  31dec2001
;;;; run;

data drugs;
   input @1  patient_id 2.
         @4  fill_da date9.
         @14 rx_days 3.;
   format fill_da date9.;
cards;
1  01mar2001 31
1  05feb2001 90
1  01jun2001 90
2  01jan2001 90
2  01jun2001 90
2  01dec2001 90
;;;;; run;

%global daysbefore;
data _null_;
   length daysbefore $10000;
   daysbefore = ' ';
   do i = 720 to 1 by -1;
      daysbefore = trim(daysbefore) || ' daybefore' || put(i, 3.-L);
   end;
   call symput('DAYSBEFORE', daysbefore);
run; 
   
data info;

   merge patients 
         drugs (in=in_drugs);
      by patient_id;

   array drug_days{-720:720} &DAYSBEFORE 
         day0 daysafter1-daysafter720;
   retain &DAYSBEFORE day0 daysafter1-daysafter720;

   if first.patient_id then 
      do day = -720 to 720;
         drug_days{day} = 0;
      end;

   if in_drugs then 
      do day = (fill_da - discharge_da) to 
               (fill_da - discharge_da + rx_days - 1);
         drug_days{day} = 1;
      end;

   if last.patient_id then 
      do;
      days0_through_30 = sum(0, day0, of daysafter1-daysafter30);
      days31_through90 = sum(0, of daysafter31-daysafter90);
      output;
      end;

   keep patient_id discharge_da days0_through_30 days31_through90;

run;

proc report data=info nowindows split='/';
   columns patient_id discharge_da days0_through_30 days31_through90;
   define  patient_id       / order 'Patient/ID';
   define  discharge_da     / display width=10 'Discharge/Date';
   define  days0_through_30 / display width=10 'Days 0-30';
   define  days31_through90 / display width=10 'Days 31-90';
run;

prints:

Patient   Discharge
     ID        Date   Days 0-30  Days 31-90
      1   01FEB2001          27          60
      2   01JUN2001          31          59
      3   31DEC2001           0           0

FTP with some error checking

filename cmds temp;

data _null_;
   file cmds recfm=v;
   infile datalines;
   input;
   put _infile_;
datalines;
verbose
open ftp.excursive.com
user nogood nogood
ascii on
get index.html
quit
;;;;

/* Windows */
filename ftp pipe "type %SYSFUNC(pathname(cmds)) | ftp -i -n";
/* Unix */
*filename ftp pipe "cat %SYSFUNC(pathname(cmds)) | ftp -i -n";

data _null_;
   retain errfound 1.;
   infile ftp end=end;
   input;
   put 'INFO: ' _infile_;
   if _infile_ =: '226 Transfer completed' then 
      errfound = 0;
   if end and errfound then 
      do;
      put 'ER' 'ROR: FTP failed.';
      *abort abend 12;
      end;
   return;
run;

PROC REPORT with pseudo-observation numbers

/* Create pseudo-observation numbers, restarted at each break.  */

options ls=64;
options nodate;
title ' ';

proc report
        data=sasuser.iris
        headskip split='\'
        colwidth=12
        nowindows;

   column species seq sepallen sepalwid;

   define species  / order      'Species\--';
   define seq      / computed   'Seq\--';
   define sepallen / order      'Sepal Length\--';
   define sepalwid / sum        'Sepal Width\--';

   compute seq;
      if species ne ' ' and _break_ eq ' ' then
         obs_no = 1;
      else
         obs_no + 1;
      seq = obs_no;
   endcomp;

   break after species / page;

*****; run;

PROC REPORT and PROC TABULATE with ratios and total titles

options nocenter;

data test;
   do Sex = 'M', 'F', 'F';
      do i = 1 to 847;
         Savings = round(ranuni(12345)*50, .01);
         Chgs = savings + round(ranuni(12345)*40, .01) +
(Sex='M')*Savings*.2;
         output;
      end;
   end;
   drop i;
   format chgs savings comma10.2;
run;

proc format;
   value $osex
      'M' = 'Male'
      'F' = 'Female'
      'T' = 'Total'
      ;
   picture tabpct
      0-100 = '009.9%'
      ;
run;

title 'Output from PROC REPORT';

proc report data=test 
     missing nowindows;

   column sex csex savings=nobs savings savings=meansavings chgs
chgs=meanchg SaveRate;

   define sex         / noprint group width=8;
   define csex        / computed 'Sex';
   define nobs        / n format=comma5.0 'N';
   define chgs        / sum;
   define meanchg     / mean 'Mean Chrgs';
   define savings     / sum;
   define meansavings / mean 'Mean Savings';
   define SaveRate    / computed format=percent7.1 'Savings Rate';

   compute saverate;
      saverate = savings.sum / chgs.sum;
   endcomp;

   compute csex / character;
      if _break_ = '_RBREAK_' then
         csex = 'Total';
      else
         csex = sex;
   endcomp;

   rbreak after / summarize;

run;

title 'Output from PROC TABULATE';

proc tabulate data=test missing;

   class sex;
   var chgs savings;

   keylabel sum=' ';
   keylabel mean=' ';
   keylabel rowpctsum='Savings Rate';

   tables sex=' ' all='Total',
          n*format=comma5.0
          savings=' '*(sum='Savings' mean='Mean
Savings')*format=comma10.2
          chgs=' '*(sum='Chgs' mean='Mean Chgs')*format=comma10.2
          savings=' '*rowpctsum*format=tabpct.
        / box='Sex'
          ;

run;

The output:

Output from PROC REPORT

Sex N Savings Mean Savings Chgs Mean Chrgs Savings Rate
F 1,694 43,075.18 25.43 77,354.14 45.66 55.7%
M 847 21,193.54 25.02 42,496.27 50.17 49.9%
Total 2,541 64,268.72 25.29 119,850.41 47.17 53.6%

Output from PROC TABULATE

Sex N Savings Mean Savings Chgs Mean Chgs Savings Rate
F 1,694 43,075.18 25.43 77,354.14 45.66 55.6%
M 847 21,193.54 25.02 42,496.27 50.17 49.8%
Total 2,541 64,268.72 25.29 119,850.41 47.17 53.6%

Copy a ZIP file from the Internet and then read a member

filename dane url
'http://www.parkiet.com:80/dane/danesesji/bazytxt/akcje/zywiec.zip'
         proxy='http://slcpx01:8080';

filename zipcopy  "d:\temp\zywiec.zip";
filename zipfile  SASZIPAM "d:\temp\zywiec.zip";

data _null_;
   nbyte=-1;
   infile dane nbyte=nbyte recfm=s;
   input;
   file zipcopy recfm=n;
   put _infile_;
run;

data _null_;
   infile zipfile(ZYWIEC.txt);
   input;
   put _infile_;
   if _n_ > 10 then stop;
run;

Using the TRANSLATE function to validate data

data test;
   infile cards;
   input phone $14.;
cards;
(916) 555-1234
9165551234
916-555-1234
916 555-1234
916 555 1234
916 5551234
916555123499
1234
5551212
555-1212
(916) 555-HELP
;;;;

data validate;
   set test;
   pattern = translate(phone, '000000000', '123456789');
   if pattern in ('(000) 000-0000', '0000000000', '000-000-0000', 
                  '000 000-0000', '000 000 0000', '000 0000000') then 
      valid = '1';
   else
      valid = '0';

   put (_all_) (=);

run;

StatGraph Cowboy Hat

data hat (compress=no pointobs=no);
   do x = -5 to 5 by .25;
      do y = -5 to 5 by .25;
         z = sin(sqrt(x*x + y*y));
         output;
      end;
   end;
run;

proc template;
   define statgraph myhat;
      layout gridded;
         surfaceplot x=x y=y z=z;
      endlayout;
   end;
run;

ods graphics on;
ods pdf file='c:\temp\hat.pdf' notoc;

data _null_;
   set hat;
   file print ods=(template="myhat");
   put _ods_;
run;

ods pdf close;

produces a PDF file containing an image.

Another way to read an Ingres table from SAS version 9

SAS Version 9 doesn't support Access to Ingres, so we have to use a Connect session to version 8.

options autosignon;

rsubmit sascmd='sas8' remote=newton persist=yes;

   %sysrput v8worklib = %SYSFUNC(pathname(work));

   libname ocnrates ingres database="%SYSGET(MT_RPT_OCNRATES)"
access=readonly;

   proc sql stimer;

      create table cnvrsn_fctr_typ_r as
         select  cnvrsn_fctr_typ_cd,
                 cnvrsn_fctr_typ_desc
         from    ocnrates.cnvrsn_fctr_typ_r
         where   cnvrsn_fctr_typ_cd not in ('BYPAS', 'FACIL')
         order   by cnvrsn_fctr_typ_cd;

   quit;

endrsubmit;

libname work8 v8 "&V8WORKLIB.";

proc copy in=work8 out=work constraint=yes index=yes datecopy move
clone;
   select cnvrsn_fctr_typ_r;
run;

rsubmit;
   proc datasets;
   run; quit;
endrsubmit;

libname work8 clear;
signoff;

"options autosignon" tells SAS to start the remote session with the rsubmit, without an explicit signon command.

"rsubmit sascmd='sas8' remote=newton persist=yes" tells SAS to start a SAS v8 session on Newton, and not to close the session after the rsubmit block has finished executing. This keeps the remote work library available until a signoff command is executed.

"%sysrput v8worklib = %SYSFUNC(pathname(work))" tells SAS to create a macro variable V8WORKLIB in the local session whose value is the path to the work library of the remote session.

We then write some data to that work library.

"libname work8 v8 "&V8WORKLIB.";" creates a libref in the local session that points to the remote session's work library, where the results of the code we just executed are stored.

PROC COPY moves the selected tables from the remote library to the local library. The work library doesn't have to be WORK, of course. The other options are things I think we would usually want. For moving ordinary data sets, PROC COPY is probably better than PROC MIGRATE.

The next rsubmit block is only there to show that you can submit more remote code to execute; after the first time you on't have to specify any options, and the last active session will be used.

Finally, we clear the libname and close the remote session. If you don't do this explicitly, it will happen automatically when the job ends.

Wordcount using rxparse

Counting words in a string using SAS regular expression functions

data _null_;
   retain rx;
   infile cards eof=eof;
   input @1 string $char30.;

   if _n_ = 1 then
      do;
      rx = rxparse('" "* ((~" "+ " "+) #1)*');
      put @1 'Words'
          @7 'String';
      end;

   call rxsubstr(rx, string || ' ', pos, len, score);

   put @1  score   5.0-r
       @7  string  $char30.;

return;

EOF:

      put 'end of data';
      call rxfree(rx);

cards4;
123456789012345678901234567890
String
oneword
 oneword
two words
three words here
  big gaps a   b  c          d
a*b*c=d

xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
;;;;

Celko's median

This is Joe Celko's solution for calculating a median in SQL.

* From SQL For Smarties, Second Edition, by Joe Celko.  ;

* If there is no statistical median, the mean of the   ; 
* value just below and the value just above is used.   ;

data parts;
   input @1  pno    $2.
         @4  pname  $5.
         @10 color  $5.
         @16 weight 2.
         @19 city   $6.;
cards;
p1 Nut   Red   12 London
p2 Bolt  Green 17 Paris
p3 Cam   Blue  12 Paris
p4 Screw Red   14 London
p5 Cam   Blue  12 Paris
p6 Cog   Red   19 London
;;;; *****; run;

proc sql;

   select avg(distinct weight)
   from   (select f1.weight
           from   parts as f1,
                  parts as f2
           group  by f1.pno, f1.weight
           having sum(case when f2.weight = f1.weight then 1
                           else 0
                      end)
                  >=
                  abs(sum(case when f2.weight < f1.weight then 1
                               when f2.weight > f1.weight then -1
                               else 0
                          end)));

*****; quit;

Thursday, April 13, 2006

Parsing SYSPARM with infile magic

Here's a method of using "infile magic" to parse the value of the &SYSPARM macro variable, which is valued by setting the SYSPARM system option, typically on the command line or in JCL when you start SAS.

options sysparm='value=37.2 start=01jan2006, end=01mar2006';

data _null_;
   retain repeats 2.;
   infile cards;
   informat start end date9.;
   input @;
   _infile_ = translate(symget('sysparm'), ' ', ',');
   input start= end= value= repeats=; 
   format start end date9.;
   put (_all_) (=);
   stop;
cards;

;;;; 

prints:

start=01JAN2006 end=01MAR2006 value=37.2 repeats=2

A few comments:

  • The separator has to be a blank for named input. Commas are translated into blanks because under MVS a comma will appear in the value of SYSPARM if you use JCL continuation when specifying SYSPARM. This will, of course, present difficulties if you need to have a comma in one of your values.
  • You can assign a default value to a parameter by establishing its value before the INPUT statement. In this case, I used the RETAIN statement to set the value.
  • The parameters can appear in any order. Unfortunately, you can't tell (without reparsing the data) which order they did appear in.
  • If you include in SYSPARM a variable which does not appear in the INPUT statement, you'll get a note in the log and _ERROR_ will be set to 1.
  • In real life, you'd want to save the values somewhere, either in a SAS data set or with CALL SYMPUT/SYMPUTX.

Creating and emailing an Excel workbook on MVS

The sample code below shows how to create an "Excel workbook" on MVS and email it to yourself.

I put "Excel workbook" in quotes because it's not really a native Excel workbook. It's XML which Excel knows how to read. Before distributing a workbook created this way, you should open it in Excel and save it as a native document - recent versions of Excel can read the XML, but not everyone has upgraded.

(Future versions of Excel might use XML as their native format, in which case SAS might be able to create native format Excel files. But that hasn't happened as of spring 2006.)

//EMAILXLS JOB xxxxxx,'Email XLS',NOTIFY=&SYSUID,
//          MSGCLASS=X,CLASS=S
//*
/*ROUTE PRINT FETCH
//*
//S1REPORT EXEC SASPROD,
//            OPTIONS='ls=71 noovp nocenter'
//ODSTEXT   DD DSN=&SYSUID..ODSTEMP.XLS,DISP=(MOD,CATLG,CATLG),
//             LRECL=16392,RECFM=VB,SPACE=(16392,(1000,2000)),
//             UNIT=SYSWRK
//SYSIN DD *,DLM='\\'

/* Get latest copy of tagset from SAS web site */
filename tagset http
   'http://support.sas.com:80/rnd/base/topics/odsmarkup/excltags.tpl';
%include tagset / nosource2;

/* Empty the output file, in case this is a rerun. */
data _null_;
   file odstext old;
run;

ods tagsets.excelxp file=odstext record_separator=none style=sasweb;
ods listing close;

title 'Transfer';

data VolumeExpense;
   length PurchMC ProvMC Type $8.;
   format Cost comma10.2;
   do purchmc = 'Hay', 'Rch', 'Oak';
      do provmc = 'Oak', 'Hay', 'Rch';
         do type = 'Volume', 'Expense';
            cost = round(ranuni(94612)*100, .01);
            output;
         end;
      end;
   end;
run;

/* Print the first sheet */
ods tagsets.excelxp
      options(sheet_name='Output from PRINT'
      frozen_headers='1'
      row_repeat='1'
      );

proc print data=VolumeExpense;
run;

/* Print the second sheet, triggered by a new ODS statement  */
/* to the tagset without a new FILE=.                        */
ods tagsets.excelxp
      options(sheet_name='Output from REPORT'
      frozen_headers='3'
      row_repeat='1-3'
      autofilter='2'
      );

proc report data=VolumeExpense missing nofs nocenter
      completerows completecols;
   column purchmc type provmc, cost cost=totcost;
   define purchmc / group 'Purch MC' width=8 order=data;
   define type    / group 'Type'     width=8 order=data;
   define provmc  / across 'Prov MC' width=8 order=data;
   define Cost    / sum 'Cost';
   define totcost / sum 'Total Cost' width=10 format=comma10.2;
   compute provmc;
      if type = 'Expense' then
         call define(_col_, 'style', 'style={background=yellow}');
   endcomp;
run;

ods _all_ close;
ods listing;

/* Close and free the output file. */            
data _null_;                                      
   length filename $44.;                          
   file odstext mod close=free filename=filename;
   call symput('REPORTDSN', trim(filename));      
run; 

filename email email
               attach=("&REPORTDSN."
                       name='EmailXLS' extension='xls')
               to='Jack.Hamilton@kp.org'
               from='Jack.Hamilton@kp.org'
               subject='Excel test ';

data _null_;

   file email;

   put 'Results are attached.';

run;

filename email clear;

\

A few interesting features of this code:

//ODSTEXT   DD DSN=&SYSUID..ODSTEMP.XLS,DISP=(MOD,CATLG,CATLG),
//             LRECL=16392,RECFM=VB,SPACE=(16392,(1000,2000)),
//             UNIT=SYSWRK

This makes sure that there's a file to which to write the output. DISP=MOD tells the system to use an existing file if there is one, and to create a new one if there isn't.

/* Empty the output file, in case this is a rerun. */
data _null_;
   file odstext old;
run;

This makes sure the file is empty (which it wouldn't be if this were a rerun).

ods tagsets.excelxp
      options(sheet_name='Output from PRINT'
      frozen_headers='1'
      row_repeat='1'
      );

This tells SAS to create a new sheet (in this case, the first one), and specifies the sheet name and some header information.

ods tagsets.excelxp
      options(sheet_name='Output from REPORT'
      frozen_headers='3'
      row_repeat='1-3'
      autofilter='2'
      );

This tells SAS to start the second sheet, and specifies a different sheet name and header information. It also specifies an autofilter on the second column.

/* Close and free the output file. */            
data _null_;                                      
   length filename $44.;                          
   file odstext mod close=free filename=filename;
   call symput('REPORTDSN', trim(filename));      
run; 

This tells SAS to close and free the output data set. In the case of a new data set, it will be catalogued (ordinarily, it wouldn't be catalogued until the job step finished). Use of the FILENAME= option captures the data set name; this allows you to specify the data set name in only one place, the JCL, instead of in both the JCL and the SAS code.

filename email email
               attach=("&REPORTDSN." 
                       name='EmailXLS' extension='xls')
               to='Jack.Hamilton@kp.org'
               from='Jack.Hamilton@kp.org'
               subject='Excel test ';

This sends the newly written file as an attachment to email. Notice that SAS requires the actual catalogued data set name, not a DDname, which is why I had to free the file in the previous data step.

The resulting file is here.

Creating and emailing an RTF document under MVS

RTF is a document format which can be easily read by Microsoft Word, and by other applications such as OpenOffice.org on various hardware and software platforms. The program below shows sample JCL needed to create an RTF file containing a graph and a table, along with the resulting RTF file itself.

You could get much fancier, changing the font sizes and colors of various things, creating side-by-side graphics, and so forth, but I wanted to stick with a simple example.

p>I did want to point out the graph type I used, which is relatively new:

This is a radar chart, and it is often used to compare multiple values across multiple groups. In this example with randomly generated data, you can see that Sacramento and Oakland have higher medical costs than surgical costs, but the other locations have higher surgical costs than medical costs. PROC GRADAR is documented in the Online Docs, which contain several other examples.

Two other new graph types that may be of interest: GAREABAR lets you display the magnitude of two variables of interest, and there's a nifty example. GBARLINE lets you easily place a line chart on top of a bar chart.

Here's the JCL and source code; you will probably need to change the JCL to match your site's requirements (in particular, you will need to change the JOB card and possibly the name of the catalogued procedure in the EXEC card and the unit name in the ODSTEXT DD statement:

//EMAILRTF JOB xxxxxx,'EMAIL RTF',NOTIFY=&SYSUID,
//          MSGCLASS=X,CLASS=S
//*
/*ROUTE PRINT FETCH
//*
//S1REPORT EXEC SASPROD,
//          OPTIONS='ls=71 noovp nocenter errorabend errorcheck=strict'
//ODSTEXT   DD DSN=&SYSUID..ODSTEMP.RTF,DISP=(MOD,CATLG,CATLG),
//             LRECL=16392,RECFM=VB,SPACE=(16392,(1000,2000)),
//             UNIT=SYSWRK
//SYSIN DD *,DLM='\\'

options errorcheck=strict noovp nocenter;

data Costs;
   length PurchMC ProvMC Type $8.;
   format Cost comma10.2;
   do i = 1 to 20;
      do purchmc = 'Hay', 'Rch', 'Oak', 'Sac', 'SF';
         do provmc = 'SF', 'Oak', 'Sac', 'Hay', 'Rch';
            Cost = round(ranuni(94612)*100, .01);
            if ranuni(95819) > .5 then
               type = 'Medical';
            else
               type = 'Surgical';
            output;
         end;
      end;
   end;
run;

/* Empty the output file, in case this is a rerun. */
data _null_;
   file odstext old;
run;

/* Send output to RTF */
options orientation=portrait;
ods rtf file=odstext style=sasweb;
ods listing close;
ods rtf startpage=never;

/* Send graphics to PNG (somewhat arbitrary choice).  */
goptions
  reset   = goptions
  device  = png
  target  = png
  xmax    = 5in
  ymax    = 4in;

title 'Cost by Type by Medical Center';

ods rtf text='Here is a chart:';

proc gradar data=Costs;
    chart purchmc
          / sumvar=Cost
            overlay=Type
            cstars=(red blue)
            starcircles=(0.5 1.0)
            cstarcircles=black
         ;
run;

/* Create table output */

ods rtf text='And here is a table:';

proc report data=Costs nofs missing;

   column type purchmc Cost n;

   define type     / Group 'Type';
   define purchmc  / group 'Medical Center';
   define Cost     / analysis sum 'Cost';
   define n        / 'Records';

run;

/* Close ODS destination */
ods rtf close;

/* Empty the output file, in case this is a rerun. */
data _null_;
   length filename $44.;                          
   file odstext mod close=free filename=filename;
   call symput('REPORTDSN', trim(filename)); ;
run;

/* Send via email */
filename email email
               attach=("&REPORTDSN." 
                       name='emailrtf'
                       lrecl=16392
                       type='text/rtf'
                       extension='rtf')
               to='Jack.Hamilton@kp.org'
               from='Jack.Hamilton@kp.org'
               subject='Test File';

/* Send the mail */
data _null_;
   file email;
   put 'SAS Output is attached.';
run;

//
The results are here.

Zipping and Emailing a file under MVS

This example is completely platform-dependent, and runs under MVS and its descendents. Windows and Unix platforms can perform a similar function using pipes.

Release 9.2 of SAS will probably support the SASZIPAM engine for reading zip files, under most platforms (including MVS, where it currently doesn't work). It doesn't sound likely that a write engine will be added, and it is almost certain that support for enhanced encryption, in either direction, will not be added.

The attached SAS job shows how to write an Excel workbook and and PDF using ODS, place those files into a ZIP archive under MVS, and then email the results.

The job attempts to do as much as possible using DDnames rather than using data set names; I wanted to take advantage of JCL for naming and enqueuing. The job could undoubtedly be made shorter or simpler, but I didn't want to spend a lot of time on it. Documentation for PKZIP for zSeries can be found at www.pkzip.com. If you have the appropriate license, you can encrypt as well as compress files.

Similar code could be written to unZIP files.

If you run the job, please remember to change the email addresses near the bottom from mine to yours; also, you'll have to change the JOB card.

//ZIPSTEP JOB xxxxxx,jobtext,NOTIFY=&SYSUID,CLASS=S,MSGCLASS=X
/*ROUTE PRINT FETCH
/*JOBPARM L=999
//*
//ZIPFILE SET ZIPFILE=&SYSUID..ZIPSTEP.SAMPLE.ZIP
//XLSFILE SET XLSFILE=&SYSUID..ZIPSTEP.TRANSFER.XLS
//PDFFILE SET PDFFILE=&SYSUID..ZIPSTEP.TRANSFER.PDF
//*
//S1DELETE EXEC PGM=IEFBR14
//ZIPFILE   DD DSN=&ZIPFILE.,
//             DISP=(MOD,DELETE,DELETE),UNIT=SYSWRK,SPACE=(1,1)
//XLSFILE   DD DSN=&XLSFILE,
//             DISP=(MOD,DELETE,DELETE),UNIT=SYSWRK,SPACE=(1,1)
//PDFFILE   DD DSN=&PDFFILE,
//             DISP=(MOD,DELETE,DELETE),UNIT=SYSWRK,SPACE=(1,1)
//*
//S2DATA  EXEC SASPROD,
//         OPTIONS='ls=71 noovp nocenter'
//XLSFILE   DD DSN=&XLSFILE,DISP=(NEW,PASS,DELETE),
//             UNIT=SYSWRK,SPACE=(TRK,(20,50)),
//             RECFM=VB,LRECL=16392
//PDFFILE   DD DSN=&PDFFILE,DISP=(NEW,PASS,DELETE),
//             UNIT=SYSWRK,SPACE=(TRK,(20,50)),
//             RECFM=VB,LRECL=16392
//SYSIN     DD *,DLM='\\'

/* Get latest copy of ExcelXP tagset from SAS web site */
filename tagset http
   'http://support.sas.com:80/rnd/base/topics/odsmarkup/excltags.tpl';
%include tagset / nosource2;

/* Use it */
ods tagsets.excelxp file=xlsfile record_separator=none style=sasweb;
/* We also want PDF. */
ods pdf file=pdffile notoc style=sasweb;
ods listing close;

title 'Transfers';

/* Sample data */
data test;
   length purchmc provmc type $8.;
   do purchmc = 'Hay', 'Rch', 'Oak';
      do provmc = 'Oak', 'Hay', 'Rch';
         do type = 'Volume', 'Expense';
            cost = round(ranuni(94612)*100, .01);
            output;
         end;
      end;
   end;
run;

/* Set sheet options */
ods tagsets.excelxp
      options(sheet_name='Transfers'
      frozen_headers='1'
      row_repeat='1'
      );

proc report data=test missing nofs nocenter
      completerows completecols;
   column purchmc type provmc, cost cost=totcost;
   define purchmc / group 'Purch MC' width=8
                    order=data;
   define type    / group 'Type' width=8;
   define provmc  / across 'Prov MC' width=8
                    order=data;
   define cost    / sum 'Cost' format=comma8.2;
   define totcost / sum 'Total Cost' width=10 format=comma10.2;
   compute provmc;
      if type = 'expense' then
         call define(_col_, 'style', 'style={background=yellow}');
   endcomp;
run;

ods _all_ close;
ods listing;

\//*
//S3ZIP   EXEC PGM=PKZIP
//STEPLIB   DD DSN=SYSL.PKZIPMVS.NPRD.LOADLIB,DISP=SHR
//XLSFILE   DD DSN=&XLSFILE,DISP=(SHR,PASS)
//PDFFILE   DD DSN=&PDFFILE,DISP=(SHR,PASS)
//ZIPFILE   DD DSN=&ZIPFILE.,DISP=(NEW,CATLG,CATLG),
//             UNIT=SYSWRK,SPACE=(TRK,(20,50)),FREE=CLOSE
//SYSIN     DD *
-TRANSLATE_TABLE_DATA(EBC#850)
-DATA_DELIMITER(CRLF)
-FILE_TERMINATOR()
-ARCHIVE_OUTFILE(ZIPFILE)
-COMPRESSION_LEVEL(MAXIMUM)
-ZIPPED_DSN(*.*.*.*,++*.*)
-INFILE(XLSFILE)
-ACTION(ADD)
-INFILE(PDFFILE)
-ACTION(ADD)
//*
//S4MAIL  EXEC SASPROD,
//         OPTIONS='ls=71 noovp nocenter'
//SYSIN     DD *,DLM='\\'
filename email email
               attach=(".zipstep.sample.zip"
                       name='Sample' extension='zip')
               to='Jack.Hamilton@kp.org'
               from='Jack.Hamilton@kp.org'
               subject='Sample ZIP file';

data _null_;

   file email;

   put 'ZIP Sample attached.';

run;
\

The file sample.zip contains the results of running this job.