*==============================================================================================; *Program Name : readrtf.sas ; *==============================================================================================; %macro readrtf(f = /* RTF file to be read */ ,outdat = readrtf /* OUTPUT file name */ ,clength = /* Override default length of c2-c[n] */ ,devtype = 0 /* Deviation type from default layout */ ,wrapcrit = /* > x% of cols blank counts as wrapped row */ ,last_idcol = /* For listings, last ID column number */ ,debug = n /* Set to y for troubleshooting */ ,indent_c1 = n); /* Deprecated parameter */ *-------------------------------------------------------------------------------------------------* | | | SECTION 1 | | | | Parameter pre-processing: Set defaults and check for err(ors) | | | *-------------------------------------------------------------------------------------------------*; *----------------------------------------------------------------------* | If data set readrtf already exists, remove it for good housekeeping | *----------------------------------------------------------------------*; %if %sysfunc(exist(work.readrtf)) %then %do; proc datasets lib=work nolist; delete readrtf; quit; %end; *-----------------------------------------------------------------------------------------* | - Set defaults for wrap cutoff and last ID column. | | - Check that an input RTF file is indicated and exit if it is not or it does not exist. | | - If a wrap criterion is indicated, check that it is a valid value (between 0 and 1). | *-----------------------------------------------------------------------------------------*; %if "&wrapcrit"="" %then %do; %if &devtype ne 4 and &devtype ne 6 %then %let wrapcrit=1; %else %let wrapcrit=0.4;; %end; %let msg=0; %if "&f"="" or %sysfunc(fileexist(&f))=0 %then %let msg=%upcase(ER)ROR: Indicated RTF file (&f) does not exist. READRTF will abort.; %else %if %sysevalf(&wrapcrit < 0) or %sysevalf(&wrapcrit > 1) %then %let msg=%upcase(ER)ROR: Indicated wrap criterion (&wrapcrit) not between 0 and 1. READRTF will abort.;; %if "&last_idcol" ne "" %then %do; %if %sysfunc(verify(%sysfunc(compress(&last_idcol, %str( ))), 0123456789)) ne 0 %then %let msg=%upcase(ER)ROR: Indicated last ID column should be a number between 1 and 20, inclusive. READRTF will abort.;; %end; %if "&msg" ne "0" %then %do; %put &msg; %goto rrexit; %end; %if "&last_idcol"="" %then %let last_idcol = 99; *-------------------------------------------------------------------------------------------* | Determine whether this is a table or a listing based on the first letter of the file name | | and set the default column length accordingly (for readrtf output data set) | *-------------------------------------------------------------------------------------------*; %let utype=table; %if %sysfunc(substr(%sysfunc(scan(&f, -1, /)), 1, 1)) = l %then %let utype=listing;; %if "&clength" = "" %then %do; %let clength = 200; %if &utype=table %then %let clength = 75;; %end; %else %let clength = %sysfunc(compress(&clength, %str($)));; %let slength = %eval(&clength + 400); *-------------------------------------------------------------------------------------------------* | | | SECTION 2 | | | | Data pre-processing: Read RTF file and "repair" lines split by line feeds | | | *-------------------------------------------------------------------------------------------------*; *----------------------------------------------------------------------------------------------------------* | 1) Read the indicated RTF file into a SAS data set, including all hidden RTF code. | | 2) Determine whether the file was generated by SAS ODS (progsrc = O) or by percent print or a dept macro | | as identified in the top section of the RTF file. Absence of an indication this was generated by ODS | | is taken as evidence this was a percent print/departmental macro-generated file. | *----------------------------------------------------------------------------------------------------------*; data _null_; retain progsrc; infile "&f" missover length=l obs=50 lrecl=2000; input string $varying2000. l; if _n_ = 1 then progsrc = 'D'; if index(string, '\author SAS') then progsrc='O'; call symput('progsrc', progsrc); run; data dumpster_ (drop=src); retain rownum 0; infile "&f" missover length=l end=lastobs lrecl=2000; input string $varying2000. l; olength=l; nlength=length(trim(string)); *------------------------------------------------------------------------------------* | For dept. macro or %print output, split lines read from the RTF file into multiple | | observations based on RTF table cell and paragraph delimiters, to facilitate | | determination of which records to keep and which to drop (setup differs from ODS) | *------------------------------------------------------------------------------------*; string = tranwrd(string, '\cellx', '@^@@@@'); if index(string, '{\line}') then do; string = tranwrd(string, '{{\line}', '{@blankrow@'); string = tranwrd(string, '{\~{\line}', '{@blankrow@'); string = tranwrd(string, '{\line}', ''); end; src = string; rownum + 1; %if &progsrc = D %then %do; if index(substr(src, 2), '\trowd') or index(substr(src, 2), '\pard') then do; keepgoing = 1; do while (keepgoing); break1 = index(substr(src, 2), '\trowd'); break2 = index(substr(src, 2), '\pard' ); if break1 and not break2 then break = break1 + 1; else if break2 and not break1 then break = break2 + 1; else break = min(break1, break2) + 1; string = substr(src, 1, break - 1); output; src = substr(src, break); if index(substr(src, 2), '\trowd')=0 and index(substr(src, 2), '\pard')=0 then do; keepgoing = 0; string = src; output; end; end; end; else output; %end; run; *---------------------------------------------------------------------------------------------* | For dept. macro or %print output, consolidate observations that contain trailing fragments | | that really should be appended to the value of the preceding observation to form a coherent | | "sentence" in RTF speak | *---------------------------------------------------------------------------------------------*; %if &progsrc=D %then %do; data dumpster2_(drop=src trailingspace olength nlength) dumpdel2 (keep = delrow where=(delrow)) %if &debug=y %then dumpster2_debug;; length string $2000; retain string trailingspace; set dumpster_(rename=(string=src)); rownum = _n_; if index(src, '\trowd') or index(src, '\pard') then string=src; else do; delrow = _n_ - 1; if trailingspace then string = trim(string)||' '||src; else string = trim(string)||src; end; if olength = nlength + 1 then trailingspace = 1; else trailingspace = 0; run; proc sql; delete from dumpster2_ where rownum in (select delrow from dumpdel2); quit; %end;; *---------------------------------------------------------------------------* | Pre-process dumpster(2)_ for missing "\row}" or "\cell\row}" so that each | | RTF "sentence" reads coherently within an observation, which facilitates | | programming further down below. Such fragments are required to signal | | the end of a cell or a row. | *---------------------------------------------------------------------------*; proc sort data=%if &progsrc=D %then dumpster2_; %else dumpster_;; by descending rownum; run; data dumpster3_(drop=i cchars:) %if &debug=y %then dumpster3_debug;; set %if &progsrc=D %then dumpster2_; %else dumpster_;; by descending rownum; length orphcell $ 200; retain orphcell; if index(lag(string), '\trowd') then do; if reverse(scan(reverse(string),1,'\'))='cell' then string=trim(left(string))||'\row}'; else if substr(reverse(trim(left(string))),1,1)='}' and index(string,'\cell')=0 and index(string,'\row' )=0 then string=trim(left(string))||'\cell\row}'; end; *------------------------------------------------------------------------* | Convert greater-than-or-equal signs coded in a certain way, and remove | | RTF control words for cell alignment and certain border formatting | *------------------------------------------------------------------------*; if index(string, '\field') then do; string=tranwrd(string, '{\field{\*\fldinst SYMBOL 179 \\f "Symbol" }}', '>='); string=tranwrd(string, '{{ \field{\*\fldinst SYMBOL 179 \\f "Symbol" \\s 10}}', '{>='); end; if index(string, '\pard') then do; string=tranwrd(string, '\f6\fs18\u8805\3\f1', '>='); string=tranwrd(string, '\f6\fs18\u8804\3\f1', '<='); end; if index(string, '\super') then do; array cchars {10} $1 ('a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'); do i=1 to dim(cchars); string=tranwrd(string, '{\super '||cchars(i)||'} ', ''); string=tranwrd(string, '{\super '||cchars(i)||' \nosupersub} ', ''); end; end; if index(string, '\q') then do; string=tranwrd(string, '\qr' , ''); string=tranwrd(string, '\qc' , ''); string=tranwrd(string, '\ql' , ''); end; if index(string, '\brdrs') then do; string=tranwrd(string, '\brdrl\brdrs', ''); string=tranwrd(string, '\brdrt\brdrs', ''); string=tranwrd(string, '\brdrr\brdrs', ''); string=tranwrd(string, '\brdrb\brdrs', ''); end; string=tranwrd(string, '\~', ''); ** ssingh(03MAR2015): added (and string not in: ('\cell}') to read in blank cells; if string in: ('{' '}' '\') and string not in: ('\cell}') then do; string = trim(string)||' '||trim(left(orphcell)); orphcell = ''; output; end; else orphcell = trim(left(string))||' '||trim(left(orphcell)); do until(index(trim(string),' ')=0); string=tranwrd(trim(string),' ',''); end; run; proc sort data=dumpster3_; by rownum; run; *-------------------------------------------------------------------------------------------------* | | | SECTION 3 | | | | Convert vertical RTF cell data into horizontal column vars c1-c[n] | | | *-------------------------------------------------------------------------------------------------*; %if &devtype=3 %then %do; data dumpster3_; set dumpster3_; retain trtstrt header; length header $ 200; if _n_ = 1 then trtstrt=0; if index(string, 'Table ') then trtstrt=1; if trtstrt=1 and index(string, '\cell}') then do; header = reverse(scan(reverse(string), 2, '{\')); trtstrt = 0; end; run; data read_(keep=rownum indent c: prevblank header index=(indent)) %end; %else %if &devtype=4 or &devtype=5 or &devtype=6 %then %do; data read_(keep=rownum indent c: prevblank header index=(indent)) %end; %else %do; data read_(keep=rownum indent c: prevblank index=(indent)) %end; %if &debug=y %then read_debug;; *------------------------------------------------------------------------------------------------* | This data step is the heart of this macro, where all hidden RTF code is stripped from the text | | and numbers in the body of the table on each page to reproduce a crude resemblance of the | | summary SAS data set that went into the RTF file to begin with | *------------------------------------------------------------------------------------------------*; set dumpster3_; retain c1-c20 dropme indent blankrow type4_header hdrul hdrcell dev1_c1width %if &devtype=4 or &devtype=5 or &devtype=6 %then header;; length c1 $200 c2-c20 $&clength lowc $&slength %if &devtype=4 or &devtype=5 or &devtype=6 %then header $200;; lowc = lowcase(string); ix_endrow = index(string, '\row'); if _n_ = 1 then do; dropme = 1; type4_header = 1; %if &devtype=4 or &devtype=5 or &devtype=6 %then header='';; hdrul = 0; dev1_c1width = 0; end; *---------------------------------------------------------------------------* | Initialize array of blank column variables at the start of each table row | *---------------------------------------------------------------------------*; array c{20} $; if index(string, '\trowd') %if &devtype=4 or &devtype=5 or &devtype=6 %then or index(string, '@@@@'); then do; count = 0; indent = 0; if not dropme then blankrow + 1; do i=1 to dim(c); c{i} = ''; end; end; *-------------------------------------------------------------------------------------------* | When in the table header, track cell definitions. An underlined cell in column 1 of the | | table header signals the end of that header and the beginning of the body. An underlined | | cell in column 2 or up does NOT, by itself, trigger the table body section: it could be | | a span label from a departmental macro etc. | *-------------------------------------------------------------------------------------------*; if dropme in (1 2) then do; hdrul=0; if index(string, '\trowd') then hdrcell=1; xc=index(string, '@@@@' ); if hdrcell=1 then do; ul=index(string, '\clbrdrb'); if (xc=0 and ul) or (xc>0 and 0 < ul < xc) then hdrul=1; end; if xc then do; hdrcell + 1; %if &devtype=1 %then if dev1_c1width=0 then dev1_c1width = input(scan(substr(string, xc+4), 1, '\{}'), best.);; end; end; *-------------------------------------------* | Replace <= and >= symbols with plain text | *-------------------------------------------*; ix10 = index(string, '\uc1'); ix11 = index(string, '\u880'); if ix10 or ix11 then do; string = tranwrd(string, "{\uc1\u8805 '}{", ">="); string = tranwrd(string, "{\uc1\u8804 '}{", "<="); string = tranwrd(string, "\uc1\u8805 '" , ">="); string = tranwrd(string, "\uc1\u8804 '" , "<="); string = tranwrd(string, "\uc1\u8805\~" , ">="); string = tranwrd(string, "\uc1\u8804\~" , "<="); string = tranwrd(string, "\uc1\u8805 " , ">="); string = tranwrd(string, "\uc1\u8804 " , "<="); string = tranwrd(string, "{\u8805\4}", ">="); string = tranwrd(string, "{\u8804\4}", "<="); string = tranwrd(string, "}\cell", "\cell"); end; *---------------------------------------------------------------------------------* | Read the value of a cell into the appropriate column variable and determine the | | level of indentation of this table row item using the \li tag (left indent) for | | ODS, or blank spaces for dept. macros and %print | *---------------------------------------------------------------------------------*; if index(string, '{') and index(string, '\cell') then do; count + 1; c{count} = compress(scan(substr(string, 1, index(string, '\cell')-1), 2, '{'), byte(13)); if "&progsrc" = "O" then do; if count = 1 then do; templev = substr(string, index(string, '\li') + 3); nvc1=verify(templev, '-0123456789'); if nvc1 > 1 then indent = input(substr(templev, 1, nvc1-1), best.); end; if index(c{count}, '\li') then do; c{count}=left(tranwrd(c{count}, ' \line ', '')); c{count}=left(tranwrd(c{count}, '\line ', '')); c{count}=left(tranwrd(c{count}, '\lin', '')); if index(c{count}, '\li') then do; c{count} = substr(c{count}, index(c{count}, '\li')+3); c{count} = substr(c{count}, verify(c{count}, '0123456789')); end; end; c{count} = trim(left(c{count})); end; else do; if count = 1 then indent = verify(c1, ' '); c{count} = trim(left(compress(c{count}, '}'))); end; end; *----------------------------------------------------------------------------------------------------------------------* | Determine where the body of the table starts: | | - At the top of each page, or at "page x of y", set dropme to 1 to signal start of page. | | - At the start of the first row of the table, set dropme to 2 to signal start of table header. | | - At the first occurrence of the leftmost cell being underlined, set dropme to 3 to signal last row in table header. | | - At the end of the last row in table header, set dropme to 4 to signal end of table header. | | - At the first row underneath the table header, set dropme to 0 to signal observations are to be kept now. | *----------------------------------------------------------------------------------------------------------------------*; ix=index(lowc, '\page') or index(lowc, '\sectd') or index(compress(lowc, ' 0123456789'), 'pageof'); if dropme = 4 then dropme = 0; if ix then do; dropme = 1; type4_header = 1; header = ''; hdrul = 0; end; %if &progsrc=O %then else; if index(string, '\trowd' ) and dropme = 1 then dropme = 2; %if &progsrc=O %then else; if hdrul and dropme = 2 then dropme = 3; else if ix_endrow and dropme = 3 then dropme = 4; %if &devtype=3 %then if dropme=4 and c1 ne '' then dropme=0;; %if &devtype=6 %then if dropme=3 and hdrul and type4_header then dropme=0;; allmiss='Y'; do i=2 to dim(c); if c(i) ne '' then allmiss='N'; end; *-----------------------------------------------------------------------------------------* | Process devtype=1 (row labels are indented by 1 column underneath a category row label) | | by shifting over column 1, if filled and the rest is blank, into column 2. Then, | | shift over all columns to the left to align them starting at c1 | *-----------------------------------------------------------------------------------------*; %if &devtype=1 %then %do; if c1^='' and allmiss='Y' then do; c2 = c1; c1 = ''; allmiss = 'N'; end; if c1='' and allmiss='N' and dev1_c1width > 300 then do; do i=1 to 19; c(i) = trim(left(c(i+1))); end; end; %end; *-------------------------------------------------------------------------------------------------* | Once all cells on the row were read from the vertical RTF structure (one observation per cell), | | output the observation with all values for column 1-[n] (c1-c[n] retained. This completes the | | transpose from a vertical to a horizontal file. | *-------------------------------------------------------------------------------------------------*; if not dropme and ix_endrow then do; %if &devtype=1 %then %do; if c1='' and allmiss='N' and 0 < dev1_c1width <= 300 then do; do i=1 to 19; c(i) = trim(left(c(i+1))); end; end; %end; *-----------------------------------------------------------------------------------------* | Output the info on the RTF table row just processed, to data set read_. Rows with | | missing column 1 (c1) or a few other triggers not relevant to the data in the body of | | the table will not be output. For the 2-panel listings (devtypes 4-6), the header var | | is created here as a variable to be merged onto each observation further down. | *-----------------------------------------------------------------------------------------*; allblank = 1; do i=1 to dim(c); if compress(c{i}, ' \') ne '' and not index(c{i}, '\fs') and not index(lowcase(c{i}), "cont'd") and not index(lowcase(string), 'program: ') then allblank = 0; end; if not allblank then do; prevblank = (blankrow > 1 or c1=:'@blankrow@'); %if &devtype=4 or &devtype=5 or &devtype=6 %then %do; if type4_header then do; prevblank=0; do i=1 to dim(c); if compress(c{i}) ne '' then header = trim(left(header))||'!'||trim(left(c{i})); end; if header=:'!' then header=substr(header, 2); type4_header = 0; dropme = 1; end; else output read_; %end; %else %do; output read_; %end; blankrow = 0; end; end; %if &debug=y %then output read_debug;; run; *------------------------------------------------------------------------------------* | Process devtype=2 and identify entirely blank variables (columns). Such columns | | are either not existent in the table or they are spacer columns to separate | | underlined column span labels in the table headers, which is often done in %print. | *------------------------------------------------------------------------------------*; data read1_(drop=i drops: kpnum: keeps:) levels_(keep=indent index=(indent)) %if &debug=y %then read1_debug;; retain keeps: drops: kpnum:; set read_ end=lastobs; if c1=:'@blankrow@' or header=:'@blankrow@' then do; c1 = left(tranwrd(c1, '@blankrow@', '')); header = left(tranwrd(header, '@blankrow@', '')); end; if _n_=1 then do; array c{20} $; array keeps{20} $ (%do i=1 %to 20; " " %end;); array drops{20} $ (%do i=1 %to 20; "c&i" %end;); array kpnum{20} (%do i=1 %to 20; 0 %end;); end; %if &devtype=2 %then %do; if c1='' then do i=1 to dim(c)-1; c(i)=c(i+1); end; if c1='' then do i=1 to dim(c)-2; c(i)=c(i+1); end; %end; *-----------------------------------------------------------------------------------------* | Determine which columns are missing throughout the entire data set, from top to bottom, | | and stick those names into the dropper macro variable which will be used to drop these | | blank column vars in readrtf0 below. The keeper and numc macro vars are used in the | | renaming in readrtf0 of remaining column vars to ensure they are named consecutively. | *-----------------------------------------------------------------------------------------*; do i=1 to dim (c); if c(i) ne '' then do; keeps(i)='c'||compress(put(i, best.)); drops(i)=''; kpnum(i)= 1; end; end; if lastobs then do; call symput('dropper', compbl(drops(1)||' '|| %do i=2 %to 19; drops(&i)||' '|| %end; drops(20))); call symput('keeper', compbl(keeps(1)||' '|| %do i=2 %to 19; keeps(&i)||' '|| %end; keeps(20))); call symput('numc', compress(put(sum(of kpnum1-kpnum20), best.))); end; run; *-------------------------------------------------------------------------------------------* | Determine the number of unique levels of indentation, sort in order of magnitude, assign | | ordinal scale (1, 2, 3, ...) to each unique value, and merge that back into the data set. | | This provides the values for the "level" variable in ultimate data set readrtf. | *-------------------------------------------------------------------------------------------*; data levels_; set levels_; by indent; if first.indent then level + 1; if last.indent; run; proc sql; create table read_ as select b.level, a.* from levels_ b right join read1_ a on a.indent = b.indent order by rownum; quit; *----------------------------------------------------------------* | Drop the columns identified in read1_ above as entirely blank, | | using the variables stored in macro variable "dropper". | *----------------------------------------------------------------*; %let mss = ; data readrtf0 %if &devtype=2 %then (drop=i allblank);; retain segment 0 level; set read_(drop = count indent rownum &dropper); if level = 1 then segment + 1; %if &devtype=2 %then %do; if level in (1 2) then do; allblank=1; array c(&numc) $ &keeper; do i=(level + 1) to dim(c); if compress(c{i}, ' \') ne '' then allblank = 0; end; if allblank then delete; end; %end; run; *---------------------------------------------------------------------------------------* | Renumber the column variables that remain. For instance, if from c1, c2, c3, and c4, | | column 3 was deleted above because it was completely blank, then readrtf0 will have | | variables c1, c2, and c3 when this data step is done -- as opposed to c1, c2, and c4. | *---------------------------------------------------------------------------------------*; %if &dropper ne &mss %then %do; proc contents data=readrtf0 noprint out=delcol; run; data _null_; set delcol end=last; retain rnames; length rnames $ 200; if _n_=1 then ncol=0; if substr(name,1,1)='c' then do; ncol+1; if ncol ne input(substr(name,2),8.) then rnames=trim(left(rnames))||' '||compress(name)||'=c'||compress(put(ncol, best.)); end; if last then call symput('rnames', rnames); run; data readrtf0; set readrtf0 (rename=(&rnames)); run; %end; *-------------------------------------------------------------------------------------------------* | | | SECTION 4 | | | | Post-processing: handle wrapped rows, create num/pct/ID variables, merge header info | | | *-------------------------------------------------------------------------------------------------*; *-------------------------------------------------------------------------------------* | For values reported as n (%) or similar, create a numeric variable to represent "n" | | and another to represent the percent, for easier use in a proc compare by QCers. | | | | This data step also handles the restoration of wrapped values in table rows, back | | to the original preceding row. Such wrapped values, split over 2 or more table | | rows, are usually created in the output generation code rather than present in the | | original input data set. The QC data set to which work.readrtf will ultimately be | | compared will in almost all cases, also have these values on 1 observation rather | | than split over 2 or more. | *-------------------------------------------------------------------------------------*; data readrtf1(keep=segment level rownum c1-c&numc num1-num&numc pct1-pct&numc delrow %if &devtype=4 or &devtype=5 or &devtype=6 %then header;) readdel1(keep=delrow where=(delrow ne .)) %if &debug=y %then readrtf1_debug;; set readrtf0; length retc1-retc&numc tempc tempc2 $%sysfunc(max(200, &clength)); retain segment level retc1-retc&numc retpct1-retpct&numc retnum1-retnum&numc retlevel medtable last_delrow; rownum = _n_; if _n_=1 then do; retlevel = .; medtable = 0; last_delrow = .; end; *----------------------------------------------------------------------------------* | Set up arrays for variables that will carry over values from one obs to the next | *----------------------------------------------------------------------------------*; array retc (&numc) $; array retpct(&numc) ; array retnum(&numc) ; *--------------------------------------------------------------------* | Remove superscript and subscript special chars from the table body | *--------------------------------------------------------------------*; array c (&numc) c:; do i=1 to dim(c); ix1 = max(index(c(i), '\super'), index(c(i), '\sub')) - 1; if ix1 > 0 then do; ix2 = index(c(i), '\nos'); ix3 = index(substr(c(i), ix2), ' ') - 1; c(i) = substr(c(i), 1, ix1)||' '||left(substr(c(i), ix2 + ix3)); end; end; *---------------------------------------------------------------* | Create numeric variables for n and for percent, as applicable | *---------------------------------------------------------------*; array num(&numc); array pct(&numc); do i=2 to dim(c); tempc = scan(strip(c(i)), 1, ''); tempc2 = compress(scan(strip(c(i)), 2, '('), ')'); if tempc not in ('' '-' '--') and verify(tempc, '-0123456789. ')=0 then num(i) = input(tempc, best.); if tempc not in ('' ')' '-' '--') and verify(tempc2, '0123456789.() %')=0 then do; pct(i) = input(scan(compress(c(i), '%)'), 2, '('), best.); end; end; *-------------------------------------------------------------------------------------------* | Consolidate rows that are blank except for a row label, but whose preceding row contained | | numbers. These are typically wrapped row headers from previous rows. Devtype 4 is the AE | | listing which requires targeted branching logic here (eg, if >40% of columns are blank | | it is assumed to be a wrapper from the previous record). | *-------------------------------------------------------------------------------------------*; allblank = 1; wcols_for_wrap = 0; delrow = .; laglev = lag(level); do i=1 to dim(c); if i>1 and c(i)='' then do; if index(lowcase(c1), 'cont') then do; if index(lowcase(c1), 'continue') and (index(lowcase(c1), 'next page') or index(lowcase(c1), 'previous page')) then delete; else if scan(reverse(lowcase(c1)), 1, ' ') = reverse('(continued)') then delete; end; end; end; %if &devtype ne 5 %then %do; do i=2 to dim(c); if compress(c{i}, ' \') ne '' then do; allblank = 0; wcols_for_wrap + 1; end; end; if compress(c1, ' \') ne '' then wcols_for_wrap + 1; pct_filled=round(wcols_for_wrap/&numc, .001); if (&devtype not in (4 6) and not allblank and pct_filled >= (1-&wrapcrit)) or (&devtype in (4 6) and level=1 and pct_filled >= (1-&wrapcrit)) then do i=1 to dim(c); retc (i) = c(i) ; retnum(i) = num(i); retpct(i) = pct(i); retlevel = level ; prevblank = 0; end; else if (medtable and _n_>1 and (allblank or .=2 or .1 and (allblank or . 1 and retc(i) ne '' then blankret=0; end; if _n_ > 1 then delrow = _n_ - 1; if blankret then delrow = _n_; end; else %if &devtype=1 %then if compress(c1, ' \') = '' then; do; delrow = _n_; %if &utype=table %then last_delrow=_n_;; end; %end; *----------------------------------------------* | Some remaining clean up of RTF junk in cells | *----------------------------------------------*; if index(c1, '\~') then c1 = tranwrd(c1, '\~',' '); lc1=lowcase(c1); if index(lc1, '(cont') then do; lc1_ix1=index(lc1, '\b0 (cont' ); lc1_ix2=index(lc1, '(continued'); lc1_ix3=index(lc1, "(cont'" ); if lc1_ix1 then c1 = substr (c1, 1, lc1_ix1-1); else if lc1_ix2 then c1 = substr (c1, 1, lc1_ix2-1); else if lc1_ix3 then c1 = substr (c1, 1, lc1_ix3-1); end; *--------------------------------------------------------------------------------------------* | The dept. conmed table sometimes splits 1 term over 2 pages in the simple frequency table, | | separated by 2 blank lines. As a matter of exception, readrtf recognizes this table and | | concatenates the column 1 values for such wrapped rows in this setup regardless of whether | | the previous line was blank, so the QCer will not need to do this after the fact. | *--------------------------------------------------------------------------------------------*; if medtable=0 and "&progsrc"="D" then do; if index(lowcase(c1), 'number of subjects reporting use of concomitant medications') then medtable=1; end; run; /* proc sql; delete from readrtf1 where rownum in (select delrow from readdel1); quit; */ *------------------------------------------------------------------------------------* | Some tables and listings will have ID columns, meaning columns whose value is only | | displayed when it changes. Examples are site IDs or subject IDs, with multiple | | records per site or subject. The blank cells underneath such ID cells will be | | backfilled here with the most recently filled value. | | | | Applies to all tables and listings except the 2-panel displays (devtypes 4, 5, 6). | *------------------------------------------------------------------------------------*; %if &devtype ne 4 and &devtype ne 5 and &devtype ne 6 %then %do; data readrtf1(drop=i tlag: t2lag: num_id upbound) %if &debug=y %then readrtf1b_debug;; set readrtf1; length tlag1-tlag&numc t2lag1-t2lag&numc $&clength; retain tlag1-tlag&numc t2lag1-t2lag&numc tlaglev; array c (&numc) c:; array tlag {*} $ tlag1 - tlag&numc; array t2lag{*} $ t2lag1 - t2lag&numc; if _n_=1 then tlaglev=level; *----------------------------------------------------------------------* | Identify the unique ID variables from left to first non-missing cell | *----------------------------------------------------------------------*; num_id=0; upbound = min(dim(c), &last_idcol); do i=1 to upbound; if c(i)='' and num_id=i-1 then num_id=i; end; do i=1 to num_id; tlag1_lc=lowcase(tlag1); if index(tlag1_lc, '(continued')=0 and index(tlag1_lc, '(cont)')=0 and index(tlag1_lc, '(cont.)')=0 then c(i)=tlag (i); else c(i)=t2lag(i); level=tlaglev; end; do i=1 to upbound; tlag (i)=c(i); t2lag(i)=lag(c(i)); end; tlaglev=level; run; %end; /* data readrtf(drop = segment delrow rename = (newmain = segment)); length newmain level subitem rownum 8.; set readrtf1; by segment; if first.segment then do; subitem=0; newmain + 1; end; if level ne lag(level) then subitem = 0; subitem + 1; rownum = _n_; %if "&indent_c1" = "y" %then %do; if level = 2 then c1 = ' ' ||c1; else if level > 2 then c1 = repeat(' ', level-2)||c1; %end; run; */ data &outdat.(keep=rownum c:); length rownum 8.; set readrtf0; rownum=_n_; c1=compbl(c1); ** removes indentation spaces; if index(c1, '(cont.)') then delete; **** remoce cont. lines ; run; *-------------------------------------------------------------------------------* | Clean up work library to avoid contamination of multiple runs in same program | *-------------------------------------------------------------------------------*; %if &debug ne y %then %do; proc datasets lib=work nolist; delete dumpster_ dumpster3_ read_ read1_ levels_ readrtf0 readrtf1 readdel1 delcol %if &progsrc=D %then dumpster2_ dumpdel2 ;; quit; %end; %rrexit: %mend readrtf;