Return to Snippet

Revision: 65161
at November 2, 2013 04:53 by webonomic


Initial Code
/* Create sample .csv file containing CR/LF bytes.  '0D'x is the */
/* hexadecimal representation of CR and '0A'x is the hexadecimal */
/* representation of LF.                                         */

data _null_;
  file "c:\sample.csv";  
  /* Code the PUT statement so the hex values will be inside the */
  /* the double quotes in the output file SAMPLE.CSV.            */
  put '"field1","field2","field3'
    '0D'x    
    '",'
    '"field4","field5","field6'
    '0A'x   
    '",'
    '"field7","field8","field9"'
  ;
run;

/* Read in the test file created above. */
data out;
  /* The DSD option assumes the delimiter is a comma. You can override */
  /* the delimiter with the DLM= option if needed.                     */
  infile "c:\sample.csv" dsd truncover;
  length var1 - var9 $15;
  input var1 - var9 $;
run;

proc print;
  title 'File is read incorrectly due to embedded CR/LF';
run;

/************************** CAUTION ***************************/
/*                                                            */
/* This program UPDATES IN PLACE, create a backup copy before */
/* running.                                                   */
/*                                                            */
/************************** CAUTION ***************************/
                                                           
/* Replace carriage return and linefeed characters inside     */
/* double quotes with a specified character.  This sample     */
/* uses '@' and '$', but any character can be used, including */
/* spaces.  CR/LFs not in double quotes will not be replaced. */


%let repA='@';                    /* replacement character LF */
%let repD='$';                    /* replacement character CR */

                             
%let dsnnme="c:\sample.csv";      /* use full path of CSV file */

data _null_;
  /* RECFM=N reads the file in binary format. The file consists    */
  /* of a stream of bytes with no record boundaries.  SHAREBUFFERS */
  /* specifies that the FILE statement and the INFILE statement    */
  /* share the same buffer.                                        */
  infile &dsnnme recfm=n sharebuffers;
  file &dsnnme recfm=n;

  /* OPEN is a flag variable used to determine if the CR/LF is within */
  /* double quotes or not.  Retain this value.                        */
  retain open 0;

  input a $char1.;
  /* If the character is a double quote, set OPEN to its opposite value. */
  if a = '"' then open = ^(open);

  /* If the CR or LF is after an open double quote, replace the byte with */
  /* the appropriate value.                                               */
  if open then do;
    if a = '0D'x then put &repD;
    else if a = '0A'x then put &repA;
  end;
run;

/* Read in new version of file to check for success. */
data outp;
  infile "c:\sample.csv" dsd dlm=',' truncover;
  length var1 - var9 $ 15;
  input var1 - var9 $ ;
run;

proc print;
  title1 'File is read correctly after transformation ';
  title2 "Look for printable characters &repa &repd in variable values ";
run;

/* OPTIONAL -- Delete external file */
data _null_;
  fname="tempfile";
  rc=filename(fname,"c:\sample.csv");
  if rc = 0 and fexist(fname) then rc=fdelete(fname);
  rc=filename(fname);
run;

Initial URL
http://support.sas.com/kb/26/065.html

Initial Description
If a flat file contains embedded carriage return (CR) and linefeed characters (LF) inside double quotes, SAS will interpret them as end of line markers. This may cause your file to be read incorrectly. 
This example replaces CR/LF characters within double quotes with other printable characters. CR/LF characters outside of double quotes are untouched. 

In this sample, the external file is updated in place. You cannot separate the input and output because the code uses shared buffers. 

You should make a copy of your file before running this sample on it.

Initial Title
Remove carriage return and linefeed characters within quoted strings

Initial Tags
line

Initial Language
SAS