Posted By

mandric on 04/26/08


Tagged

Bash script finance scraper nyse


Versions (?)

Scrape NYSE


 / Published in: Bash
 

More gibberish.

  #!/bin/sh
  #
  # Scrape the NYSE factsheet from csidata.com and refresh the local symbol list.
  #
  # Steps:
  #   1. Download the factsheet with lynx into a private temporary directory.
  #   2. On success, rotate files/nyse_index.txt to .bak and install the download.
  #   3. Extract the symbol column, translating "+" to "-P" for yahoo.
  #   4. Rotate data/nyse_index_symbols.txt to .bak and write the new list with
  #      carriage returns removed.
  #   5. Append a diff and line counts of the new and old symbol lists to the log.
  #
  # All diagnostics are appended to ${BASEDIR}../logs/scraping.

  # Trace every command (kept from the original "-x"); set here rather than on
  # the shebang line so it also applies when the script is run as "sh script".
  set -x
  # Treat an unset variable as an error so a misspelled name cannot silently
  # expand to an empty string.
  set -u

  PATH=$PATH:/usr/local/bin
  BASEDIR=/home/stockh/xyz.com/api/scripts/
  INDEXFILE=${BASEDIR}../files/nyse_index.txt
  DATAFILE=${BASEDIR}../data/nyse_index_symbols.txt
  LOGFILE=${BASEDIR}../logs/scraping
  SOURCE_URL=http://www.csidata.com/factsheets/nyse.txt

  # Private scratch directory instead of fixed names under /tmp. Files created
  # inside it by redirection still get the normal umask permissions, so the
  # file later moved into place keeps the same mode as before.
  workdir=$(mktemp -d "${TMPDIR:-/tmp}/nyse_scrape.XXXXXX") || {
    echo "$0: could not create temporary directory" >> "$LOGFILE"
    exit 1
  }
  trap 'rm -rf -- "${workdir:?}"' EXIT

  downloaded=$workdir/nyse_index.txt

  echo "running $0 on $(date)" >> "$LOGFILE"

  # Only touch the existing files when the download succeeded and is non-empty.
  if lynx -dump "$SOURCE_URL" > "$downloaded" 2>> "$LOGFILE" && [ -s "$downloaded" ]; then

    # Keep the previous raw index as a backup; on a first run there is nothing
    # to rotate, so a failure here is logged and the script carries on.
    mv "$INDEXFILE" "${INDEXFILE}.bak" 2>> "$LOGFILE"
    mv "$downloaded" "$INDEXFILE" 2>> "$LOGFILE"

    # Keep the previous symbol list as a backup before overwriting it.
    mv "$DATAFILE" "${DATAFILE}.bak" 2>> "$LOGFILE"

    #perl -ne 'if (s/^(.*)[\s\s]+(\S+)[\s\s]+(\S+)\s(\S+)/\1\2\3\4/) { print }' < "$INDEXFILE"
    # get symbols and do + to -P translation for yahoo, then remove windows
    # newlines (carriage returns, octal 015)
    perl -ne 'if (s/^(.*)[\s\s]+(\S+)[\s\s]+(\S+)\s(\S+)/\2/) { $_ =~ s/\+/-P/; print }' \
      < "$INDEXFILE" 2>> "$LOGFILE" \
      | tr -d '\015' > "$DATAFILE" 2>> "$LOGFILE"

  fi

  # Log what changed between the new and the previous symbol list. diff writes
  # straight to the log so its multi-line output is preserved.
  diff "$DATAFILE" "${DATAFILE}.bak" >> "$LOGFILE" 2>&1
  echo "$(wc -l "$DATAFILE") lines in new datafile" >> "$LOGFILE"
  echo "$(wc -l "${DATAFILE}.bak") lines in old datafile" >> "$LOGFILE"

Report this snippet  

You need to login to post a comment.