Posted By

dom111 on 09/15/10


Tagged

mysql php apache log process


Versions (?)

Who likes this?

4 people have marked this snippet as a favorite

korpz
developer
tux-world
Priestd09


php-log - Process log files with PHP


 / Published in: PHP
 

URL: http://www.dom111.co.uk/blog/coding/php-log-process-apache-log-files-with-php/49

I’ve recently had to quickly parse an Apache log file, to get basic usage statistics for a site over a month and wanted the ability to quickly process any general logs and store them in a MySQL table, so I’ve made a small package class.log.

Included in the package are:

class.log.php (the main class), class.log.processor.php (an interface for a log processor, very basic!), class.log.output.php (a simple implementation of the processor that outputs <p> tags containing <span>s), class.log.mysql.php (a simple MySQL importer of the processed data) and parse.php (a simple implementation, bringing all the classes together).

The class is designed to process one line at a time from the log, and the regular expression specified in class.log.php can be modified to parse different types of logs as long as the matches array is also updated.

I’ve only used this for Apache logs so far, which it handled quite well. I’m not sure I’d use it in an automated script, but I’ll leave that for you to decide.

  1. <?php
  2. /* class.log.php */
  3.  
  4. // -------------------------
  5. // log
  6. //
  7. // main log class for parsing text logs and processing the data
  8. // must be used with a log processor descendent
  9. //
  10. // -------------------------
  /**
   * Line-oriented log parser.
   *
   * Reads a text log one line at a time, matches every line against one of the
   * regular expressions registered in self::$patterns and hands the resulting
   * field array to the processor stored in self::$parser.
   */
  class log {
      /**
       * Known log formats, keyed by the name passed to parse() as $type.
       *
       * Each entry holds:
       *  - 'pattern': a Perl-compatible regular expression that splits one log
       *               line into its fields
       *  - 'matches': preg_match() capture index => field name used as the key
       *               in the array given to the processor
       *
       * Add further entries here to support other log formats.
       *
       * NOTE: in 'apacheDefault' the size field is matched with (\d+), so a line
       * whose size is not numeric (for example "-") does not match and is skipped.
       */
      public static $patterns = array(
          'apacheDefault' => array(
              // ip address, identd, auth, day, month, year, time + TZ, request,
              // http version, response code, size, referrer, navigator
              'pattern' => '/(\d+\.\d+\.\d+\.\d+) ([^\s]+) ([^\s]+) \[(\d+)\/(\w+)\/(\d+):(\d{1,2}:\d{1,2}:\d{1,2} ?[\+\-]?\d*)\] "(.*) (HTTP\/\d\.\d)" (\d+) (\d+) "([^"]*)" "([^"]*)"/',

              'matches' => array(
                  1 => 'ip',
                  2 => 'identd',
                  3 => 'auth',
                  4 => 'day',
                  5 => 'month',
                  6 => 'year',
                  7 => 'time',
                  8 => 'request',
                  9 => 'http_version',
                  10 => 'response_code',
                  11 => 'size',
                  12 => 'referrer',
                  13 => 'navigator'
              )
          )
      );

      /**
       * The processor that receives every parsed line; must expose process($data).
       * Set it before calling parse(), e.g. log::$parser = new log_output();
       */
      public static $parser = null;

      /**
       * Parse a log file and pass every matching line to self::$parser.
       *
       * Declared static because it only touches static state and is meant to be
       * called as log::parse(...); calling it on an instance still works.
       *
       * @param string $filename path of the log file to read
       * @param string $type     key in self::$patterns selecting the log format
       *
       * @return bool always true when the whole file has been read
       *
       * @throws Exception when $type is unknown, the file is missing, unreadable
       *                   or cannot be opened, or no parser has been set; any
       *                   exception thrown by the processor is passed through
       */
      public static function parse($filename = '', $type = 'apacheDefault') {
          if (!isset(self::$patterns[$type])) {
              throw new Exception('Requested type not available ('.$type.')');
          }

          if (!file_exists($filename)) {
              throw new Exception('File does not exist ('.$filename.')');
          }

          if (!is_readable($filename)) {
              throw new Exception('File is not readable ('.$filename.')');
          }

          if (empty(self::$parser)) {
              throw new Exception('No parser specified (Set: log::$parser = new parser_type();)');
          }

          $handle = fopen($filename, 'r');

          // fopen() can still fail after the checks above (e.g. the file vanished)
          if ($handle === false) {
              throw new Exception('File is not readable ('.$filename.')');
          }

          $pattern = self::$patterns[$type]['pattern'];
          $fields = self::$patterns[$type]['matches'];

          try {
              // fgets() returns false at end of file, so it drives the loop itself
              while (($line = fgets($handle)) !== false) {
                  // lines that do not match the pattern are skipped silently
                  if (!preg_match($pattern, $line, $matches)) {
                      continue;
                  }

                  // translate numeric capture indexes into named fields
                  $data = array();

                  foreach ($fields as $i => $key) {
                      // trailing optional groups that did not take part in the
                      // match are absent from $matches
                      $data[$key] = isset($matches[$i]) ? $matches[$i] : '';
                  }

                  self::$parser->process($data);
              }
          } catch (Exception $e) {
              // do not leave the file open when the processor fails
              fclose($handle);

              throw $e;
          }

          fclose($handle);

          return true;
      }
  }
  115.  
  116. /* class.log.processor.php */
  117.  
  118. // -------------------------
  119. // log_processor interface
  120. //
  121. // implement this interface in any processors you write
  122. //
  123. // currently the only required function is process, which processes the
  124. // array log of log data returned from preg_match
  125. //
  126. // -------------------------
  /**
   * Contract for anything that can be assigned to log::$parser.
   *
   * process() is called once per matching log line with an array of
   * field name => value pairs built from the pattern's 'matches' map.
   */
  interface log_processor {
      public function process($data);
  }
  130.  
  131. /* class.log.output.php */
  132.  
  133. // include the interface class
  // log_processor is an interface, and class_exists() reports false for
  // interfaces, so the check has to use interface_exists()
  if (!interface_exists('log_processor')) {
      require_once('class.log.processor.php');
  }
  137.  
  138. // -------------------------
  139. // log_output
  140. //
  141. // very basic class to output data in a very simple format
  142. //
  143. // -------------------------
  /**
   * Processor that renders every parsed log line as one HTML paragraph.
   *
   * Each field becomes a <span> whose class attribute is the field name and
   * whose content is the field value.
   */
  class log_output implements log_processor {
      /**
       * Print (and return) the HTML for one parsed log line.
       *
       * Log fields such as the request, referrer and user agent are supplied by
       * the client, so both keys and values are HTML-escaped before output.
       *
       * @param array $data field name => value pairs for one log line
       *
       * @return string the HTML that was printed
       */
      final public function process($data) {
          $spans = '';

          foreach ($data as $key => $value) {
              $class = $this->escape($key);
              $text = $this->escape($value);

              // one <span> per field, followed by a separating space
              $spans .= "<span class=\"{$class}\">{$text}</span> ";
          }

          $html = '<p>' . $spans . '</p>';

          print $html;

          // return it too, so callers can capture the markup
          return $html;
      }

      /**
       * Escape a value for use in HTML text and in quoted attribute values.
       *
       * @param mixed $value value to escape; cast to string first
       *
       * @return string the escaped text
       */
      private function escape($value) {
          $flags = ENT_QUOTES;

          // without ENT_SUBSTITUTE, htmlspecialchars() returns an empty string
          // for input that is not valid UTF-8; the constant exists from PHP 5.4
          if (defined('ENT_SUBSTITUTE')) {
              $flags |= ENT_SUBSTITUTE;
          }

          return htmlspecialchars((string) $value, $flags, 'UTF-8');
      }
  }
  169.  
  170.  
  171. /* class.log.mysql.php */
  172.  
  173. // include the interface class
  // log_processor is an interface, and class_exists() reports false for
  // interfaces, so the check has to use interface_exists()
  if (!interface_exists('log_processor')) {
      require_once('class.log.processor.php');
  }
  177.  
  178. // -------------------------
  179. // log_mysql
  180. //
  181. // mysql class for storing parsed log data in a table
  182. // in my tests, this script processes ~1500-4000 rows/second
  183. //
  184. // -------------------------
  185. // Example table structure for apache:
  186. //
  187. // CREATE TABLE `log` (
  188. // `id` int(11) NOT NULL auto_increment,
  189. // `ip` varchar(255) NOT NULL default '',
  190. // `identd` varchar(255) NOT NULL default '',
  191. // `auth` varchar(255) NOT NULL default '',
  192. // `day` int(8) NOT NULL default '0',
  193. // `month` varchar(255) NOT NULL default '',
  194. // `year` int(8) NOT NULL default '0',
  195. // `time` varchar(255) NOT NULL default '',
  196. // `request` text NOT NULL,
  197. // `http_version` varchar(255) NOT NULL default '',
  198. // `response_code` int(8) NOT NULL default '0',
  199. // `size` int(11) NOT NULL default '0',
  200. // `referrer` text NOT NULL,
  201. // `navigator` text NOT NULL,
  202. // PRIMARY KEY (`id`)
  203. // ) ENGINE=MyISAM DEFAULT CHARSET=latin1
  204. //
  205. // and the example $fields array to complement it:
  206. //
  207. // $fields = array(
  208. // // key in $data field name in table
  209. // 'ip' => 'ip',
  210. // 'identd' => 'identd',
  211. // 'auth' => 'auth',
  212. // 'day' => 'day',
  213. // 'month' => 'month',
  214. // 'year' => 'year',
  215. // 'time' => 'time',
  216. // 'request' => 'request',
  217. // 'http_version' => 'http_version',
  218. // 'response_code' => 'response_code',
  219. // 'size' => 'size',
  220. // 'referrer' => 'referrer',
  221. // 'navigator' => 'navigator'
  222. // );
  223. //
  224. // -------------------------
  /**
   * Processor that stores every parsed log line as one row in a MySQL table.
   *
   * Uses the mysqli extension; the older mysql_* functions were removed in
   * PHP 7.
   */
  class log_mysql implements log_processor {
      // connection settings, overridable through the constructor's $settings
      public $host = 'localhost';
      public $user = 'root';
      public $pass = '';
      public $db = '';
      public $table = '';

      // key in the parsed $data array => column name in $this->table
      public $fields = array();

      // the mysqli connection once connect() has succeeded, false before that
      public $connection = false;

      // number of rows inserted so far
      public $rows = 0;

      /**
       * Apply the settings, validate them and connect to the database.
       *
       * @param array $settings optional keys: 'host', 'user', 'pass', 'db',
       *                        'table' (strings) and 'fields' (array mapping
       *                        $data keys to table columns); 'table' and
       *                        'fields' must end up non-empty
       *
       * @throws Exception when $settings is not an array, the field map or the
       *                   table name is missing, or the connection fails
       */
      final public function __construct($settings = array()) {
          if (!is_array($settings)) {
              throw new Exception('log_mysql $settings should be an array');
          }

          // copy every recognised setting that was supplied
          foreach (array('user', 'pass', 'host', 'db', 'table', 'fields') as $name) {
              if (isset($settings[$name])) {
                  $this->$name = $settings[$name];
              }
          }

          if (empty($this->fields) || !is_array($this->fields)) {
              throw new Exception('Missing field data ($this->fields)');
          }

          if (empty($this->table)) {
              throw new Exception('Missing MySQL table name');
          }

          $this->connect();
      }

      /**
       * Insert one parsed log line; called by the log class.
       *
       * @param array $data field name => value pairs for one log line
       *
       * @return bool true when the row was inserted
       *
       * @throws Exception when the insert fails
       */
      final public function process($data) {
          if (!$this->insert($data)) {
              throw new Exception('Error inserting data to MySQL server');
          }

          $this->rows++;

          return true;
      }

      /**
       * Open the connection and select the configured database.
       *
       * @return bool true on success
       *
       * @throws Exception when connecting or selecting the database fails
       */
      private function connect() {
          // depending on the mysqli report mode a failure is either a false
          // return value or a mysqli_sql_exception; treat both the same way
          try {
              $this->connection = mysqli_connect($this->host, $this->user, $this->pass);
          } catch (mysqli_sql_exception $e) {
              $this->connection = false;
          }

          if (!$this->connection) {
              throw new Exception('Unable to connect to MySQL server');
          }

          try {
              $selected = mysqli_select_db($this->connection, $this->db);
          } catch (mysqli_sql_exception $e) {
              $selected = false;
          }

          if (!$selected) {
              throw new Exception('Unable to select database ('.$this->db.')');
          }

          return true;
      }

      /**
       * Build and run the INSERT statement for one log line.
       *
       * @param array $data field name => value pairs passed on from process()
       *
       * @return bool true when the query succeeded, false otherwise
       */
      private function insert($data) {
          $sets = array();

          foreach ($this->fields as $name => $field) {
              // a field missing from $data is stored as an empty string
              $value = isset($data[$name]) ? (string) $data[$name] : '';
              $value = mysqli_real_escape_string($this->connection, $value);

              $sets[] = $this->quoteIdentifier($field) . " = '" . $value . "'";
          }

          $q = 'INSERT INTO ' . $this->quoteIdentifier($this->table)
              . ' SET ' . implode(', ', $sets) . ';';

          try {
              return (bool) mysqli_query($this->connection, $q);
          } catch (mysqli_sql_exception $e) {
              // process() turns a false result into its own exception
              return false;
          }
      }

      /**
       * Wrap a table or column name in backticks.
       *
       * String escaping does not protect identifiers: inside backticks the only
       * character that needs care is the backtick itself, which is doubled.
       *
       * @param string $name raw identifier
       *
       * @return string the identifier, safely quoted
       */
      private function quoteIdentifier($name) {
          return '`' . str_replace('`', '``', (string) $name) . '`';
      }
  }
  388.  
  389. /* ---- */
  390. /* Demo */
  391. /* ---- */
  392.  
  393. /* parse.php */
  394.  
  395. // make sure the script doesn't die if parsing a long log
  // make sure the script doesn't die if parsing a long log
  ini_set('max_execution_time', '0');

  // include all the classes
  require_once('class.log.php');
  require_once('class.log.mysql.php');
  require_once('class.log.output.php');

  // To import into MySQL instead of printing HTML, use log_mysql as the parser
  // (see class.log.mysql.php for an example table setup):
  //
  // log::$parser = new log_mysql(array(
  //     'user' => 'mysql-username',
  //     'pass' => 'My5q1_p455w0|2D',
  //     'db' => 'php_log',
  //     'table' => 'log',
  //     'fields' => array(
  //         'ip' => 'ip',
  //         'identd' => 'identd',
  //         'auth' => 'auth',
  //         'day' => 'day',
  //         'month' => 'month',
  //         'year' => 'year',
  //         'time' => 'time',
  //         'request' => 'request',
  //         'http_version' => 'http_version',
  //         'response_code' => 'response_code',
  //         'size' => 'size',
  //         'referrer' => 'referrer',
  //         'navigator' => 'navigator'
  //     )
  // ));
  //
  log::$parser = new log_output();

  // call parse() through an instance: a static call to a method that is not
  // declared static is a fatal error on PHP 8, while an instance call works
  // whether or not the method is static
  $log = new log();
  $log->parse('test.log');
  430.  
  431. /* test.log
  432.  
  433. 123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET / HTTP/1.1" 200 304 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
  434. 123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /img/logo.gif HTTP/1.1" 200 570 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"
  435. 123.45.67.89 - - [09/Jan/2009:12:49:17 +0000] "GET /css/css.css HTTP/1.1" 200 124 "" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.5+ (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"

Report this snippet  

You need to login to post a comment.