<?php
// Example layout of the table we are parsing
/**
 * This was used to scrape the inspections for the individual Estalishments
 *
 * The targeted table looks like this
 * 
 * <table cellspacing="2" cellpadding="2" width="100%" border="1" align="center">
 * <tr><th>...</th></tr>
 * <tr>
        <td class="DetailsTD">6202062</td>
        <td class="DetailsTD">FoodSvcSemi</td>
        <td class="DetailsTD">02/24/2009</td>
        <td class="DetailsTD"><b>93</b></td>
    </tr>
 */
require_once 'PHPUnit/Extensions/SeleniumTestCase.php';
class 
Example extends PHPUnit_Extensions_SeleniumTestCase {
    private 
$match_cell_text "//table[@id='DetailsView']/tbody/tr/td/table/tbody/tr[{row}]/td[{index}]";
    private 
$establishemnt_header_fields = array();
    private 
$collected_data_rows = array();
    private 
$scrape_date null;
    function 
setUp() {
    
// key to establishemnt_header_fields:
    //      {TABLE CELL number} => '{db field name}::{seleniumRC method name}::{match rule}'
    // order important, should be same as column order of target table (columns start at 1 not 0)
    
$this->establishemnt_header_fields = array(
        
=> "number::getText::{$this->match_cell_text}",
        
=> "type::getText::{$this->match_cell_text}",
        
=> "inspection_date::getText::{$this->match_cell_text}",
        
=> "score::getText::{$this->match_cell_text}"
    
);
    
$this->setBrowser("*chrome");
    
$this->setBrowserUrl("http://www2.co.multnomah.or.us");
    
$this->db_connection $this->db_connect();
    
$this->scrape_date date('Y-m-d H:i:s');
    }
    protected function 
tearDown() {
        
$this->db_connection->close();
    }
    
/**
     * This is the method that PHPUnit will call to start the scraping process
     * You need to have selenium RC running before you do this.
     * @see http://seleniumhq.org/projects/remote-control/
     */
    
function testGetEstablishmentScores() {
        
// get the inspection links from the Db
        
$establishemnt_links $this->get_establishment_detail_links();
        foreach (
$establishemnt_links as $establishment) {
            
$this->open("/Health/mchealthinspect/{$establishment['detail_link']}");
            
$this->waitForPageToLoad("30000");
            
// be nice to server
            
sleep(1);
            
$this->parse_table(1$establishment['mchd_id']);
        }
    }
    
/**
     *
     * @param int $max_iterations just a sanity break point so we don't loop ininite if
     * we get the code wrong below.  Set to 1 here since we are only looking
     * for one row.
     * @param string $establishemnt_id the MCHD id for the establishment
     * @example
     */
    
private function parse_table($max_iterations=1$establishemnt_id) {
        
$current_row 2// skip the table header row
        
for($table_rows=1;$table_rows<=$max_iterations;$table_rows++) {
            try {
                
// sanity check, see if {row one}/{cell one} of target table is present
                
$this->assertTrue($this->isElementPresent(
                    
"//table[@id='DetailsView']/tbody/tr/td/table/tbody/tr[{$current_row}]/td[1]"));
                
$this_row = array();
                
// parse over each header field in this row
                
foreach ($this->establishemnt_header_fields as $index => $field) {
                    
$field is_array($field)?$field:array($field);
                    foreach (
$field as $sub_field) {
                        
// $sub_field is in form ex: "name::getText:://table[@id='ResultsDataGrid']/tbody/tr[{row}]/td[{index}]"
                        
$field_parts explode('::',$sub_field);
                        
$sub_field $field_parts[0];
                        
$match_command $field_parts[1];
                        
$match_rule $field_parts[2];
                        
$match_rule str_replace(array('{row}','{index}'),array($current_row,$index),$match_rule);
                        
$field_value $this->{$match_command}($match_rule);
                        
$this_row[$sub_field] = $field_value;
                        
//special case for detail_link: we also grab the mchd_id out of it
                        
$match null;
                        if(
$sub_field=='detail_link' && preg_match('/\d*$/',$field_value,$match)) {
                            
$this_row['mchd_id'] = $match[0];
                        }
                    }
                }
                
$this->collected_data_rows[] = $this_row;
            } catch (
PHPUnit_Framework_AssertionFailedError $e) {
                
array_push($this->verificationErrors$e->toString());
                break;
            }
            
$current_row++;
        }
        
//## INSERT the Establishment ROW ##//
        // @todo convert to prepared statements
        /*
         * insert the rows from the scraped table into the db
         */
        
foreach ($this->collected_data_rows as $establishment_data) {
            
$establishment_data['mchd_id'] = $establishemnt_id;
            
$sql_insert_message $this->build_insert_query($establishment_data);
            
$this->db_connection->query($sql_insert_message);
        }
        
$this->collected_data_rows = array();
    }
    
/*
     * build and insert query for each table row
     * @todo convert to stored proc
     */
    
private function build_insert_query($insert_elements) {
        
$inspection_date explode('/',$insert_elements['inspection_date']);
        
$inspection_date "{$inspection_date[2]}-{$inspection_date[0]}-{$inspection_date[1]}";
        
$insert_elements['inspection_date'] = $inspection_date;
        
$insert_elements['scrape_date'] = $this->scrape_date;
        
$query "INSERT `inspections` (`".implode('`, `',array_keys($insert_elements))."` ) VALUES(";
        foreach (
$insert_elements as &$element) {
            
$element mysqli_real_escape_string($this->db_connection$element);
        }
        
$query .= "'".implode("', '"$insert_elements)."')";
        return 
$query;
    }
    
/*
     * get the previously stored establishments from the Db
     */
    
private function get_establishment_detail_links() {
        
$query "SELECT `mchd_id`, `detail_link` FROM `establishments`
            WHERE `detail_link` != '' AND `detail_link` IS NOT NULL ORDER BY `mchd_id`"
;
        
$result $this->db_connection->query($query);
        while (
$establishments[] = mysqli_fetch_array($result,MYSQLI_ASSOC)) {}
        return 
$establishments;
    }
    private function 
db_connect() {
        
$mysqli mysqli_connect'localhost''test''test''mchd' );
        
/* check connection */
        
if (mysqli_connect_errno()) {
            
printf "Connect failed: %s\n"mysqli_connect_error () );
            exit ();
        }
        return 
$mysqli;
    }
}
?>