Revision: 18518
Updated Code
at October 2, 2009 03:57 by dreadwarrior
Updated Code
<?php
// Ordered search => replace pairs used to normalize telephone numbers.
// Order matters: the pairs are applied one after another from top to bottom, so the
// prefix clean-up ('+00', '++') has to run before the final '+' => '00' rewrite.
$search_replace_mapping = array(
    // country prefix normalization
    '+00' => '+', '++' => '+',
    // country prefix is always 00
    '+' => '00',
    // letters that users commonly type in place of digits
    'i' => '1', 'I' => '1', 'l' => '1',
    'o' => '0', 'O' => '0',
    // brackets: round, square and curly
    // (the square pair used to be listed twice; a repeated key in an array literal
    // simply overwrites the earlier one, so the second row had no effect)
    '(' => '', ')' => '',
    '[' => '', ']' => '',
    '{' => '', '}' => '',
    // slashes
    // NOTE: the backslash key is a PHP literal holding TWO backslash characters.
    // Presumably this is the SQL-escaped form of one backslash for the generated
    // REPLACE() statement - confirm against the target database's escaping rules.
    '/' => '', '\\\\' => '',
    // dashes
    '-' => '', '_' => '',
    // whitespaces
    ' ' => ''
);
/**
 * Normalizes a telephone number to a plain string of digits.
 *
 * The mapping pairs are applied sequentially in the order they are declared, each one
 * operating on the result of the previous one. Whatever is still not a digit after
 * that is dropped.
 *
 * @param string $telephone_number        raw telephone number
 * @param array  $search_replace_mapping  ordered search => replace pairs
 * @return string the digits-only telephone number
 */
function normalizeTelephoneNumber($telephone_number, $search_replace_mapping) {
    // walk the mapping pair by pair; later pairs see the output of earlier ones
    foreach ($search_replace_mapping as $needle => $substitute) {
        $telephone_number = str_replace($needle, $substitute, $telephone_number);
    }

    // strip every remaining character that is not a digit
    return preg_replace('~[^\d]~', '', $telephone_number);
}
/**
 * Builds a SELECT statement that normalizes a telephone number through nested
 * MySQL REPLACE() calls.
 *
 * Every mapping pair wraps the expression built so far, so the first pair ends up
 * innermost and the pairs take effect in mapping order. With an empty mapping the
 * statement simply selects the quoted telephone number.
 *
 * @param string $telephone_number        raw telephone number; embedded as a quoted
 *                                        SQL string literal
 * @param array  $search_replace_mapping  ordered search => replace pairs; they are
 *                                        inserted verbatim between single quotes, so
 *                                        they must already be valid inside a SQL
 *                                        string literal
 * @return string the SELECT statement
 */
function generateSqlReplaceStatement($telephone_number, $search_replace_mapping)
{
    // The telephone number is user input: escape quotes and backslashes so it cannot
    // terminate the string literal early.
    // NOTE(review): addslashes() is not character-set aware; if the statement is run
    // against a live connection, prefer the driver's escaping or a bound parameter.
    $expression = '\'' . addslashes($telephone_number) . '\'';

    foreach ($search_replace_mapping as $search => $replace)
    {
        // wrap the previous expression so replacements are applied in mapping order
        $expression = sprintf('REPLACE(%s, \'%s\', \'%s\')', $expression, $search, $replace);
    }

    return 'SELECT '. $expression .' AS normalized FROM Accommodation';
}
// Sample inputs covering the supported kinds of sloppy formatting.
$numbers = array(
    '0049 03831 667 156',
    '+39 0471 / 975 642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3834 50 77 73',
    '+43 (i) 4m 4n idiOt',
    '+44 (0) 1234 \\ 55 55'
);
print_r($numbers);

// Run every sample through the PHP normalizer and the SQL generator in one pass.
// The SQL statements are keyed by the raw number they were generated for.
$numbers_clean = array();
$sql = array();
foreach ($numbers as $raw_number)
{
    $numbers_clean[] = normalizeTelephoneNumber($raw_number, $search_replace_mapping);
    $sql[$raw_number] = generateSqlReplaceStatement($raw_number, $search_replace_mapping);
}

print_r($numbers_clean);
print_r($sql);
?>
Revision: 18517
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at October 1, 2009 11:44 by dreadwarrior
Initial Code
<?php
/**
 * Normalizes a telephone number to a plain string of digits.
 *
 * A fixed, ordered list of search => replace pairs is applied first, each pair
 * operating on the result of the previous one. Whatever is still not a digit after
 * that is dropped.
 *
 * @param string $telephone_number raw telephone number
 * @return string the digits-only telephone number
 */
function normalizeTelephoneNumber($telephone_number) {
    // order matters: prefix clean-up has to run before '+' is turned into '00'
    $search_replace_mapping = array(
        // country prefix normalization
        '+00' => '+',
        '++'  => '+',
        // country prefix is always 00
        '+'   => '00',
        // letters that users commonly type in place of digits
        'i'   => '1',
        'I'   => '1',
        'l'   => '1',
        'o'   => '0',
        'O'   => '0',
        // brackets
        '('   => '',
        ')'   => '',
        '['   => '',
        ']'   => '',
        // slashes
        '/'   => '',
        '\\'  => '',
        // dashes
        '-'   => '',
        '_'   => '',
        // whitespaces
        ' '   => ''
    );

    // walk the mapping pair by pair; later pairs see the output of earlier ones
    foreach ($search_replace_mapping as $needle => $substitute) {
        $telephone_number = str_replace($needle, $substitute, $telephone_number);
    }

    // strip every remaining character that is not a digit
    return preg_replace('~[^\d]~', '', $telephone_number);
}
// Sample inputs covering the supported kinds of sloppy formatting.
$numbers = array(
    '0049 03832 647 352',
    '+39 0471 / 975 642',
    '+0039 6757 - 3939 9393',
    '+49 (0) 3831 50 77 73',
    '+43 (i) 4m 4n idiOt'
);
print_r($numbers);

// Normalize each sample, keeping the results in the same order as the inputs.
$numbers_clean = array();
foreach ($numbers as $raw_number)
{
    $numbers_clean[] = normalizeTelephoneNumber($raw_number);
}
print_r($numbers_clean);
?>
Initial URL
Initial Description
This snippet normalizes telephone numbers. It supports re-mapping of misused alphabetical characters (e.g. i for 1) and some other clean-up steps... Take a look. Update, 2009-10-02: added a function that creates a corresponding MySQL REPLACE construct to normalize data which is already stored in the database. I decided to use this approach because MySQL's regular expression engine is very slow. TODO: add the possibility to remove non-digit characters at the end of the large REPLACE call.
Initial Title
Normalize telephone numbers
Initial Tags
sql
Initial Language
PHP