i am importing around 3000 records each week into a mysql database and have to ensure that no duplicate records exist. the database isn't even up to full size yet. it's final size will be around 1,000,000 records. does anyone have a suggestion on how to speed up the process of checking for duplicate records? here's the current code...which is getting very slow at 100,000 records. as the database nears 1,000,000 records and imports 3,000 records, it has to do 3,000,000,000 functions...yikes!
<?php
set_time_limit(200);
// See if a file has been posted
if (is_uploaded_file($HTTP_POST_FILES['textfile']['tmp_name']))
{
// Init the counters
$update_count = 0;
$insert_count = 0;
$invalid_count = 0;
include("config.inc.php");
$db = mysql_pconnect($hostname, $username, $password) or die ("Error: Could
not connect to database!");
mysql_select_db("SalesLeads") or die ("Error: Could not select database
'".$tablename."'");
echo "Filename: <b>".$HTTP_POST_FILES['textfile']['name']."</b><br>\n";
echo "Filesize: <b>".$HTTP_POST_FILES['textfile']['size']."</b><br>\n";
// Make sure that the import date is valid
list($import_year, $import_month, $import_day) = split("-", $date_imported);
if (checkdate($import_month, $import_day, $import_year) == TRUE)
{
echo "Import Date: <b>".$date_imported." - VALID!</b><br>\n";
echo "Parsing uploaded data...";
// Open the file
$filehandle = fopen($HTTP_POST_FILES['textfile']['tmp_name'], "r");
// Read it in csv format - takes care of quotes in the fields
while ($buffer = fgetcsv($filehandle, 1000, $field_terminator))
{
if ($buffer[0] != "")
{
// Massage the data a little (remove whitespace and make it all uppercase
$buffer[0] = addslashes(trim(strtoupper($buffer[0]))); // taxpayer_number
$buffer[1] = addslashes(trim(strtoupper($buffer[1]))); // outlet number
$buffer[2] = addslashes(trim(strtoupper($buffer[2]))); // taxpayer_name
$buffer[3] = addslashes(trim(strtoupper($buffer[3]))); // taxpayer_address
$buffer[4] = addslashes(trim(strtoupper($buffer[4]))); // taxpayer_city
$buffer[5] = addslashes(trim(strtoupper($buffer[5]))); // taxpayer_state
$buffer[6] = addslashes(trim(strtoupper($buffer[6]))); // taxpayer_zip
$buffer[7] = addslashes(trim(strtoupper($buffer[7]))); //
taxpayer_county_code
$buffer[8] = addslashes(trim(strtoupper($buffer[8]))); // taxpayer_phone_nbr
$buffer[9] = addslashes(trim(strtoupper($buffer[9]))); // outlet_name
$buffer[10] = addslashes(trim(strtoupper($buffer[10]))); // outlet_address
$buffer[11] = addslashes(trim(strtoupper($buffer[11]))); // outlet_city
$buffer[12] = addslashes(trim(strtoupper($buffer[12]))); // outlet_state
$buffer[13] = addslashes(trim(strtoupper($buffer[13]))); // outlet_zip
$buffer[14] = addslashes(trim(strtoupper($buffer[14]))); //
outlet_county_code
$buffer[15] = addslashes(trim(strtoupper($buffer[15]))); // outlet_phone_nbr
$buffer[16] = addslashes(trim(strtoupper($buffer[16]))); // permit_type
$buffer[17] = addslashes(trim(strtoupper($buffer[17]))); // tax_code
$buffer[18] = addslashes(trim(strtoupper($buffer[18]))); // outlet_sic_code
$buffer[19] = addslashes(trim(strtoupper($buffer[19]))); //
permit_issue_date
$buffer[20] = addslashes(trim(strtoupper($buffer[20]))); // first_sales_date
// See if there is a record like it already (same outlet address)
$query=("SELECT * FROM sales_tax_permits WHERE outlet_address =
'".$buffer[10]."' AND outlet_city = '".$buffer[11]."' AND outlet_state =
'".$buffer[12]."' AND outlet_sic_code = '".$buffer[18]."' AND
taxpayer_number = '".$buffer[0]."'");
$result=mysql_query($query) or die ("Error: Could not execute query
'".$query."'");
$row = mysql_fetch_array($result);
if ($row[0] == "")
{
// Make sure we only get the texas taxpayers data
if ($buffer[5] == "TX")
{
// This record does not match, make sure it has an outlet_sic_code and
permit_type = "use_tax"
if (!(($buffer[16] == "USE TAX" | $buffer[16] == "MAQUILADORA") &&
($buffer[18] == "")))
{
// Okay? then insert!
$query=("INSERT INTO sales_tax_permits (taxpayer_number, outlet_number,
taxpayer_name, taxpayer_address, taxpayer_city, taxpayer_state,
taxpayer_zip, taxpayer_county_code, taxpayer_phone_nbr, outlet_name,
outlet_address, outlet_city, outlet_state, outlet_zip, outlet_county_code,
outlet_phone_nbr, permit_type, tax_code, outlet_sic_code, permit_issue_date,
first_sales_date, record_import_date) VALUES
('".$buffer[0]."','".$buffer[1]."','".$buffer[2]."','".$buffer[3]."','".$buf
fer[4]."','".$buffer[5]."','".$buffer[6]."','".$buffer[7]."','".$buffer[8]."
','".$buffer[9]."','".$buffer[10]."','".$buffer[11]."','".$buffer[12]."','".
$buffer[13]."','".$buffer[14]."','".$buffer[15]."','".$buffer[16]."','".$buf
fer[17]."','".$buffer[18]."','".$buffer[19]."','".$buffer[20]."','".$date_im
ported."')");
$result=mysql_query($query) or die ("Error: Could not execute query
'".$query."'");
// Increase the count
$insert_count = $insert_count + 1;
}
else
{
// Increase the count, permit_type = USE TAX and there is no outlet_sic_code
$invalid_count = $invalid_count + 1;
}
}
else
{
// Increase the count, not Texas Data
$invalid_count = $invalid_count + 1;
}
}
else
{
// This record exists, update!
$query=("UPDATE sales_tax_permits SET taxpayer_number = '".$buffer[0]."',
outlet_number = '".$buffer[1]."', taxpayer_name = '".$buffer[2]."',
taxpayer_address = '".$buffer[3]."', taxpayer_city = '".$buffer[4]."',
taxpayer_state = '".$buffer[5]."', taxpayer_zip = '".$buffer[6]."',
taxpayer_county_code = '".$buffer[7]."', taxpayer_phone_nbr =
'".$buffer[8]."', outlet_name = '".$buffer[9]."', outlet_zip =
'".$buffer[13]."', outlet_county_code = '".$buffer[14]."',outlet_phone_nbr =
'".$buffer[15]."', permit_type = '".$buffer[16]."', tax_code =
'".$buffer[17]."', outlet_sic_code = '".$buffer[18]."', permit_issue_date =
'".$buffer[19]."', first_sales_date = '".$buffer[20]."', record_import_date
= '".$date_imported."' WHERE outlet_address = '".$buffer[10]."' AND
outlet_city = '".$buffer[11]."' AND outlet_state = '".$buffer[12]."'");
$result=mysql_query($query) or die ("Error: Could not execute query
'".$query."'");
// Increase the count
$update_count = $update_count + 1;
}
}
}
// Close the data file
fclose($filehandle);
// Close the mysql connection
@mysql_close();
thanks for your help!
ross