Magento 2 : Data cleaning and import

Posted . Visible to the public.

Following example shows how to clean customer and order records and import customer data from another database

## Following query to identify the name of the attribute and attribute id
select * from  live.eav_attribute where entity_type_id = 1 order by attribute_id;

## Following query to fetch the mobile number 


select * from  live.customer_entity_varchar where attribute_id = 543;

## Following query to create temp table with mobile number

CREATE TABLE  live.temp_customer_entity
select ce.*, cev.value as mobile_number from  live.customer_entity as ce
    left join  live.customer_entity_varchar as cev on ce.entity_id = cev.entity_id
where cev.attribute_id = 543;
    
## select the temp table value with schema
select * from  live.temp_customer_entity;

## Following query to update the mobile number without 0 and append 62
update  live.temp_customer_entity as otce
set otce.mobile_number = concat('62',
                                trim(LEADING '0' FROM otce.mobile_number))
where otce.mobile_number is not null;

## Following query to update email with mobile number
update  live.temp_customer_entity as otce
set otce.email = concat(otce.mobile_number, ' @vasan.com')
where otce.mobile_number is not null;

## There are three records with duplicate mobile number
6289619221751 31857 36438
62895347094690 34449 34452
6288229281508 36783 36786

## Delete the duplicate record
select * from  live.temp_customer_entity as otce where otce.entity_id = 31857;
delete from  live.temp_customer_entity as otce where otce.entity_id = 31857;
delete from  live.temp_customer_entity as otce where otce.entity_id = 34452;
delete from  live.temp_customer_entity as otce where otce.entity_id = 36783;
============================================================================
## Following query to add addtional coulmn to maintain the original entity id

ALTER TABLE  dev.customer_entity ADD COLUMN ori_entity_id INT;

## Folloiwng query to insert temp table records to current table

INSERT INTO  dev.customer_entity (
     dev.customer_entity.website_id,
     dev.customer_entity.email,
     dev.customer_entity.group_id,
     dev.customer_entity.store_id,
     dev.customer_entity.is_active,
     dev.customer_entity.created_in,
     dev.customer_entity.prefix,
     dev.customer_entity.firstname,
     dev.customer_entity.lastname,
     dev.customer_entity.middlename,
     dev.customer_entity.gender,
     dev.customer_entity.password_hash,
     dev.customer_entity.ori_entity_id)
SELECT  live.temp_customer_entity.website_id,
        live.temp_customer_entity.email,
        live.temp_customer_entity.group_id,
        live.temp_customer_entity.store_id,
        live.temp_customer_entity.is_active,
        live.temp_customer_entity.created_in,
        live.temp_customer_entity.prefix,
        live.temp_customer_entity.firstname,
        live.temp_customer_entity.lastname,
        live.temp_customer_entity.middlename,
        live.temp_customer_entity.gender,
        live.temp_customer_entity.password_hash,
        live.temp_customer_entity.entity_id
FROM  live.temp_customer_entity
;
## Following query to identify the attribute id
select * from  dev.eav_attribute where entity_type_id = 1 order by attribute_id;

## Following query to identify attribute
select * from  dev.customer_entity_varchar where attribute_id = 186;
select * from  dev.customer_entity_int where attribute_id = 187;
select * from  dev.customer_entity_int where attribute_id = 190;

## For inserting mobile_number
insert into  dev.customer_entity_varchar
(    dev.customer_entity_varchar.attribute_id,
     dev.customer_entity_varchar.entity_id,
     dev.customer_entity_varchar.value )

select 186 as attribute_id, mce.entity_id as entity_id, otce.mobile_number
from  live.temp_customer_entity as otce
         left join  dev.customer_entity as mce on otce.entity_id = mce.ori_entity_id;

## for inserting is_otp_validated

insert into  dev.customer_entity_int
(    dev.customer_entity_int.attribute_id,
     dev.customer_entity_int.entity_id,
     dev.customer_entity_int.value )
select 187 as attribute_id, mce.entity_id as entity_id, 1 as is_otp_validated
from  live.temp_customer_entity as otce
    left join  dev.customer_entity as mce on otce.entity_id = mce.ori_entity_id;

## for inserting otp_count

insert into  dev.customer_entity_int
(    dev.customer_entity_int.attribute_id,
     dev.customer_entity_int.entity_id,
     dev.customer_entity_int.value )
select 190 as attribute_id, mce.entity_id as entity_id, 1 as otp_count
from  live.temp_customer_entity as otce
    left join  dev.customer_entity as mce on otce.entity_id = mce.ori_entity_id;



    
### Following is clear the customer records

DELETE FROM customer_address_entity WHERE 1;
DELETE FROM wishlist WHERE 1;
DELETE FROM wishlist_item WHERE 1;
DELETE FROM customer_entity WHERE 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE `customer_address_entity`;
TRUNCATE TABLE `customer_address_entity_datetime`;
TRUNCATE TABLE `customer_address_entity_decimal`;
TRUNCATE TABLE `customer_address_entity_int`;
TRUNCATE TABLE `customer_address_entity_text`;
TRUNCATE TABLE `customer_address_entity_varchar`;
TRUNCATE TABLE `catalog_compare_item`;
TRUNCATE TABLE `catalog_compare_list`;

TRUNCATE TABLE `customer_entity`;
TRUNCATE TABLE `customer_entity_datetime`;
TRUNCATE TABLE `customer_entity_decimal`;
TRUNCATE TABLE `customer_entity_int`;
TRUNCATE TABLE `customer_entity_text`;
TRUNCATE TABLE `customer_entity_varchar`;

TRUNCATE TABLE `customer_grid_flat`;
TRUNCATE TABLE `customer_log`;
TRUNCATE TABLE `customer_log`;
TRUNCATE TABLE `customer_visitor`;
TRUNCATE TABLE `persistent_session`;
TRUNCATE TABLE `wishlist`;
TRUNCATE TABLE `wishlist_item`;
TRUNCATE TABLE `wishlist_item_option`;
TRUNCATE TABLE `customer_store_profile`;

TRUNCATE TABLE `review`;
TRUNCATE TABLE `review_detail`;
TRUNCATE TABLE `review_entity_summary`;
TRUNCATE TABLE `review_store`;

ALTER TABLE `customer_address_entity` AUTO_INCREMENT=1;
ALTER TABLE `customer_address_entity_datetime` AUTO_INCREMENT=1;
ALTER TABLE `customer_address_entity_decimal` AUTO_INCREMENT=1;
ALTER TABLE `customer_address_entity_int` AUTO_INCREMENT=1;
ALTER TABLE `customer_address_entity_text` AUTO_INCREMENT=1;
ALTER TABLE `customer_address_entity_varchar` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity_datetime` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity_decimal` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity_int` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity_text` AUTO_INCREMENT=1;
ALTER TABLE `customer_entity_varchar` AUTO_INCREMENT=1;
ALTER TABLE `customer_store_profile` AUTO_INCREMENT=1;
SET FOREIGN_KEY_CHECKS = 1;


## Following to clear the order records
## Clear the order data
SET FOREIGN_KEY_CHECKS=0;
TRUNCATE TABLE `gift_message`;
TRUNCATE TABLE `sales_bestsellers_aggregated_daily`;
TRUNCATE TABLE `sales_bestsellers_aggregated_monthly`;
TRUNCATE TABLE `sales_bestsellers_aggregated_yearly`;
TRUNCATE TABLE `sales_creditmemo`;
TRUNCATE TABLE `sales_creditmemo_comment`;
TRUNCATE TABLE `sales_creditmemo_grid`;
TRUNCATE TABLE `sales_creditmemo_item`;
TRUNCATE TABLE `sales_invoice`;
TRUNCATE TABLE `sales_invoiced_aggregated`;
TRUNCATE TABLE `sales_invoiced_aggregated_order`;
TRUNCATE TABLE `sales_invoice_comment`;
TRUNCATE TABLE `sales_invoice_grid`;
TRUNCATE TABLE `sales_invoice_item`;
TRUNCATE TABLE `sales_order`;
TRUNCATE TABLE `sales_order_address`;
TRUNCATE TABLE `sales_order_aggregated_created`;
TRUNCATE TABLE `sales_order_aggregated_updated`;
TRUNCATE TABLE `sales_order_grid`;
TRUNCATE TABLE `sales_order_item`;
TRUNCATE TABLE `sales_order_payment`;
TRUNCATE TABLE `sales_order_status_history`;
TRUNCATE TABLE `sales_order_tax`;
TRUNCATE TABLE `sales_order_tax_item`;
TRUNCATE TABLE `sales_payment_transaction`;
TRUNCATE TABLE `sales_refunded_aggregated`;
TRUNCATE TABLE `sales_refunded_aggregated_order`;
TRUNCATE TABLE `sales_shipment`;
TRUNCATE TABLE `sales_shipment_comment`;
TRUNCATE TABLE `sales_shipment_grid`;
TRUNCATE TABLE `sales_shipment_item`;
TRUNCATE TABLE `sales_shipment_track`;
TRUNCATE TABLE `sales_shipping_aggregated`;
TRUNCATE TABLE `sales_shipping_aggregated_order`;
TRUNCATE TABLE `quote`;
TRUNCATE TABLE `quote_address`;
TRUNCATE TABLE `quote_address_item`;
TRUNCATE TABLE `quote_id_mask`;
TRUNCATE TABLE `quote_item`;
TRUNCATE TABLE `quote_item_option`;
TRUNCATE TABLE `quote_payment`;
TRUNCATE TABLE `quote_shipping_rate`;

TRUNCATE TABLE sequence_invoice_0;
TRUNCATE TABLE sequence_order_0;
TRUNCATE TABLE sequence_shipment_0;
TRUNCATE TABLE sequence_creditmemo_0;


TRUNCATE TABLE sequence_invoice_1;
TRUNCATE TABLE sequence_order_1;
TRUNCATE TABLE sequence_shipment_1;
TRUNCATE TABLE sequence_creditmemo_1;

TRUNCATE TABLE sequence_invoice_3;
TRUNCATE TABLE sequence_order_3;
TRUNCATE TABLE sequence_shipment_3;
TRUNCATE TABLE sequence_creditmemo_3;

TRUNCATE TABLE inventory_reservation;
alter table sequence_invoice_0 AUTO_INCREMENT=1;
alter table sequence_order_0 AUTO_INCREMENT=1;
alter table sequence_shipment_0 AUTO_INCREMENT=1;
alter table sequence_creditmemo_0 AUTO_INCREMENT=1;

alter table sequence_invoice_1 AUTO_INCREMENT=1;
alter table sequence_order_1 AUTO_INCREMENT=1;
alter table sequence_shipment_1 AUTO_INCREMENT=1;
alter table sequence_creditmemo_1 AUTO_INCREMENT=1;

alter table sequence_invoice_3 AUTO_INCREMENT=1;
alter table sequence_order_3 AUTO_INCREMENT=1;
alter table sequence_shipment_3 AUTO_INCREMENT=1;
alter table sequence_creditmemo_3 AUTO_INCREMENT=1;
SET FOREIGN_KEY_CHECKS = 1;



==================================================
## for clearing the directory record and reimport


delete from  dev.directory_country_region where country_id = 'ID';
delete from  dev.directory_city where 1;
ALTER TABLE  dev.directory_city AUTO_INCREMENT = 1;
ALTER TABLE  dev.directory_district AUTO_INCREMENT = 1;
ALTER TABLE  dev.directory_sub_district AUTO_INCREMENT = 1;

==================================================    
## for inserting address data


INSERT INTO  dev.customer_address_entity (
     dev.customer_address_entity.increment_id,
     dev.customer_address_entity.parent_id,
     dev.customer_address_entity.is_active,
     dev.customer_address_entity.city,
     dev.customer_address_entity.company,
     dev.customer_address_entity.country_id,
     dev.customer_address_entity.fax,
     dev.customer_address_entity.firstname,
     dev.customer_address_entity.lastname,
     dev.customer_address_entity.middlename,
     dev.customer_address_entity.postcode,
     dev.customer_address_entity.prefix,
     dev.customer_address_entity.region,
     dev.customer_address_entity.region_id,
     dev.customer_address_entity.street,
     dev.customer_address_entity.suffix,
     dev.customer_address_entity.telephone,
     dev.customer_address_entity.vat_id,
     dev.customer_address_entity.vat_is_valid,
     dev.customer_address_entity.vat_request_date,
     dev.customer_address_entity.vat_request_id,
     dev.customer_address_entity.vat_request_success)
SELECT  live.customer_address_entity.increment_id,
        dev.customer_entity.entity_id,
        live.customer_address_entity.is_active,
        live.customer_address_entity.city,
        live.customer_address_entity.company,
        live.customer_address_entity.country_id,
        live.customer_address_entity.fax,
        live.customer_address_entity.firstname,
        live.customer_address_entity.lastname,
        live.customer_address_entity.middlename,
        live.customer_address_entity.postcode,
        live.customer_address_entity.prefix,
        live.customer_address_entity.region,
        live.customer_address_entity.region_id,
        live.customer_address_entity.street,
        live.customer_address_entity.suffix,
        live.customer_address_entity.telephone,
        live.customer_address_entity.vat_id,
        live.customer_address_entity.vat_is_valid,
        live.customer_address_entity.vat_request_date,
        live.customer_address_entity.vat_request_id,
        live.customer_address_entity.vat_request_success
FROM  live.customer_address_entity
    LEFT JOIN  dev.customer_entity
        on  dev.customer_entity.ori_entity_id = ` live`.customer_address_entity.`parent_id`
where ` live`.`customer_address_entity`.city like '%,%,%'
;


## for city and district and sub_district 


select * from  dev.eav_attribute where entity_type_id = 2 order by attribute_id;
SELECT * FROM  dev.eav_attribute WHERE entity_type_id = 2 and attribute_id = 217;
SELECT * FROM  dev.eav_attribute WHERE entity_type_id = 2 and attribute_id = 218;

select mcae.city ,
       SUBSTRING_INDEX(mcae.city, ',', 1) as sub_city,
       SUBSTRING_INDEX(SUBSTRING_INDEX(mcae.city, ',', 2), ',', -1) as district,
       SUBSTRING_INDEX(SUBSTRING_INDEX(mcae.city, ',', 3), ',', -1) as sub_district

from  dev.customer_address_entity as mcae;

## for inserting district

insert into  dev.customer_address_entity_varchar
(    dev.customer_address_entity_varchar.attribute_id,
     dev.customer_address_entity_varchar.entity_id,
     dev.customer_address_entity_varchar.value )
select 217 as attribute_id, mcae.entity_id as entity_id,
       trim(SUBSTRING_INDEX(SUBSTRING_INDEX(mcae.city, ',', 2), ',', -1)) as district
from  dev.customer_address_entity as mcae ;

## for inserting sub_district

insert into  dev.customer_address_entity_varchar
(    dev.customer_address_entity_varchar.attribute_id,
     dev.customer_address_entity_varchar.entity_id,
     dev.customer_address_entity_varchar.value )
select 218 as attribute_id, mcae.entity_id as entity_id,
       trim(SUBSTRING_INDEX(SUBSTRING_INDEX(mcae.city, ',', 3), ',', -1)) as sub_district
from  dev.customer_address_entity as mcae ;

## for updating city

update  dev.customer_address_entity as mcae
set mcae.city = trim(SUBSTRING_INDEX(mcae.city, ',', 1))
where mcae.entity_id > 0;


select * from  dev.customer_address_entity;
select mcae.city, caev.value as district
from  dev.customer_address_entity as mcae
    left join  dev.customer_address_entity_varchar caev on mcae.entity_id = caev.entity_id
where caev.attribute_id = 217;

select mcae.city, caev.value as sub_district
from  dev.customer_address_entity as mcae
    left join  dev.customer_address_entity_varchar caev on mcae.entity_id = caev.entity_id
where caev.attribute_id = 218;

## for updating region with directory region

select mcae.region, mcae.region_id from  dev.customer_address_entity as mcae;

UPDATE  dev.customer_address_entity as mcae
    INNER JOIN  dev.directory_country_region as mdcr
    ON mcae.region = mdcr.default_name
SET mcae.region_id = mdcr.region_id
where mcae.region is not null;


vasan
Last edit
vasan
Posted by vasan to vasan's deck (2021-09-24 12:02)