-- set up some test data create table numbers (n int(11) not null primary key); insert into numbers values (1), (2), (3), (4), (5); create table data (addresslist varchar(50)); insert into data values ('a'), ('b, c'), ('d, e, f'), ('g, h, i'); -- basically, we use the numbers table to provide offsets for substrings: select addresslist, 'set 1', substring_index(addresslist, ',', 1) set1part1, substring_index(substring_index(addresslist, ',', 1), ',', -1) set1part2, 'set 2', substring_index(addresslist, ',', 2) set2part1, substring_index(substring_index(addresslist, ',', 2), ',', -1) set2part2, 'set 3', substring_index(addresslist, ',', 3) set3part1, substring_index(substring_index(addresslist, ',', 3), ',', -1) set3part2, 'set 4', substring_index(addresslist, ',', 4) set4part1, substring_index(substring_index(addresslist, ',', 4), ',', -1) set4part2 from data; -- but of course we do it as a join, so we essentially loop over the numbers. SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1)) FROM data r CROSS JOIN numbers n; -- The join condition makes sure we only pull back the relevant entry, rather than repeating a lot. SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1)) FROM data r INNER JOIN numbers n ON char_length(r.addresslist) - char_length(replace(r.addresslist, ',', '')) >= n.n - 1; -- but of course, we're not using a special numbers table in this query, we're dynamically building it: SELECT 1 n UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5; -- and filtering to non-cleared data using a where ip <> localhost SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox FROM request r INNER JOIN (SELECT 1 n UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5) n ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1 WHERE ip <> '127.0.0.1' -- the exists clause looks in a child resultset to see if there is at least one row, thus this will only return individual addresses where there is /not/ a geolocation record cached. SELECT p.prox FROM ( SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox FROM request r INNER JOIN (SELECT 1 n UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5) n ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1 WHERE ip <> '127.0.0.1' ) p WHERE NOT EXISTS (SELECT 1 FROM geolocation g WHERE g.address = p.prox); -- and we limit it to keep the transaction runtime low, preferring many small transactions instead of one big one, since this is running SERIALIZABLE