Page MenuHomePhabricator
Authored By
swalker
Jun 5 2017, 9:07 AM
Size
2 KB
Referenced Files
None
Subscribers
None
-- set up some test data
create table numbers (n int(11) not null primary key);
insert into numbers values (1), (2), (3), (4), (5);
create table data (addresslist varchar(50));
insert into data values ('a'), ('b, c'), ('d, e, f'), ('g, h, i');
-- basically, we use the numbers table to provide offsets for substrings:
select addresslist,
'set 1', substring_index(addresslist, ',', 1) set1part1, substring_index(substring_index(addresslist, ',', 1), ',', -1) set1part2,
'set 2', substring_index(addresslist, ',', 2) set2part1, substring_index(substring_index(addresslist, ',', 2), ',', -1) set2part2,
'set 3', substring_index(addresslist, ',', 3) set3part1, substring_index(substring_index(addresslist, ',', 3), ',', -1) set3part2,
'set 4', substring_index(addresslist, ',', 4) set4part1, substring_index(substring_index(addresslist, ',', 4), ',', -1) set4part2
from data;
-- but of course we do it as a join, so we essentially loop over the numbers.
SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1))
FROM data r
CROSS JOIN numbers n;
-- The join condition makes sure we only pull back the relevant entry, rather than repeating a lot.
SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1))
FROM data r
INNER JOIN numbers n ON char_length(r.addresslist) - char_length(replace(r.addresslist, ',', '')) >= n.n - 1;
-- but of course, we're not using a special numbers table in this query, we're dynamically building it:
SELECT 1 n
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5;
-- and filtering to non-cleared data using a where ip <> localhost
SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox
FROM request r
INNER JOIN (SELECT 1 n
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5) n
ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1
WHERE ip <> '127.0.0.1'
-- the exists clause looks in a child resultset to see if there is at least one row, thus this will only return individual addresses where there is /not/ a geolocation record cached.
SELECT p.prox
FROM (
SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox
FROM request r
INNER JOIN (SELECT 1 n
UNION ALL
SELECT 2
UNION ALL SELECT 3
UNION ALL
SELECT 4
UNION ALL SELECT 5) n
ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1
WHERE ip <> '127.0.0.1'
) p
WHERE NOT EXISTS (SELECT 1 FROM geolocation g WHERE g.address = p.prox);
-- and we limit it to keep the transaction runtime low, preferring many small transactions instead of one big one, since this is running SERIALIZABLE

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
19436
Default Alt Text
(2 KB)

Event Timeline