Page MenuHomePhabricator
Paste P137

(An Untitled Masterwork)
ActivePublic

Authored by swalker on Jun 5 2017, 9:07 AM.
-- set up some test data
create table numbers (n int(11) not null primary key);
insert into numbers values (1), (2), (3), (4), (5);
create table data (addresslist varchar(50));
insert into data values ('a'), ('b, c'), ('d, e, f'), ('g, h, i');
-- basically, we use the numbers table to provide offsets for substrings:
select addresslist,
'set 1', substring_index(addresslist, ',', 1) set1part1, substring_index(substring_index(addresslist, ',', 1), ',', -1) set1part2,
'set 2', substring_index(addresslist, ',', 2) set2part1, substring_index(substring_index(addresslist, ',', 2), ',', -1) set2part2,
'set 3', substring_index(addresslist, ',', 3) set3part1, substring_index(substring_index(addresslist, ',', 3), ',', -1) set3part2,
'set 4', substring_index(addresslist, ',', 4) set4part1, substring_index(substring_index(addresslist, ',', 4), ',', -1) set4part2
from data;
-- but of course we do it as a join, so we essentially loop over the numbers.
SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1))
FROM data r
CROSS JOIN numbers n;
-- The join condition makes sure we only pull back the relevant entry, rather than repeating a lot.
SELECT trim(substring_index(substring_index(r.addresslist, ',', n.n), ',', -1))
FROM data r
INNER JOIN numbers n ON char_length(r.addresslist) - char_length(replace(r.addresslist, ',', '')) >= n.n - 1;
-- but of course, we're not using a special numbers table in this query, we're dynamically building it:
SELECT 1 n
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5;
-- and filtering to non-cleared data using a where ip <> localhost
SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox
FROM request r
INNER JOIN (SELECT 1 n
UNION ALL SELECT 2
UNION ALL SELECT 3
UNION ALL SELECT 4
UNION ALL SELECT 5) n
ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1
WHERE ip <> '127.0.0.1'
-- the exists clause looks in a child resultset to see if there is at least one row, thus this will only return individual addresses where there is /not/ a geolocation record cached.
SELECT p.prox
FROM (
SELECT trim(substring_index(substring_index(r.forwardedip, ',', n.n), ',', -1)) prox
FROM request r
INNER JOIN (SELECT 1 n
UNION ALL
SELECT 2
UNION ALL SELECT 3
UNION ALL
SELECT 4
UNION ALL SELECT 5) n
ON char_length(r.forwardedip) - char_length(replace(r.forwardedip, ',', '')) >= n.n - 1
WHERE ip <> '127.0.0.1'
) p
WHERE NOT EXISTS (SELECT 1 FROM geolocation g WHERE g.address = p.prox);
-- and we limit it to keep the transaction runtime low, preferring many small transactions instead of one big one, since this is running SERIALIZABLE

Event Timeline

swalker created this object with visibility "Public (No Login Required)".