Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Paste
P137
(An Untitled Masterwork)
Active
Public
Actions
Authored by
swalker
on Jun 5 2017, 9:07 AM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Tags
None
Referenced Files
F21230:
Jun 5 2017, 9:07 AM
2017-06-05 09:07:55 (UTC+0)
Subscribers
None
-- set up some test data
create
table
numbers
(
n
int
(
11
)
not
null
primary
key
);
insert
into
numbers
values
(
1
),
(
2
),
(
3
),
(
4
),
(
5
);
create
table
data
(
addresslist
varchar
(
50
));
insert
into
data
values
(
'a'
),
(
'b, c'
),
(
'd, e, f'
),
(
'g, h, i'
);
-- basically, we use the numbers table to provide offsets for substrings:
select
addresslist
,
'set 1'
,
substring_index
(
addresslist
,
','
,
1
)
set1part1
,
substring_index
(
substring_index
(
addresslist
,
','
,
1
),
','
,
-
1
)
set1part2
,
'set 2'
,
substring_index
(
addresslist
,
','
,
2
)
set2part1
,
substring_index
(
substring_index
(
addresslist
,
','
,
2
),
','
,
-
1
)
set2part2
,
'set 3'
,
substring_index
(
addresslist
,
','
,
3
)
set3part1
,
substring_index
(
substring_index
(
addresslist
,
','
,
3
),
','
,
-
1
)
set3part2
,
'set 4'
,
substring_index
(
addresslist
,
','
,
4
)
set4part1
,
substring_index
(
substring_index
(
addresslist
,
','
,
4
),
','
,
-
1
)
set4part2
from
data
;
-- but of course we do it as a join, so we essentially loop over the numbers.
SELECT
trim
(
substring_index
(
substring_index
(
r
.
addresslist
,
','
,
n
.
n
),
','
,
-
1
))
FROM
data
r
CROSS
JOIN
numbers
n
;
-- The join condition makes sure we only pull back the relevant entry, rather than repeating a lot.
SELECT
trim
(
substring_index
(
substring_index
(
r
.
addresslist
,
','
,
n
.
n
),
','
,
-
1
))
FROM
data
r
INNER
JOIN
numbers
n
ON
char_length
(
r
.
addresslist
)
-
char_length
(
replace
(
r
.
addresslist
,
','
,
''
))
>=
n
.
n
-
1
;
-- but of course, we're not using a special numbers table in this query, we're dynamically building it:
SELECT
1
n
UNION
ALL
SELECT
2
UNION
ALL
SELECT
3
UNION
ALL
SELECT
4
UNION
ALL
SELECT
5
;
-- and filtering to non-cleared data using a where ip <> localhost
SELECT
trim
(
substring_index
(
substring_index
(
r
.
forwardedip
,
','
,
n
.
n
),
','
,
-
1
))
prox
FROM
request
r
INNER
JOIN
(
SELECT
1
n
UNION
ALL
SELECT
2
UNION
ALL
SELECT
3
UNION
ALL
SELECT
4
UNION
ALL
SELECT
5
)
n
ON
char_length
(
r
.
forwardedip
)
-
char_length
(
replace
(
r
.
forwardedip
,
','
,
''
))
>=
n
.
n
-
1
WHERE
ip
<>
'127.0.0.1'
-- the exists clause looks in a child resultset to see if there is at least one row, thus this will only return individual addresses where there is /not/ a geolocation record cached.
SELECT
p
.
prox
FROM
(
SELECT
trim
(
substring_index
(
substring_index
(
r
.
forwardedip
,
','
,
n
.
n
),
','
,
-
1
))
prox
FROM
request
r
INNER
JOIN
(
SELECT
1
n
UNION
ALL
SELECT
2
UNION
ALL
SELECT
3
UNION
ALL
SELECT
4
UNION
ALL
SELECT
5
)
n
ON
char_length
(
r
.
forwardedip
)
-
char_length
(
replace
(
r
.
forwardedip
,
','
,
''
))
>=
n
.
n
-
1
WHERE
ip
<>
'127.0.0.1'
)
p
WHERE
NOT
EXISTS
(
SELECT
1
FROM
geolocation
g
WHERE
g
.
address
=
p
.
prox
);
-- and we limit it to keep the transaction runtime low, preferring many small transactions instead of one big one, since this is running SERIALIZABLE
Event Timeline
swalker
created this paste.
Jun 5 2017, 9:07 AM
2017-06-05 09:07:55 (UTC+0)
swalker
created this object with visibility "Public (No Login Required)".
Log In to Comment