Where in London does the Police Stop & Search most People?
Story of the visualisation
As starting point, I analysed changes in the number of Stop & Search operations and detected offenses over time from October 2017 until September 2020. While the plots showed a significant increase in the number of both over time, the percentage of detected offenses compared to the number of stop & search operations decreased from 31.3% to 24.9%. Furthermore, the distribution of objects of search and of ethnicity of the searched individuals did not change over time. Having detected this, I decided to not look at changes over time when it comes to these variables but rather at other variables. To analyse the geographical distribution of stop & search operations as well as of the detected offenses, I map the respective numbers for each ward. The distribution of searches and crimes is not homogeneous, but they preliminary happen in the centre of London. But, no significant difference between the distribution of stop & search operations and detected offenses can be seen. Therefore, in my further analysis I will focus solely on the geographical distribution of number of detected offenses. Finally, I plot the data on the map of London grouped by ethnicity and by object searched to detect patterns in the geographical distribution. From the created maps it is evident, that the detection of crimes related to offensive weapons is more geographically dispersed than related to drugs or stolen goods. Moreover, detected offences committed by white people are more concentrated in the centre of London compared to Black and Asians
Initial overview of the data
# Number of Stop & Search operations by gender
table1 <- stop_search_all %>%
count(gender, sort=TRUE)
kbl(table1,col.names=c("Gender","Count")) %>%
kable_styling()
| Gender | Count |
|---|---|
| Male | 636838 |
| Female | 45848 |
| NA | 9160 |
| Other | 385 |
# Number of Stop & Search operations by object of search
table2 <- stop_search_all %>%
count(object_of_search, sort=TRUE)
kbl(table2,col.names=c("Object of search","Count")) %>%
kable_styling()
| Object of search | Count |
|---|---|
| Controlled drugs | 418103 |
| Offensive weapons | 119754 |
| Stolen goods | 74515 |
| Evidence of offences under the Act | 32438 |
| Anything to threaten or harm anyone | 24843 |
| Articles for use in criminal damage | 14474 |
| Firearms | 4583 |
| NA | 1800 |
| Fireworks | 1721 |
# Number of Stop & Search operations by ethnicity
table3 <- stop_search_all %>%
count(officer_defined_ethnicity, sort=TRUE)
kbl(table3,col.names=c("Ethnicity","Count")) %>%
kable_styling()
| Ethnicity | Count |
|---|---|
| Black | 274058 |
| White | 257779 |
| Asian | 118568 |
| Other | 28232 |
| NA | 13594 |
# Number of Stop & Search operations by age
table4 <- stop_search_all %>%
count(age_range)
kbl(table4,col.names=c("Age","Count")) %>%
kable_styling()
| Age | Count |
|---|---|
| 10-17 | 116893 |
| 18-24 | 235540 |
| 25-34 | 144190 |
| over 34 | 104597 |
| under 10 | 126 |
| NA | 90885 |
#I read London Wards shapefile
london_wards_sf <- read_sf(here("data/London-wards-2018_ESRI/London_Ward.shp"))
# I control the type of geometry that the shapefile has
st_geometry(london_wards_sf)#Geometry is Polygon
## Geometry set for 657 features
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 504000 ymin: 156000 xmax: 562000 ymax: 201000
## projected CRS: OSGB 1936 / British National Grid
## First 5 geometries:
# I transfrom CRS to 4326
london_wgs84 <- london_wards_sf %>%
st_transform(4326) # transfrom CRS to WGS84, latitude/longitude
london_wgs84$geometry
## Geometry set for 657 features
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -0.51 ymin: 51.3 xmax: 0.334 ymax: 51.7
## geographic CRS: WGS 84
## First 5 geometries:
Filter data
# concentrate in top three searches, age_ranges, and officer defined ethnicities
which_searches <- c("Controlled drugs", "Offensive weapons","Stolen goods" )
which_ages <- c("10-17", "18-24","25-34", "over 34")
which_ethnicity <- c("White", "Black", "Asian")
stop_search_top <- stop_search_all %>%
#filter out rows with no latitude/longitude
filter(!is.na(lng)) %>%
filter(!is.na(lat)) %>%
# concentrate in top searches, age_ranges, and officer defined ethnicities
filter(object_of_search %in% which_searches) %>%
filter(age_range %in% which_ages) %>%
filter(officer_defined_ethnicity %in% which_ethnicity) %>%
# I relevel factors so everything appears in correct order
mutate(
object_of_search = fct_relevel(object_of_search,
c("Controlled drugs", "Offensive weapons","Stolen goods")),
age_range = fct_relevel(age_range,
c( "10-17","18-24", "25-34", "over 34")),
officer_defined_ethnicity = fct_relevel(officer_defined_ethnicity,
c("White", "Black", "Asian")),
offense= case_when(outcome %in% c("Nothing found - no further action", "A no further action disposal")~ "no",
TRUE ~ "yes"
))
Development over time
p1 <- stop_search_top %>%
filter(month %in% c(1,2,3,4,5,6,7,8,9)) %>%
filter(year != 2017) %>%
group_by(year, object_of_search) %>%
summarise(count=n()) %>%
mutate(percentage=((count/sum(count)))) %>%
ggplot(aes(y=count, x=year, fill=object_of_search))+
geom_col()+
labs(y="", x="", title= "Number of Stop & Search operations by object of search", fill= "Object of search")+
geom_text(aes(label=percent(round(percentage,4))),geom ='text', col = 'white', vjust=1.5, position= position_stack())+
theme_classic()+
scale_y_continuous(label=comma)+
scale_fill_manual(values=c("#12364E", "#750833", "#7D0263"))+
theme(legend.position='top',
legend.justification='left',
legend.direction='horizontal')+
NULL
p2 <- stop_search_top %>%
filter(month %in% c(1,2,3,4,5,6,7,8,9)) %>%
filter(year != 2017) %>%
group_by(year, officer_defined_ethnicity) %>%
summarise(count=n()) %>%
mutate(sum = sum(count), percentage=((count/sum))) %>%
ggplot(aes(y=count, x=year, fill=officer_defined_ethnicity))+
geom_col()+
labs(y="", x="", title= "Number of Stop & Search operations by ethnicity", fill= "Ethnicity")+
geom_text(aes(label=percent(round(percentage,3))),geom ='text', col = 'white',vjust=1.5, position= position_stack() )+
theme_classic()+
scale_y_continuous(label=comma)+
scale_fill_manual(values=c("#D27302", "#A43407", "#0C4C47"))+
theme(legend.position='top',
legend.justification='left',
legend.direction='horizontal')+
NULL
p3 <- stop_search_top %>%
filter(month %in% c(1,2,3,4,5,6,7,8,9)) %>%
filter(year != 2017) %>%
group_by(year, offense) %>%
summarise(count=n()) %>%
mutate(percentage=((count/sum(count)))) %>%
ggplot(aes(y=count, x=year, fill=offense))+
geom_col()+
labs(y="Number of Stop & Search operations", x="", title= "Number of Stop & Search operations by outcome", fill= "Offense")+
geom_text(aes(label=percent(round(percentage,4))),geom ='text', col = 'white',vjust=1.5, position= position_stack() )+
theme_classic()+
scale_y_continuous(label=comma)+
geom_text(
aes(label = stat(y), group = year),
stat = 'summary', fun = sum, vjust = -0.5, col= "black")+
scale_fill_manual(values=c("#37803C", "#9B211D"))+
theme(legend.position='top',
legend.justification='left',
legend.direction='horizontal')+
NULL
(p3 +p1 +p2) + plot_annotation(
title = 'Since 2018 the number of stop & search operations has increased significantly',
subtitle = 'Evolvement of the number of stop & search operations grouped by outcome, object of search and ethnicity from 2018 until 2020 (Jan-Sep*)',
caption = 'Source: data.police.uk, Note: * only data from January to September to ensure comparability', theme = theme(plot.title = element_text(size = 18, face= "bold"))
)

Maps: Geographical distributions
# Here we retrieve and apply the CRS of london_wgs84
stop_search_sf <- st_as_sf(stop_search_top,
coords=c('lng', 'lat'),
crs=st_crs(london_wgs84))
#glimpse(stop_search_sf) #now geometry added
# Count how many S&S happened inside each ward
london_all <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
stop_search_sf)))
p1 <- ggplot(data = london_all, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#37803C")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Stop & Search Operations in London") +
theme(axis.text = element_blank()) +
theme(strip.text = element_text(color = "white"))+
NULL
# filter out stop-and-search where no further action was taken
offense_sf <- stop_search_sf %>%
filter(offense == "yes")
# Count how many offenses happened inside each ward
london_offense <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
offense_sf)))
p2 <- ggplot(data = london_offense, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#9B211D")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Offense detected through Stop & Search Operation in London") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
(p1 + p2) + plot_annotation(
title = 'Wards with more stop & search operations detect more offenses',
subtitle = 'Geographical distribution of search & stop operations and detected offenses from October 2017 until September 2020',
caption = 'Source: data.police.uk', theme = theme(plot.title = element_text(size = 18, face= "bold"), plot.subtitle = element_text(size = 15)))

# filter out object of search 1
vis1a_sf <- offense_sf %>%
filter(object_of_search== "Controlled drugs")
# Count how many S&S happened inside each ward
london_vis1a <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis1a_sf)))
p1 <- ggplot(data = london_vis1a, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#12364E", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses when searched for Controlled drugs") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
# filter out object of search 2
vis1b_sf <- offense_sf %>%
filter(object_of_search== "Offensive weapons")
# Count how many S&S happened inside each ward
london_vis1b <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis1b_sf)))
p2 <- ggplot(data = london_vis1b, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#750833", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses when searched for Offensive weapons") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
# filter out object of search 3
vis1c_sf <- offense_sf %>%
filter(object_of_search== "Stolen goods")
# Count how many S&S happened inside each ward
london_vis1c <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis1c_sf)))
p3 <- ggplot(data = london_vis1c, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#7D0263", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses when searched for Stolen goods") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
(p1 + p2 +p3) + plot_annotation(
title = 'Offensive weapons is a more geographically dispersed crime than controlled drugs and stolen goods',
subtitle = 'Geographical distribution of detected crimes by object of search in London from October 2017 until September 2020',
caption = 'Source: data.police.uk', theme = theme(plot.title = element_text(size = 18, face= "bold"), plot.subtitle = element_text(size = 15)))

# filter out ethnicity 1
vis2a_sf <- offense_sf %>%
filter(officer_defined_ethnicity== "White")
# Count how many S&S happened inside each ward
london_vis2a <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis2a_sf)))
p1 <- ggplot(data = london_vis2a, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#B56200", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses commited by White") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
# filter out ethnicity 2
vis2b_sf <- offense_sf %>%
filter(officer_defined_ethnicity== "Black")
# Count how many S&S happened inside each ward
london_vis2b <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis2b_sf)))
p2 <- ggplot(data = london_vis2b, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#A43407", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses commited by Black") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
# filter out ethnicity 3
vis2c_sf <- offense_sf %>%
filter(officer_defined_ethnicity== "Asian")
# Count how many S&S happened inside each ward
london_vis2c <- london_wgs84 %>%
mutate(count = lengths(
st_contains(london_wgs84,
vis2c_sf)))
p3 <- ggplot(data = london_vis2c, aes(fill = count)) +
geom_sf() +
scale_fill_gradient(low = "white", high = "#0C4C47", na.value = "transparent")+
theme_minimal()+
coord_sf(datum = NA) + #remove coordinates
labs(title = "Detected offenses commited by Asian") +
theme(axis.text = element_blank(),
strip.text = element_text(color = "white")
)+
NULL
(p1 + p2 +p3) + plot_annotation(
title = 'Detected offences commited by white people are more concentraded in the centre of London compared to Black and Asians',
subtitle = 'Geographical distribution of detected crimes by ethnicity in London from October 2017 until September 2020',
caption = 'Source: data.police.uk', theme = theme(plot.title = element_text(size = 18, face= "bold"), plot.subtitle = element_text(size = 15)))
