Mini journal logo  Home Article Summary Main text Issue Contents

Re-discovering Archaeological Discoveries. Experiments with reproducing archaeological survey analysis

Néhémie Strupler

Cite this as: Strupler, N. 2021 Re-discovering Archaeological Discoveries. Experiments with reproducing archaeological survey analysis, Internet Archaeology 56. https://doi.org/10.11141/ia.56.6

Appendix: Code used to reproduce the analyses

The computational analyses for each dataset were carried out with the R language.

The Boeotia Project | The Sydney Cyprus Survey Project | The Pyla-Koutsopetria Archaeological Project

The Boeotia Project


# Annex: The Beotia project recoding

# Load the library
library(magrittr)
library(sf)
library(gdata)
library(maptools)
library(sp)
library(spatstat)

#
# ## Import the data
#
# Data from the ‘Appendix D’ of the CD-ROM i.e. ‘The
# Off-site Transect Collections’, file
# ‘01Boeotia.xls’.
#

datum <- ‘./111-beotia/2017-11-07’
file_name <- paste0(datum, ‘-Boeotia.xls’)
beotia <- gdata::read.xls(file_name,
                     header = TRUE, skip = 1,
                     stringsAsFactor = FALSE)

# Minimal cleaning to transform data in a strict tab
beotia <- beotia[-c(378:379), ]
beotia[, -1] <- as.data.frame(sapply(beotia[, -1],
                                     as.numeric))
beotia <- beotia[beotia$X_coord != 0L, ]

# Transforming the data in a spatial object
beotia_sf <- st_as_sf(beotia, coords = c(‘X_coord’, ‘Y_coord’))
beotia_proj <- ‘+proj=utm +zone=34S +ellps=WGS84’
st_crs(beotia_sf) <- beotia_proj

#'
#' ## Creating Polygons representing the transects
#'

beotia_sp <-  as(beotia_sf, ‘Spatial’)
hull <- rgeos::gBuffer(beotia_sp, byid = FALSE, width = 70)
hull <- rgeos::gBuffer(hull, byid = FALSE, width = 40)

win <- owin(xrange = c(min(beotia$X_coord),
                       max(beotia$X_coord)),
            yrange = c(min(beotia$Y_coord),
                       max(beotia$Y_coord)))
ppp <- ppp(beotia$X_coord, beotia$Y_coord, win)
beotia_voronoi <- dirichlet(ppp)
beotia_voronoi <- as(beotia_voronoi, ‘SpatialPolygons’)


beotia_voronoi <- rgeos::gIntersection(beotia_voronoi, hull, byid = TRUE)
proj4string(beotia_voronoi) <- CRS(beotia_proj)
beotia_voronoi <- SpatialPolygonsDataFrame(beotia_voronoi,
                                           beotia,
                                           match.ID = FALSE)
# Check the result with
#plot(beotia_voronoi)
#  title(main = ‘Tesselation’)

#'
#'
#' ### Plot of ID from the North of survey region
#'

dev.off()
png(‘rfig/beotia_id_north.png’,
     width = 580, height = 580,
     units = ‘px’, pointsize = 12,
      bg = ‘white’,  res = NA)

par(mfrow = c(1, 1), mar = c(0, 0, 0, 0))
plot(beotia_sp,
     pch = 1,
     col = ‘#FFFFFF’,
     xlim = c(14225, 15900),
     ylim = c(3300, 5376))
text(x = jitter(unname(coordinates(beotia_sp)[, 1])),
       y = unname(coordinates(beotia_sp)[, 2]),
       labels = beotia_sp$Transect, cex = 0.7)
dev.off()
#'
#' ### Plot of ID from the North of survey region
#'

par(mfrow = c(1, 1), mar = c(2, 2, 2, 2))
plot(beotia_sp,
     pch = 1,
     col = ‘#FFFFFF’,
     xlim = c(14100, 15800),
     ylim = c(2500, 3600))
text(x = jitter(unname(coordinates(beotia_sp)[, 1])),
       y = unname(coordinates(beotia_sp)[, 2]),
       labels = beotia_sp$Transect, cex = 0.7)

#'
#' ### Plot of ID from the South East of survey region
#'

png(‘rfig/beotia_id_se.png’,
     width = 680, height = 680,
     units = ‘px’, pointsize = 12,
      bg = ‘white’,  res = NA)
plot(beotia_sp,
     pch = 1,
     col = ‘#FFFFFF’,
     xlim = c(15800, 17355),
     ylim = c(2500, 3600))
text(x = jitter(unname(coordinates(beotia_sp)[, 1])),
       y = unname(coordinates(beotia_sp)[, 2]),
       labels = beotia_sp$Transect, cex = 0.7)
dev.off()

#'
#' ### Sherds density
#'

# Rewrite of function get_col from rkeos
  bgyor.pal <-  c(‘blue’, ‘green’, ‘yellow’, ‘orange’, ‘red’)
  bgyor <- grDevices::colorRampPalette(bgyor.pal, space = ‘rgb’)
  sherds_col <-
    classInt::findColours(
       classInt::classIntervals(beotia_voronoi$Ceram,
                                10,
                                style = ‘fisher’),
                          pal = bgyor(10)
    )

png(‘rfig/beotia_sherd_density.png’,
     width = 580, height = 580,
     units = ‘px’, pointsize = 12,
      bg = ‘white’,  res = NA)
plot(beotia_voronoi, col = sherds_col)
  title(main = ‘Survey Units’)
  legend(‘topright’,
         title = ‘Sherd Density’,
         fill = sherds_col %>%
           attr(., ‘palette’),
         legend = sherds_col %>%
           attr(., ‘table’) %>%
           names
         )
dev.off()

The Sydney Cyprus Survey Project


# Annex: The SCSP

# Load libraries
library(sf)
library(magrittr)
library(raster)
library(dplyr)
old_par <- par()

# Fixed_values
#
# From the explanation we learn the GIS projection
projection <- c(paste(‘+proj=utm +zone=36 +north +ellps=WGS84’))


# Import the data

     datum <- ‘./112-scsp/’ # Set your folder with the files
 zip_file <- paste0(datum, ‘Units.zip’)
# Download the data
options(HTTPUserAgent='Backward_Repro')

if(!file.exists(zip_file)){
  download.file(‘http://archaeologydataservice.ac.uk/catalogue/adsdata/arch-323-1/dissemination/zip/Units.zip’,
              zip_file)
  unzip(zip_file, exdir=datum)
}


 file_name <- paste0(datum, ‘Units.shp’)
#
# Units
scsp_units <- sf::st_read(file_name, quiet  = TRUE)
sf::st_crs(scsp_units) <- projection
scsp_units$UNIT <- as.character(scsp_units$UNIT)



#
# ## First Look
#
# Ploting `SHERD_COUN` with a background image
# provided by the European Spatial Agency through its
# Copernicus hub (Details can be found on my gitlab
# project `copernicus-data` )
# 
#'

#+ SCSP_sherd_density_plot, fig.height=6, fig.width=11
bg_path <- paste0(‘./copernicus-data/’,
                  ‘data/sydney_cyprus_survey_project/scsp_bg.tif’)
scsp_bg <-  brick(bg_path)

png(‘rfig/scsp_sherd_density_plot.png’, width=1100, height=500)
plotRGB(scsp_bg, 3, 2, 1, stretch = ‘hist’, interpolate = TRUE,
        alpha = 150)


# Rewrite of function get_col from rkeos
  bgyor.pal <-  c(‘blue’, ‘green’, ‘yellow’, ‘orange’, ‘red’)
  bgyor <- grDevices::colorRampPalette(bgyor.pal, space = ‘rgb’)
  sherds_col <-
    classInt::findColours(
       classInt::classIntervals(scsp_units$SHERD_COUN,
                                10,
                                style = ‘fisher’),
                          pal = bgyor(10)
    )

plot(as(scsp_units,’Spatial’), add = TRUE,
     col = as.character(sherds_col))

legend(‘bottomleft’,
         title = ‘Sherds Count per unit’,
         fill = attr(sherds_col, ‘palette’),
         legend = names(attr(sherds_col, ‘table’))
         )
dev.off()

#
# ## 'Ground' and 'Adjusted' visibility
#

png(‘rfig/scsp_visibilities_relation.png’,
    width=1500, height=1500,
    res=200)
plot(scsp_units$GROUND_VIS, scsp_units$ADJVIZ, ann = FALSE,
     xlim = c(-0,105), xaxs=‘i’)
abline(lm(scsp_units$ADJVIZ~scsp_units$GROUND_VIS))
sumlm <- summary(lm(scsp_units$ADJVIZ~scsp_units$GROUND_VIS))
sumlm$coefficients <- round(sumlm$coefficients,2)
title(main = ‘Relation between the \’Ground Visibility\’ and
      the \’Adjusted Visibility\’’,
      xlab = ‘Ground Visibility’,
      ylab = ‘Adjusted Visibility’)
text(60, 80, paste(‘Adjusted Visibility’,
                    ‘≈’,
                    sumlm$coefficients[1,1],
                    ‘+’,
                    sumlm$coefficients[2,1],
                   ‘*’,
                   ‘Ground Visibility’))
text(35, 98,
     paste(‘R²’,
                    ‘=‘,
                    round(sumlm$r.squared,3)))
dev.off()


#
# ## Adjusted pottery count
#

## vc = correction for visibility
## cp = counted pottery
## v = visibility

vc <- function(cp, v) {
  (cp * (100/(100 - ((100 - v)/4)) )) - cp
}

## bg = background confusion
## bgc =  background confusion correction
bgc <- function(cp, bg) {
  (cp * (100/bg )) - cp
}
## Equation from book that doesn't work
#  (cp * (100/(100 - (100-bg) ))) - cp

## acp = adjusted counted pottery
acp <- function(cp,v,bg) {
  round(cp + vc(cp,v) + bgc(cp,bg),0)
}

# Add new column to scsp with to control weighting data
scsp_units %>%
  mutate(acp = acp(TOTALSHERD,ADJVIZ,ADJBG)) -> scsp_units




#'
#' Plot Pottery Counts
#'

png(‘rfig/scsp_pottery_counts_relation.png’,
    width=1500, height=1500,
    res=200)
palette(rainbow(3))
plot(scsp_units$TOTALSHERD, scsp_units$acp, col = grey(0.1, 0.5),
     ylim = c(0, 6248), ann = FALSE)
points(scsp_units$TOTALSHERD, scsp_units$PROJSHERD, pch = 21,
     bg = as.factor(scsp_units$ADJBG))
title(main = ‘Relation between Pottery Count and
      Adjusted Pottery Count’,
      xlab = ‘Pottery Count’,
      ylab = ‘Adjusted Pottery Count’)
legend(‘topleft’,
       legend = c(‘High bgc’,
                  ‘Middle bgc’,
                  ‘Low bgc’,
                  ‘Reproduction’),
       pt.bg = c(palette(),’white’),
       pch = c(21,21,21,21),
       col = c(‘black’,’black’, ‘black’, grey(0.1,0.5)))
palette(‘default’)
dev.off()


#+ SCSP_sherd_density_proj, fig.height=6, fig.width=11

png(‘rfig/scsp_weighted_sherds.png’, width=1100, height=500)
plotRGB(scsp_bg, 3, 2, 1, stretch = ‘hist’, interpolate = TRUE,
        alpha = 150)

# Rewrite of function get_col from rkeos
  sherds_col <-
    classInt::findColours(
       classInt::classIntervals(scsp_units$PROJSHERD,
                                10,
                                style = ‘fisher’),
                          pal = bgyor(10)
    )


plot(as(scsp_units,’Spatial’), add = TRUE,
     col = as.character(sherds_col))

legend(‘bottomleft’,
         title = ‘Sherds Count per unit’,
         fill = attr(sherds_col, ‘palette’),
         legend = names(attr(sherds_col, ‘table’))
         )
dev.off()


#+ SCSP_sherd_density_proj_recoded, fig.height=6, fig.width=11

png(‘rfig/scsp_weighted_sherds_recoded.png’, width=1100, height=500)
plotRGB(scsp_bg, 3, 2, 1, stretch = ‘hist’, interpolate = TRUE,
        alpha = 150)


# Rewrite of function get_col from rkeos
  sherds_col <-
    classInt::findColours(
       classInt::classIntervals(scsp_units$acp,
                                10,
                                style = ‘fisher’),
                          pal = bgyor(10)
    )

plot(as(scsp_units,’Spatial’), add = TRUE,
     col = as.character(sherds_col))

legend(‘bottomleft’,
         title = ‘Sherds Count per unit’,
         fill = attr(sherds_col, ‘palette’),
         legend = names(attr(sherds_col, ‘table’))
         )
dev.off()

The Pyla-Koutsopetria Archaeological Project


# Annex: The PKAP 

# Load the library
library(magrittr)
library(sf)
library(gdata)
library(maptools)
library(sp)

# Fixed values
projection <- c(paste(‘+proj=utm +zone=35n +datum=WGS84 +units=m’,
                      ‘+no_defs +ellps=WGS84 +towgs84=0,0,0’))
epsg <- 32635
old_par <- par()
datum <- ‘./113-pkap/’


#
# Download the data
options(HTTPUserAgent='Backward_Repro')

data_units  <- paste0(datum, ‘data_Unit.csv’)

if(!file.exists(data_units)){
  download.file(‘http://artiraq.org/static/opencontext/revised-tables/09c85ef2dd6784a0569fdc283a7d550c.csv’,
              data_units)
}

data_finds  <- paste0(datum, ‘data_Master_Finds.csv’)

if(!file.exists(data_finds)){
download.file(‘http://artiraq.org/static/opencontext/revised-tables/745df886025114d1fa7afdb464ac5ac6.csv’,
              data_finds)
}
#
# Import the data
#


## Units

#+ read_units
file_name <- paste0(datum, ‘data_Unit.csv’)
units <- read.csv(file_name, check.names = TRUE,
                  fileEncoding = ‘UTF-8’)
colnames(units) <- gsub(‘\\.’,’’, colnames(units))
units <- st_as_sf(units, coords = c(‘LongitudeWGS84’,’LatitudeWGS84’))
st_crs(units) <- 4326
units <- st_transform(units, epsg)
# Delete unused long variables and empty data
units <- units[,!grepl(‘Project’, colnames(units))]
units <- units[,!grepl(‘ItemCategory’, colnames(units))]
units <- units[,!grepl(‘SurveyUnit’, colnames(units))]
units <- units[,!grepl(‘Geospatialnote’, colnames(units))]
units <- units[,!grepl(‘link’, colnames(units))]
units <- units[,!grepl(‘Hasnote’, colnames(units))]
units <- Filter(function(x)!all(is.na(x)), units)

# Rename URI in Unit URI for matching with batch files
# (see below), removes other *URI*
units$UnitURI <- units$URI
units <- subset(units, select = c(-URI))
units$UnitURI <- as.character(units$UnitURI)
units <- units[order(units$UnitURI),]

#+ read_Batches
file_name <- paste0(datum, ‘data_Master_Finds.csv’)
batches <- read.csv(file_name,
                 check.names = FALSE,
                 fileEncoding = ‘UTF-8’)

# Deleting weird characters
sanitizing_colnames <- function(vec) {
  vec <- gsub(‘[(|)]’,’’, vec)
  vec <- gsub(‘BCE/CE’,’’, vec)
  vec <- gsub(‘-’,’’, vec)
  vec <- gsub(‘ ‘,’’, vec)
}
colnames(batches) <- sanitizing_colnames(colnames(batches))

# Limit subset to most important variables
batches <- base::subset(batches, select =
                       c(‘Label’,’Authorship’,
                         ‘LatitudeWGS84’,’LongitudeWGS84’,
                         ‘EarlyDate’,’LateDate’,’Context3’,’Context4’,
                         ‘Context7’,’Hastype[Label]’,
                         ‘Hastype[Source]’,’ReaderNumber’,’Quantity’,
                         ‘Weight’,’DateRead’,’ExtantPart’,’FindColor’,
                         ‘Chronotype’,’Material’,’Fabricgroup’,
                         ‘Period’,’Un’,’Startdate’,
                         ‘Enddate’,’Duration’,’Recordedby’,
                         ‘Decoration’,’Comments’,
                         ‘Contains’,’Shape’,’Shapenote’,’DateRead’,
                         ‘ContextURI’))

# Change name of Context URI to make it matches later with Unit
names(batches)[names(batches) == ‘ContextURI’] <- ‘UnitURI’
batches$UnitURI <- as.character(batches$UnitURI)
batches <- batches[order(batches$UnitURI),]


# Exlude 'experimental batches' (hoovering)
batches <- batches[grep(‘exp’,batches$Label, invert = TRUE),]

# Check results ---
sum(batches$Quantity, na.rm = TRUE)
# To compare with the 16784 artifacts mentioned in Caraher 2014:174

# Make a spatial batches
batches_pts <- st_as_sf(batches,
                        coords = c(‘LongitudeWGS84’,’LatitudeWGS84’))
st_crs(batches_pts) <- 4326
batches_pts <- st_transform(batches_pts, epsg)


sp_units <-  as(units, ‘Spatial’)
# Make a hull by making a buffer and remove holes
hull <- rgeos::gBuffer(sp_units,byid = FALSE, width = 75, quadsegs = 1)

# Removes holes (2 lines code from Spacedman https://stackoverflow.com/a/12665795)
outerRings = Filter(function(f){f@ringDir == 1},hull@polygons[[1]]@Polygons)
hull = SpatialPolygons(list(Polygons(outerRings,ID = 1)))
proj4string(hull) <- projection

# Make tessellation from a point pattern dataset
library(spatstat)
wind <- owin(xrange = c(bbox(sp_units)[1,1],bbox(sp_units)[1,2]),
             yrang = c(bbox(sp_units)[2,1],bbox(sp_units)[2,2]))
ppp <- ppp(coordinates(sp_units)[,1],coordinates(sp_units)[,2], wind)
tesseled <- dirichlet(ppp)
tesseled <- as(tesseled, ‘SpatialPolygons’)
proj4string(tesseled) <- projection

# Intersect tessellation with hull and add data
units <- rgeos::gIntersection(tesseled, hull, byid = TRUE)
units <- SpatialPolygonsDataFrame(units, sp_units@data, match.ID = FALSE)
units <- st_as_sf(units)

#
# With this transformation, it is now possible to check the results
# with an output of the publication, let's see if it's working this
# time.
#

wb <- grDevices::colorRampPalette(c(‘white’,’black’))
vis_col <- classInt::findColours(classInt::classIntervals(units$PercentVisible,10), pal = wb(10))

png(‘rfig/pkap_visibility_plot.png’,
    width=1500, height=1500,
    res=200)

plot(units[‘PercentVisible’], col = vis_col)
  legend(‘topright’,
         title(main = ‘Visibility Percentages’),
         fill = attr(vis_col, ‘palette’),
         legend = names(attr(vis_col, ‘table’))
         )

dev.off()


#sherds_col <- rkeos::get_col(units$Sherds)
#plot(units[‘Sherds’], col = sherds_col)
#  legend(‘topright’,
#         title(main = ‘Sherds density’),
#         fill = attr(sherds_col, ‘palette’),
#         legend = names(attr(sherds_col, ‘table’))
#         )
#



# ## Batches
#
# Each batch has the same coordinates as the unit. This
# code attributes random coordinates inside each
# unit .
#'
source(paste0(datum, ‘rkeos_function_prk_creat_pts.R’))

sp_batches <- prk_create_pts(batches, units, common_var = ‘UnitURI’)


#' As each batch is the representation of multiple
#' artefacts, we create
#' a spatial MULTIPOINTS object for each ′batched' artefacts

expand_batches <- function(quantity) {
  dated_sherd <- data.frame(Batch = rep(batches$Label, quantity),
                            EarlyDate = rep(batches$EarlyDate, quantity),
                            LateDate = rep(batches$LateDate, quantity),
                            Period = rep(batches$Period, quantity),
                            UnitURI = rep(batches$UnitURI, quantity),
                            Context4 = rep(batches$Context4, quantity),
                            Context7 = rep(batches$Context7, quantity),
                            Material = rep(batches$Material, quantity),
                            Part = rep(batches$ExtantPart, quantity))
}

batches[is.na(batches$Quantity), ‘Quantity’] <- 0L # Change NA to 0
dated_sherd <- expand_batches(batches$Quantity)

sp_dated_sherd <- prk_create_pts(dated_sherd,
                             units, common_var = ‘UnitURI’)

#'
#' this, I concentrated on the example of the Bronze
#' Age. There is table in the book that recapitulates
#' the data:
#'
#' It shows that we could expect 205 sherds
#' (7+6+19+173) for the Bronze Age, classed in 4
#' categories. This is what we have in the data
#'

#+ book_BAtable, echo=FALSE
ba_perdiods <- c(‘Late Cypriot’, ‘Late Cypriot II’,
                 ‘Late Cypriot II-Late Cypriot III’,
                 ‘Bronze Age’)



ba_sherds <- dated_sherd[dated_sherd$Period %in% ba_perdiods &
              dated_sherd$Material %in% ‘pottery’ ,]

ba_sherds <- droplevels(ba_sherds)
knitr::kable(addmargins(table(ba_sherds$Period,ba_sherds$Part),1:2))


#'
#' Yes it works fine, with the same results. Now let us see the map
#' produced for the publication
#'
#+ book_BAmap, echo=FALSE

#'
#' Now, let's see a plot of the data
#'

png(‘rfig/pkap_lba_plot.png’,
    width=1500, height=1500,
    res=200)
plot(sf::as_Spatial(units[‘Ceramicdensity’]), col = ‘white’,
     main = ‘Distribution map of Bronze Age periods’)
plot(sp_dated_sherd[sp_dated_sherd$Period %in%
                      grep(‘Cypri|Bronze’,unique(sp_dated_sherd$Period),
                           value = TRUE),4],
     cex = 0.3,
     pch = 19,
     col = ‘black’, add = TRUE)

dev.off()


Internet Archaeology is an open access journal based in the Department of Archaeology, University of York. Except where otherwise noted, content from this work may be used under the terms of the Creative Commons Attribution 3.0 (CC BY) Unported licence, which permits unrestricted use, distribution, and reproduction in any medium, provided that attribution to the author(s), the title of the work, the Internet Archaeology journal and the relevant URL/DOI are given.

Terms and Conditions | Legal Statements | Privacy Policy | Cookies Policy | Citing Internet Archaeology

Internet Archaeology content is preserved for the long term with the Archaeology Data Service. Help sustain and support open access publication by donating to our Open Access Archaeology Fund.