2018-02-09

  • In R
  • In Python
  • API
  • Web

In R

Getting started

Additional libraries

Occurrence

occurrence(scientificname = NULL, year = NULL, obisid = NULL, aphiaid = NULL,
  groupid = NULL, resourceid = NULL, nodeid = NULL, areaid = NULL, startdate = NULL,
  enddate = NULL, startdepth = NULL, enddepth = NULL, geometry = NULL, qc = NULL,
  fields = NULL, verbose = FALSE)
ptevol <- occurrence("Pterois volitans") # red lionfish
## 
Retrieved 2000 records of 2436 (82%)
Retrieved 2436 records of 2436 (100%)
colnames(ptevol)
##  [1] "id"                            "decimalLongitude"             
##  [3] "decimalLatitude"               "depth"                        
##  [5] "institutionCode"               "collectionCode"               
##  [7] "catalogNumber"                 "individualCount"              
##  [9] "datasetName"                   "phylum"                       
## [11] "order"                         "family"                       
## [13] "genus"                         "scientificName"               
## [15] "originalScientificName"        "scientificNameAuthorship"     
## [17] "obisID"                        "resourceID"                   
## [19] "species"                       "qc"                           
## [21] "aphiaID"                       "speciesID"                    
## [23] "scientificNameID"              "class"                        
## [25] "eventDate"                     "yearcollected"                
## [27] "basisOfRecord"                 "locality"                     
## [29] "accessRights"                  "collectionID"                 
## [31] "habitat"                       "higherClassification"         
## [33] "higherGeography"               "language"                     
## [35] "locationRemarks"               "modified"                     
## [37] "occurrenceID"                  "recordedBy"                   
## [39] "recordNumber"                  "rights"                       
## [41] "rightsHolder"                  "specificEpithet"              
## [43] "stateProvince"                 "type"                         
## [45] "identifiedBy"                  "geodeticDatum"                
## [47] "bibliographicCitation"         "minimumDepthInMeters"         
## [49] "maximumDepthInMeters"          "coordinateUncertaintyInMeters"
## [51] "eventID"                       "footprintWKT"                 
## [53] "occurrenceRemarks"             "occurrenceStatus"             
## [55] "waterBody"                     "fieldNumber"                  
## [57] "footprintSRS"                  "references"                   
## [59] "taxonRank"                     "county"                       
## [61] "lifestage"                     "dynamicProperties"            
## [63] "dateIdentified"                "vernacularName"               
## [65] "coordinatePrecision"           "eventTime"                    
## [67] "associatedMedia"               "datasetID"                    
## [69] "identificationRemarks"         "eventRemarks"                 
## [71] "ownerInstitutionCode"          "taxonomicStatus"              
## [73] "island"                        "islandGroup"                  
## [75] "materialSampleID"              "continent"

Filter by year

occurrence("Pterois volitans", year = 1991, 
           fields = c("id", "decimalLongitude", "decimalLatitude", "year"))
## 
Retrieved 11 records of 11 (100%)
##           id decimalLongitude decimalLatitude year
## 1    8317355         153.6300       -27.60000   NA
## 2   10918348          55.6000       -20.86667   NA
## 3  702259173         153.6670       -28.40000   NA
## 4  702263624         153.0330       -30.80000   NA
## 5  702428002         145.8500        -5.16667   NA
## 6  703286139         153.6330       -28.39166   NA
## 7  703690415         117.7808       -19.99333   NA
## 8  703690793         116.9650       -19.76167   NA
## 9  703690899         116.7983       -19.62833   NA
## 10 703691118         118.7017       -18.94334   NA
## 11 703691119         117.7317       -20.01917   NA

Filter by date

occurrence("Pterois volitans", startdate = '1980-03-01', enddate = '1980-10-30', 
           fields = c("id", "decimalLongitude", "decimalLatitude", "eventDate"))
## 
Retrieved 3 records of 3 (100%)
##          id decimalLongitude decimalLatitude           eventDate
## 1    868932         158.1986        7.008333 1980-09-05 07:00:00
## 2 399697683        -177.8700      -29.240000 1980-08-06 10:00:00
## 3 702370348         116.0660      -20.283000 1980-05-31 10:00:00

Filter by depth

occurrence("Pterois volitans", startdepth = 100, enddepth = 200,
           fields = c("id", "decimalLongitude", "decimalLatitude", "depth"))
## 
Retrieved 10 records of 10 (100%)
##           id decimalLongitude decimalLatitude depth
## 1  703597136         127.6836      -12.391670 116.0
## 2  703597267         127.9425      -12.925975 100.0
## 3  703597268         127.9511      -12.750695 102.5
## 4  703597370         134.9856       -8.843335 107.0
## 5  703598330         113.3336      -23.551390 120.0
## 6  703690903         114.9292      -20.414165 122.5
## 7  703691121         117.5433      -19.020835 131.0
## 8  703691470         118.1525      -18.963335 119.5
## 9  703691925         117.7925      -19.168335 101.5
## 10 732957007         120.1400       13.880000 131.5

Filter by polygon

With the OBIS map tool (http://iobis.org/maptool) we can create a WKT (well-known text) polygon for the Perhentian and Redang Islands and use it to query all occurrences.

islands <- occurrence(geometry = 
      paste("POLYGON ((102.68921 6.05862","102.57111 5.95346",
                      "103.07785 5.49980","103.25226 5.62555",
                      "103.07648 5.87970","102.68921 6.05862))",sep=", "))
## 
Retrieved 532 records of 532 (100%)

Filter by polygon

leafletmap(islands)

Visualize data through time

islands$year <- as.numeric(format(as.Date(islands$eventDate), "%Y"))
ggplot(islands[!is.na(islands$year),], aes(x = year, fill = phylum)) +
  geom_histogram(binwidth = 5) +
  scale_fill_brewer(palette = "Paired")

Visualize multiple facets

lag <- occurrence("Lagis", resourceid=c(4312, 222))
ggplot() +
 geom_histogram(data = lag, aes(x = yearcollected), binwidth = 2) +
 facet_grid(resourceID ~ species)

## 
Retrieved 668 records of 668 (100%)

Filter by dataset

datasets <- dataset(q="Adriatic-INTERREG")
## 
Retrieved 6 records of 6 (100%)
datasets$name
## [1] "Meiobenthos North Adriatic-INTERREG-FVG-Projects" 
## [2] "Mesozooplankton North Adriatic-INTERREG Project"  
## [3] "Microzooplankton North Adriatic-INTERREG Project" 
## [4] "Microphytobenthos North Adriatic-INTERREG Project"
## [5] "Macrobenthos North Adriatic-INTERREG-FVG Project" 
## [6] "Phytoplankton North Adriatic-INTERREG-FVG Project"

Filter by dataset

occ <- occurrence(resourceid = datasets$id, year = 1998,
           fields = c("id", "decimalLongitude", "decimalLatitude", "resourceID"))
## 
Retrieved 1553 records of 1553 (100%)
head(occ)
##          id decimalLongitude decimalLatitude resourceID
## 1 719561828         13.56467        45.69350       3494
## 2 719561980         13.59617        45.66383       3494
## 3 719561996         13.56467        45.69350       3494
## 4 719561997         13.56467        45.69350       3494
## 5 719561998         13.56467        45.69350       3494
## 6 719561999         13.56467        45.69350       3494

Filter by quality control

Vandepitte L., Bosch S., Tyberghein L., Waumans F., Vanhoorne B., Hernandez F., De Clerck O., & Mees J. (2015) Fishing for data and sorting the catch: assessing the data quality, completeness and fitness for use of data in marine biogeographic databases. Database, 2015. http://dx.doi.org/10.1093/database/bau125

?qcflags

Categories of quality control flags:

  • Data format
  • Taxonomy
  • Completeness
  • Geography
  • Depth
  • Outliers

Some filters: quality control

  • 3. Taxonomy: is the taxon level genus or lower?
  • 4. Geography (lat/lon): are the latitude/longitude values different from zero?
  • 5. Geography (lat/lon): are the latitude/longitude values within their possible boundaries? (world coordinates)
  • 7. Completeness (date/time): is the sampling year (start/end) completed and valid?
qc3457 <- occurrence(resourceid = datasets$id, qc = c(3,4,5,7), year = 1998)
## 
Retrieved 1005 records of 1005 (100%)

Visualize QC

  • 28. Species outliers (geography): is the observation within three IQRs from the first & third quartile distance to the geographic centroid of this taxon?
acistu <- occurrence("Acipenser sturio")
## 
Retrieved 66 records of 66 (100%)
acistu$qcnum <- qcflags(acistu$qc, c(28))
colors <- c("#ee3300", "#86b300")[acistu$qcnum + 1]
popup <- paste0(acistu$datasetName, "<br/>", acistu$catalogNumber, 
                "<br/><a href=\"http://www.iobis.org/explore/#/dataset/", 
                acistu$resourceID, "\">OBIS dataset page</a>")

Visualize QC

leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    popup = popup, radius = 3.5, weight = 0, fillColor = colors, fillOpacity = 1,
    lat = acistu$decimalLatitude, lng = acistu$decimalLongitude)

Visualize QC

acistu$qctxt <- c("Not ok", "Ok")[acistu$qcnum + 1]
ggplot() +
  geom_polygon(data=map_data("world"),aes(x=long,y=lat,group=group),fill="#dddddd")+
  geom_point(data=acistu, aes(x=decimalLongitude, y=decimalLatitude, color=qctxt))