Skip to contents

Class representing pairwise distances between multiple multidimensional distributions, when the distance is calculated as a sum of marginal distribution distances.

Usage

# S4 method for class 'DistSum'
show(object)

# S4 method for class 'matrix'
DistSum(object)

# S4 method for class 'list'
DistSum(object)

# S4 method for class 'DistSum'
dim(x)

# S4 method for class 'DistSum'
dimnames(x)

# S4 method for class 'DistSum,list'
dimnames(x) <- value

# S4 method for class 'DistSum,ANY'
dimnames(x) <- value

# S4 method for class 'DistSum'
ncol(x)

# S4 method for class 'DistSum'
colnames(x)

# S4 method for class 'DistSum'
colnames(x) <- value

# S4 method for class 'DistSum'
nrow(x)

# S4 method for class 'DistSum'
rownames(x)

# S4 method for class 'DistSum'
rownames(x) <- value

nFeatures(x)

# S4 method for class 'DistSum'
featureNames(object)

# S4 method for class 'DistSum'
featureNames(object) <- value

# S4 method for class 'DistSum,ANY,ANY,ANY'
x[i, j, ..., drop = TRUE]

# S4 method for class 'DistSum,ANY,ANY,missing'
x[i, j, ..., drop = TRUE]

# S4 method for class 'DistSum,ANY,missing,ANY'
x[i, j, ..., drop = TRUE]

# S4 method for class 'DistSum,ANY,missing,missing'
x[i, j, ..., drop = TRUE]

# S4 method for class 'DistSum'
as.matrix(x, whichFeatures = NULL)

distByFeature(distObj)

Arguments

object

a DistSum object

x

a DistSum object

value

the new feature names to be assigned

i

the row index

j

the column index

...

other arguments (not used)

drop

not supported (set to FALSE)

whichFeatures

either an array of feature names, or an array of feature indices, or NULL. If NULL, the full distance (for all features) will be returned. If not NULL, the whichFeatures array should not contain duplicates.

distObj

a DistSum object

Value

nothing

for distByFeature(): a data.frame, with 3 columns:

  • featureName : self-explanatory

  • distanceContrib : unidimensional distance along the corresponding feature

  • percentage : percentage of feature distance w.r.t. full distance

Slots

pwDistPerFeature

A list of matrix objects storing the contribution of each feature (dimension) of the multidimensional distributions to the full pairwise distance matrix. Note these matrices are not necessarily square symmetric matrices, as the DistSum could be occasionally used to store a given block of a bigger distance matrix.

Examples


# Build a dummy distance matrix from `nPoints` random points
# living in a Euclidean space of `nFeat` dimensions.
nPoints <- 5
nFeat <- 7
M <- matrix(
    data = rnorm(nPoints * nFeat),
    ncol = nFeat,
    dimnames = list(
        paste0("point", seq_len(nPoints)),
        paste0("feat", seq_len(nFeat))))

# One pairwise distance matrix per feature, each computed from
# the corresponding single column of `M`.
DList <- lapply(
    colnames(M),
    FUN = function(featName) {
        oneColumn <- M[, featName, drop = FALSE]
        as.matrix(dist(oneColumn))
    })

# The full distance matrix is the element-wise sum of the per-feature ones.
D <- Reduce(f = `+`, x = DList)

# Label each per-feature matrix with the feature it comes from.
names(DList) <- colnames(M)

# Example of creating a DistSum object from the full distance matrix.
# As the printed output below shows, the resulting object reports
# a single feature with no feature name.
distObj1 <- DistSum(D)
show(distObj1)
#> `DistSum` object containing pairwise distances between distributions
#>  and their decomposition as a sum of feature contributions
#> Matrix dimensions:  5 5 
#> Nb of features:  1 
#> Feature names:  
#> Full distance matrix: 
#>          point1   point2    point3    point4    point5
#> point1 0.000000 8.851750  9.407961  7.957533  9.126406
#> point2 8.851750 0.000000  8.827115  7.094430  4.580522
#> point3 9.407961 8.827115  0.000000 10.959452 10.329413
#> point4 7.957533 7.094430 10.959452  0.000000  7.613967
#> point5 9.126406 4.580522 10.329413  7.613967  0.000000

# Example of creating a DistSum object from a list of matrices,
# each representing the additive contribution of one feature.
# In the printed output below, the 7 feature names match names(DList)
# and the full distance matrix is the same as the one shown for distObj1.
distObj2 <- DistSum(DList)

show(distObj2)
#> `DistSum` object containing pairwise distances between distributions
#>  and their decomposition as a sum of feature contributions
#> Matrix dimensions:  5 5 
#> Nb of features:  7 
#> Feature names:  feat1 feat2 feat3 feat4 feat5 feat6 feat7 
#> Full distance matrix: 
#>          point1   point2    point3    point4    point5
#> point1 0.000000 8.851750  9.407961  7.957533  9.126406
#> point2 8.851750 0.000000  8.827115  7.094430  4.580522
#> point3 9.407961 8.827115  0.000000 10.959452 10.329413
#> point4 7.957533 7.094430 10.959452  0.000000  7.613967
#> point5 9.126406 4.580522 10.329413  7.613967  0.000000

# Querying dimensions and names of a DistSum object.
# The trailing comment on each line gives the expected value.
myDim <- dim(distObj2) # c(nPoints, nPoints)
ncols <- ncol(distObj2) # nPoints
nrows <- nrow(distObj2) # nPoints
nFeats <- nFeatures(distObj2) # nFeat
myFeatNames <- featureNames(distObj2) # paste0("feat", 1:nFeat)
myRowNames <- rownames(distObj2) # paste0("point", 1:nPoints)
# column names get their own variable, so that the row names
# stored just above are not overwritten
myColNames <- colnames(distObj2) # paste0("point", 1:nPoints)

# get full distance matrix (sum over all features)
dd <- as.matrix(distObj2)

# get partial distance matrix for feature 1, selected by index
dd1 <- as.matrix(distObj2, whichFeatures = 1)

# same thing, selecting the feature by name
dd1bis <- as.matrix(distObj2, whichFeatures = "feat1")

# getting partial distance for features 1 & 2, selected by name

ddPart <- as.matrix(distObj2, whichFeatures = colnames(M)[1:2])

# getting the distance contribution of each feature, as a data.frame
DF <- distByFeature(distObj2)