Discrepancy in IS12_speaker_trait.conf
So I used the process described in #109 to extract IS12_speaker_trait functional features using the wrapper. When I do this, I end up with 5757 features instead of the 6125 features mentioned in the paper. When I use IS12_speaker_trait_compat with SMILExtract I get the correct number of features -- but this does not work with the opensmile wrapper.
@ureichel @maxschmitt do you have any clue why this happens or if I've done something wrong?
MWE:
smile = opensmile.Smile(f"<path-to-opensmile-repository>/config/is09-13/IS12_speaker_trait.conf")
x = np.random.rand(16000)
print(smile(x).values.shape) # (5757)
///////////////////////////////////////////////////////////////////////////////////////
///////// > openSMILE configuration file for IS12 features < //////////////////
///////// //////////////////
///////// (c) 2013-2016 by audEERING //////////////////
///////// All rights reserved. See file COPYING for details. //////////////////
///////////////////////////////////////////////////////////////////////////////////////
// This is an updated version of the configuration which contains some fixes.
// The features are not compatible with the original IS12 features, but should
// be preferred in new designs / studies.
// Use IS12_speaker_trait_compat.conf for numerically compatible features.
;;;;;;; component list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Declares the cDataMemory instance (named dataMemory).
[componentInstances:cComponentManager]
instance[dataMemory].type=cDataMemory
; NOTE(review): 0 presumably disables printing of per-level statistics - confirm against the openSMILE docs.
printLevelStats=0
;;;;;;;;;;;;;;;;;;;;;;;;;;;; main section ;;;;;;;;;;;;;;;;;;;;;;;;;;;
\{\cm[source{?}:include external source]}
;;;;;;;;;;;;;;;;;;;;;;
; Long-window analysis chain (frameSize = 0.060): wave -> frame60 -> winG60 -> fftcG60 -> fftmagG60.
; fftmagG60 is the input of the pitch chain ([scale:cSpecScale]).
[componentInstances:cComponentManager]
instance[frame60].type=cFramer
instance[win60].type=cWindower
instance[fft60].type=cTransformFFT
instance[fftmp60].type=cFFTmagphase
; Frames the `wave` level.
; NOTE(review): frameSize/frameStep are presumably given in seconds - confirm against the cFramer docs.
[frame60:cFramer]
reader.dmLevel=wave
writer.dmLevel=frame60
frameSize = 0.060
frameStep = 0.010
frameCenterSpecial = left
; Gaussian window (winFunc=gauss, sigma=0.4) applied to the frame60 level.
[win60:cWindower]
reader.dmLevel=frame60
writer.dmLevel=winG60
winFunc=gauss
gain=1.0
sigma=0.4
; FFT of the windowed frames; output level is fftcG60.
[fft60:cTransformFFT]
reader.dmLevel=winG60
writer.dmLevel=fftcG60
; for compatibility with 2.2.0 and older versions
zeroPadSymmetric = 0
; Converts the FFT output level fftcG60 into level fftmagG60.
[fftmp60:cFFTmagphase]
reader.dmLevel=fftcG60
writer.dmLevel=fftmagG60
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Short-window analysis chain: wave -> frame25 -> winH25 -> fftcH25 -> fftmagH25.
; fftmagH25 is read by the mel-spectrum and spectral sections; frame25 is also read directly
; by [energy:cEnergy] and [mzcr:cMZcr].
[componentInstances:cComponentManager]
instance[frame25].type=cFramer
instance[pe25].type=cVectorPreemphasis
instance[win25].type=cWindower
instance[fft25].type=cTransformFFT
instance[fftmp25].type=cFFTmagphase
; NOTE(review): the level is named frame25 but frameSize is 0.020 - confirm which value is intended.
[frame25:cFramer]
reader.dmLevel=wave
writer.dmLevel=frame25
frameSize = 0.020
frameStep = 0.010
frameCenterSpecial = left
; Pre-emphasis of frame25, written to frame25pe.
; NOTE(review): no section visible in this file reads frame25pe ([win25:cWindower] reads frame25),
; so the pre-emphasised signal appears to be unused - confirm whether this is intentional.
[pe25:cVectorPreemphasis]
reader.dmLevel=frame25
writer.dmLevel=frame25pe
k=0.97
de=0
; Hamming window applied to the frame25 level (not to frame25pe).
[win25:cWindower]
reader.dmLevel=frame25
writer.dmLevel=winH25
winFunc=hamming
; FFT of the windowed frames; output level is fftcH25.
[fft25:cTransformFFT]
reader.dmLevel=winH25
writer.dmLevel=fftcH25
; for compatibility with 2.2.0 and older versions
zeroPadSymmetric = 0
; Converts the FFT output level fftcH25 into level fftmagH25.
[fftmp25:cFFTmagphase]
reader.dmLevel=fftcH25
writer.dmLevel=fftmagH25
;;;;;;;;;;;;;;;;;;;; HPS pitch
; Pitch candidate extraction: fftmagG60 -> hpsG60 (octave-scaled spectrum) -> pitchShsG60.
[componentInstances:cComponentManager]
instance[scale].type=cSpecScale
instance[shs].type=cPitchShs
; Rescales the linear-frequency magnitude spectrum (sourceScale = lin) to an octave scale
; (scale=octave) using spline interpolation, with smoothing, enhancement and auditory weighting enabled.
[scale:cSpecScale]
reader.dmLevel=fftmagG60
writer.dmLevel=hpsG60
copyInputName = 1
processArrayFields = 0
scale=octave
sourceScale = lin
// logScaleBase = 2
// logSourceScaleBase = 2
// firstNote = 55
interpMethod = spline
minF = 25
maxF = -1
nPointsTarget = 0
specSmooth = 1
specEnhance = 1
auditoryWeighting = 1
; Pitch estimation on the octave-scaled spectrum; looks up its input field via
; inputFieldSearch = Mag_octScale and outputs nCandidates = 4 candidates.
[shs:cPitchShs]
reader.dmLevel=hpsG60
writer.dmLevel=pitchShsG60
; NOTE(review): level buffer size in frames, presumably sized for the downstream
; Viterbi smoother (bufferLength=30) - confirm.
writer.levelconf.nT=100
copyInputName = 1
processArrayFields = 0
maxPitch = 620
minPitch = 52
nCandidates = 4
scores = 1
voicing = 1
F0C1 = 0
voicingC1 = 0
F0raw = 1
voicingClip = 1
voicingCutoff = 0.700000
inputFieldSearch = Mag_octScale
octaveCorrection = 0
nHarmonics = 15
compressionFactor = 0.850000
; TODO: Viterbi smoothing with volume clipping!
;;;;; Pitch with Viterbi smoother
; RMS energy (rms=1, log=0) of the Gauss-windowed long frames (winG60), written to e60.
; e60 is merged with the Viterbi-smoothed pitch in [volmerge:cValbasedSelector].
[componentInstances:cComponentManager]
instance[energy60].type=cEnergy
[energy60:cEnergy]
reader.dmLevel=winG60
writer.dmLevel=e60
; nT must be larger than the buffer size of the Viterbi smoother
; (bufferLength=30 in [pitchSmoothViterbi:cPitchSmootherViterbi]).
writer.levelconf.nT=100
rms=1
log=0
; Viterbi smoothing of the pitch candidates: pitchShsG60 -> pitchG60_viterbi.
; Only F0final and voicingFinalUnclipped are enabled as outputs.
[componentInstances:cComponentManager]
instance[pitchSmoothViterbi].type=cPitchSmootherViterbi
[pitchSmoothViterbi:cPitchSmootherViterbi]
; Both readers are attached to the same level (pitchShsG60).
reader.dmLevel=pitchShsG60
reader2.dmLevel=pitchShsG60
writer.dmLevel=pitchG60_viterbi
copyInputName = 1
; Must stay below writer.levelconf.nT (100) of the levels this smoother depends on.
bufferLength=30
F0final = 1
F0finalEnv = 0
voicingFinalClipped = 0
voicingFinalUnclipped = 1
F0raw = 0
voicingC1 = 0
voicingClip = 0
; Viterbi weights; wTvv=10.0 is kept below as a disabled alternative.
wTvv=14.0
;wTvv=10.0
wTvuv=25.0
wRange=2.0
; Combines e60 and pitchG60_viterbi into the final pitch level pitchG60.
; NOTE(review): idx=0 presumably refers to the first input element (the e60 energy, listed first);
; frames whose value is below threshold are presumably zeroed (zeroVec=1, outputVal=0.0) and the
; energy element is dropped from the output (removeIdx=1) - confirm against the cValbasedSelector docs.
[componentInstances:cComponentManager]
instance[volmerge].type = cValbasedSelector
[volmerge:cValbasedSelector]
reader.dmLevel = e60;pitchG60_viterbi
writer.dmLevel = pitchG60
idx=0
threshold=0.005
removeIdx=1
zeroVec=1
outputVal=0.0
;;;;;;;;;;;;;;;;;;;;; Energy / loudness
; Energy / loudness components. Data flow:
;   frame25   -> energy
;   fftmagH25 -> melspec1 -> audspec      -> audspecSum
;                melspec1 -> audspecRasta -> audspecRastaSum
[componentInstances:cComponentManager]
instance[energy].type=cEnergy
instance[melspec1].type=cMelspec
instance[audspec].type=cPlp
instance[audspecRasta].type=cPlp
instance[audspecSum].type=cVectorOperation
instance[audspecRastaSum].type=cVectorOperation
; RMS energy (rms=1, log=0) computed on the unwindowed frame25 level.
[energy:cEnergy]
reader.dmLevel = frame25
writer.dmLevel = energy
log=0
rms=1
; 26-band mel power spectrum (lofreq 20 to hifreq 8000) shared by both cPlp instances.
[melspec1:cMelspec]
reader.dmLevel=fftmagH25
writer.dmLevel=melspec1
; htk compatible sample value scaling
htkcompatible = 0
nBands = 26
; use power spectrum instead of magnitude spectrum
usePower = 1
lofreq = 20
hifreq = 8000
specScale = mel
; perform auditory weighting of spectrum
; Only the doAud stage is enabled; the log, inverse-log, IDFT, LP and cepstral stages are off,
; and no RASTA filtering is applied (newRASTA=0, RASTA=0).
[audspec:cPlp]
reader.dmLevel=melspec1
writer.dmLevel=audspec
firstCC = 0
lpOrder = 5
cepLifter = 22
compression = 0.33
htkcompatible = 0
doIDFT = 0
doLpToCeps = 0
doLP = 0
doInvLog = 0
doAud = 1
doLog = 0
newRASTA=0
RASTA=0
; perform RASTA style filtering of auditory spectra
; Same settings as [audspec:cPlp] except newRASTA=1 and nameAppend = Rfilt.
[audspecRasta:cPlp]
reader.dmLevel=melspec1
writer.dmLevel=audspecRasta
nameAppend = Rfilt
firstCC = 0
lpOrder = 5
cepLifter = 22
compression = 0.33
htkcompatible = 0
doIDFT = 0
doLpToCeps = 0
doLP = 0
doInvLog = 0
doAud = 1
doLog = 0
newRASTA=1
RASTA=0
; Reduces the audspec level with operation = ll1 and writes it to audspecSum.
; NOTE(review): ll1 presumably yields a single summed value per frame - confirm against the cVectorOperation docs.
[audspecSum:cVectorOperation]
reader.dmLevel = audspec
writer.dmLevel = audspecSum
// nameAppend =
copyInputName = 1
processArrayFields = 0
operation = ll1
nameBase = audspec
; Same reduction applied to the RASTA-filtered level audspecRasta, written to audspecRastaSum.
[audspecRastaSum:cVectorOperation]
reader.dmLevel = audspecRasta
writer.dmLevel = audspecRastaSum
// nameAppend =
copyInputName = 1
processArrayFields = 0
operation = ll1
nameBase = audspecRasta
;;;;;;;;;;;;;;; spectral
; Spectral descriptors computed on fftmagH25 and written to level `spectral`.
[componentInstances:cComponentManager]
instance[spectral].type=cSpectral
[spectral:cSpectral]
reader.dmLevel=fftmagH25
writer.dmLevel=spectral
; Two band definitions (250-650 and 1000-4000).
bands[0]=250-650
bands[1]=1000-4000
; Four roll-off points: 0.25, 0.50, 0.75 and 0.90.
rollOff[0] = 0.25
rollOff[1] = 0.50
rollOff[2] = 0.75
rollOff[3] = 0.90
; Enabled descriptors are set to 1; centroid, maxPos and minPos are disabled.
flux=1
centroid=0
maxPos=0
minPos=0
entropy=1
variance=1
skewness=1
kurtosis=1
slope=1
harmonicity=1
sharpness=1
;;;;;;;;;;;;;;; mfcc
; MFCC front end: a dedicated mel filterbank (melspecMfcc) on fftmagH25, read by [mfcc:cMfcc].
[componentInstances:cComponentManager]
instance[melspecMfcc].type=cMelspec
instance[mfcc].type=cMfcc
[melspecMfcc:cMelspec]
reader.dmLevel=fftmagH25
writer.dmLevel=melspecMfcc
copyInputName = 1
processArrayFields = 1
; htk compatible sample value scaling
htkcompatible = 1
; Number of mel bands. Must be a plain integer; the previous value "26s" carried a stray
; trailing character. 26 matches nBands in [melspec1:cMelspec].
nBands = 26
; use power spectrum instead of magnitude spectrum
usePower = 1
lofreq = 20
hifreq = 8000
specScale = mel
inverse = 0
; MFCCs computed from the melspecMfcc level.
; NOTE(review): the output level is named mfcc1_12, but the configured coefficient range is
; firstMfcc = 1 to lastMfcc = 14. The level name is referenced by [smoB:cContourSmoother],
; so it must be renamed in both places if it is ever changed.
[mfcc:cMfcc]
reader.dmLevel=melspecMfcc
writer.dmLevel=mfcc1_12
copyInputName = 1
processArrayFields = 1
firstMfcc = 1
lastMfcc = 14
cepLifter = 22.0
htkcompatible = 1
;;;;;;;;;;;;;;;; zcr
; Time-domain descriptors on frame25, written to level `zcr`.
; Only zcr is enabled; mcr, amax, maxmin and dc are switched off.
[componentInstances:cComponentManager]
instance[mzcr].type=cMZcr
[mzcr:cMZcr]
reader.dmLevel = frame25
writer.dmLevel = zcr
copyInputName = 1
processArrayFields = 1
zcr = 1
mcr = 0
amax = 0
maxmin = 0
dc = 0
;;;;;;;;;;;;;;;;; rasta-plp ?
;;;;;;;;;;;;;;;;;;;; smoothing
; Contour smoothing (smaWin = 3) of the low-level descriptors, in three groups:
;   smoNz: pitch level                      -> lld_nzsmo
;   smoA : energy/loudness/zcr levels       -> lldA_smo
;   smoB : auditory spectrum/spectral/mfcc  -> lldB_smo
; f0sel extracts the smoothed F0 contour from lld_nzsmo.
[componentInstances:cComponentManager]
instance[smoNz].type=cContourSmoother
instance[smoA].type=cContourSmoother
instance[smoB].type=cContourSmoother
instance[f0sel].type=cDataSelector
; noZeroSma = 1 is set only for this group.
; NOTE(review): presumably this keeps zero (unvoiced) frames out of the moving average - confirm.
[smoNz:cContourSmoother]
reader.dmLevel = pitchG60
writer.dmLevel = lld_nzsmo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
noZeroSma = 1
; Selects the field F0final_sma from lld_nzsmo and writes it to lld_f0_nzsmo,
; which [functionalsF0:cFunctionals] and [def0sel:cDeltaRegression] read.
[f0sel:cDataSelector]
reader.dmLevel = lld_nzsmo
writer.dmLevel = lld_f0_nzsmo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = ff0
selected = F0final_sma
; Smooths the energy-related levels (audspecSum, audspecRastaSum, energy, zcr) into lldA_smo.
[smoA:cContourSmoother]
reader.dmLevel = audspecSum;audspecRastaSum;energy;zcr
writer.dmLevel = lldA_smo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
; Smooths the spectrum-related levels (audspecRasta, spectral, mfcc1_12) into lldB_smo.
[smoB:cContourSmoother]
reader.dmLevel = audspecRasta;spectral;mfcc1_12
writer.dmLevel = lldB_smo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
;;;;;;;;; deltas
; Delta regression coefficients, one instance per smoothed LLD level:
;   lld_nzsmo -> lld_nzsmo_de, lldA_smo -> lldA_smo_de,
;   lldB_smo  -> lldB_smo_de,  lld_f0_nzsmo -> lld_f0_nzsmo_de
[componentInstances:cComponentManager]
instance[deNz].type=cDeltaRegression
instance[deA].type=cDeltaRegression
instance[deB].type=cDeltaRegression
instance[def0sel].type=cDeltaRegression
; onlyInSegments = 1 is set only for the pitch group.
; NOTE(review): presumably restricts delta computation to non-zero (voiced) segments - confirm.
[deNz:cDeltaRegression]
reader.dmLevel = lld_nzsmo
writer.dmLevel = lld_nzsmo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
onlyInSegments = 1
[deA:cDeltaRegression]
reader.dmLevel = lldA_smo
writer.dmLevel = lldA_smo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
[deB:cDeltaRegression]
reader.dmLevel = lldB_smo
writer.dmLevel = lldB_smo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
; NOTE(review): no section visible in this file reads lld_f0_nzsmo_de - confirm whether it is needed.
[def0sel:cDeltaRegression]
reader.dmLevel = lld_f0_nzsmo
writer.dmLevel = lld_f0_nzsmo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
;;;;;;;;; functionals / statistics
; Six cFunctionals instances; their output levels are concatenated by [funcconcat:cVectorConcat].
[componentInstances:cComponentManager]
; group-specific functionals (energy, spectral, F0 segments, pitch in voiced regions)
instance[functionalsA].type=cFunctionals
instance[functionalsB].type=cFunctionals
instance[functionalsF0].type=cFunctionals
instance[functionalsNz].type=cFunctionals
; shared functionals for LLD
instance[functionalsLLD].type=cFunctionals
; shared functionals for Delta LLD
instance[functionalsDelta].type=cFunctionals
; Functionals for the energy-related LLDs: reads lldA_smo and lldA_smo_de, writes functionalsA.
; Enabled groups: Extremes, Percentiles, Moments, Segments, Times, Lpc.
[functionalsA:cFunctionals]
reader.dmLevel = lldA_smo;lldA_smo_de
writer.dmLevel = functionalsA
// nameAppend =
copyInputName = 1
; NOTE(review): frameMode = full with frameSize/frameStep = 0 presumably yields one functionals
; vector for the whole input - confirm against the cFunctionals docs.
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Segments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Extremes.range = 1
Extremes.norm = segment
Segments.maxNumSeg = 100
Segments.segmentationAlgorithm = relTh
Segments.thresholds = 0.25
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Moments.doRatioLimit = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for the spectrum-related LLDs: reads lldB_smo and lldB_smo_de, writes functionalsB.
; Same groups as functionalsA, but with two segment thresholds (0.25 and 0.75),
; an explicit Segments.rangeRelThreshold, and no Extremes.range / Extremes.norm settings.
[functionalsB:cFunctionals]
reader.dmLevel = lldB_smo;lldB_smo_de
writer.dmLevel = functionalsB
// nameAppend =
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Segments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Segments.maxNumSeg = 100
Segments.segmentationAlgorithm = relTh
Segments.thresholds = 0.25 ; 0.75
Segments.rangeRelThreshold = 0.200000
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Moments.doRatioLimit = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for pitch onsets/offsets: reads the selected F0 contour (lld_f0_nzsmo), writes functionalsF0.
; Of the Means group only nnz is enabled; every Times output is disabled (all set to 0),
; so the effective outputs come from Means.nnz and the Segments statistics.
[functionalsF0:cFunctionals]
reader.dmLevel = lld_f0_nzsmo
writer.dmLevel = functionalsF0
//nameAppend = ff0
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means ; Segments ; Times
Means.amean = 0
Means.absmean = 0
Means.qmean = 0
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.nzgmean = 0
Means.nnz = 1
Means.norm = segment
Segments.maxNumSeg = 100
; Segmentation by the nonX algorithm with X = 0.0.
; NOTE(review): presumably segments are runs of non-zero F0 values, i.e. voiced regions - confirm.
Segments.segmentationAlgorithm = nonX
Segments.X = 0.0
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Times.upleveltime25 = 0
Times.downleveltime25 = 0
Times.upleveltime50 = 0
Times.downleveltime50 = 0
Times.upleveltime75 = 0
Times.downleveltime75 = 0
Times.upleveltime90 = 0
Times.downleveltime90 = 0
Times.risetime = 0
Times.falltime = 0
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = second
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for pitch and vq related lld in voiced regions: reads lld_nzsmo and lld_nzsmo_de,
; writes functionalsNz. Unlike the other functionals sections, nonZeroFuncts is 1 here and the
; Segments group is not enabled.
[functionalsNz:cFunctionals]
reader.dmLevel = lld_nzsmo;lld_nzsmo_de
writer.dmLevel = functionalsNz
// nameAppend =
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Extremes.range = 1
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Moments.doRatioLimit = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 1
Times.rightctime = 1
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
; NOTE(review): presumably restricts the functionals to non-zero input values - confirm.
nonZeroFuncts = 1
masterTimeNorm = segment
; Shared functionals applied to all three smoothed LLD levels (lldA_smo, lldB_smo, lld_nzsmo);
; writes functionalsLLD. Enabled groups: Means, Peaks2, Regression.
[functionalsLLD:cFunctionals]
reader.dmLevel = lldA_smo;lldB_smo;lld_nzsmo
writer.dmLevel = functionalsLLD
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means ; Peaks2 ; Regression
; Means: amean, rqmean and flatness are enabled.
Means.amean = 1
Means.posamean = 0
Means.absmean = 0
Means.qmean = 0
Means.rqmean = 1
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.posrqmean = 0
Means.nzgmean = 0
Means.nnz = 0
Means.flatness = 1
Means.norm = frames
; Regression: linear and quadratic coefficients, their quadratic errors, and the centroid.
Regression.linregc1 = 1
Regression.linregc2 = 1
Regression.linregerrA = 0
Regression.linregerrQ = 1
Regression.qregc1 = 1
Regression.qregc2 = 1
Regression.qregc3 = 1
Regression.qregerrA = 0
Regression.qregerrQ = 1
Regression.oldBuggyQerr = 0
Regression.centroid = 1
Regression.normRegCoeff = 0
Regression.centroidRatioLimit = 0
Regression.centroidUseAbsValues = 0
Regression.doRatioLimit = 0
; Peaks2: peak distance, range, mean and slope statistics; all ptp*/mtm* amplitude outputs are off.
Peaks2.doRatioLimit = 0
Peaks2.numPeaks = 0
Peaks2.meanPeakDist = 1
Peaks2.meanPeakDistDelta = 0
Peaks2.peakDistStddev = 1
Peaks2.peakRangeAbs = 1
Peaks2.peakRangeRel = 1
Peaks2.peakMeanAbs = 1
Peaks2.peakMeanMeanDist = 1
Peaks2.peakMeanRel = 1
Peaks2.ptpAmpMeanAbs = 0
Peaks2.ptpAmpMeanRel = 0
Peaks2.ptpAmpStddevAbs = 0
Peaks2.ptpAmpStddevRel = 0
Peaks2.minRangeAbs = 0
Peaks2.minRangeRel = 1
Peaks2.minMeanAbs = 0
Peaks2.minMeanMeanDist = 0
Peaks2.minMeanRel = 0
Peaks2.mtmAmpMeanAbs = 0
Peaks2.mtmAmpMeanRel = 0
Peaks2.mtmAmpStddevAbs = 0
Peaks2.mtmAmpStddevRel = 0
Peaks2.meanRisingSlope = 1
Peaks2.maxRisingSlope = 0
Peaks2.minRisingSlope = 0
Peaks2.stddevRisingSlope = 1
Peaks2.meanFallingSlope = 1
Peaks2.maxFallingSlope = 0
Peaks2.minFallingSlope = 0
Peaks2.stddevFallingSlope = 1
Peaks2.norm = seconds
Peaks2.relThresh = 0.100000
Peaks2.dynRelThresh = 0
; debug output of peak positions is disabled (the output file setting is commented out)
;Peaks2.posDbgOutp = minmax.txt
Peaks2.posDbgAppend = 0
Peaks2.consoleDbg = 0
; Shared functionals applied to all three delta levels (lldA_smo_de, lldB_smo_de, lld_nzsmo_de);
; writes functionalsDelta. Only the Means group is enabled, with posamean, rqmean and flatness on.
[functionalsDelta:cFunctionals]
reader.dmLevel = lldA_smo_de;lldB_smo_de;lld_nzsmo_de
writer.dmLevel = functionalsDelta
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means
Means.amean = 0
Means.posamean = 1
Means.absmean = 0
Means.qmean = 0
Means.rqmean = 1
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.posrqmean = 0
Means.nzgmean = 0
Means.nnz = 0
Means.flatness = 1
Means.norm = frames
;;;;;;;;; prepare features for standard output module
; Output preparation: concatenates the per-group levels into three levels,
; `lld`, `lld_de` and `func`.
; NOTE(review): these are presumably the levels consumed by the externally included sink - confirm.
[componentInstances:cComponentManager]
instance[lldconcat].type=cVectorConcat
instance[llddeconcat].type=cVectorConcat
instance[funcconcat].type=cVectorConcat
; All smoothed low-level descriptors -> lld.
[lldconcat:cVectorConcat]
reader.dmLevel = lld_nzsmo;lldA_smo;lldB_smo
writer.dmLevel = lld
includeSingleElementFields = 1
; All delta descriptors -> lld_de.
[llddeconcat:cVectorConcat]
reader.dmLevel = lld_nzsmo_de;lldA_smo_de;lldB_smo_de
writer.dmLevel = lld_de
includeSingleElementFields = 1
; All six functionals levels -> func.
[funcconcat:cVectorConcat]
reader.dmLevel = functionalsA;functionalsB;functionalsNz;functionalsF0;functionalsLLD;functionalsDelta
writer.dmLevel = func
includeSingleElementFields = 1
\{\cm[sink{?}:include external sink]}
The reason why IS12_speaker_trait has fewer features is that it is an updated feature set (recommended over the original one), whereas the _compat version is the "original" one.
However, IS12_speaker_trait_compat also works for me with the Python wrapper (see details below).
///////////////////////////////////////////////////////////////////////////////////////
///////// > openSMILE configuration file for IS12 challenge < //////////////////
///////// This file produces backwards compatible features //////////////////
///////// (to version 1.x) with openSMILE version 2.1 //////////////////
///////// //////////////////
///////// (c) 2013-2016 by audEERING //////////////////
///////// All rights reserved. See file COPYING for details. //////////////////
///////////////////////////////////////////////////////////////////////////////////////
;;;;;;; component list ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Declares the cDataMemory instance (named dataMemory).
[componentInstances:cComponentManager]
instance[dataMemory].type=cDataMemory
; NOTE(review): 0 presumably disables printing of per-level statistics - confirm against the openSMILE docs.
printLevelStats=0
;;;;;;;;;;;;;;;;;;;;;;;;;;;; main section ;;;;;;;;;;;;;;;;;;;;;;;;;;;
\{\cm[source{?}:include external source]}
;;;;;;;;;;;;;;;;;;;;;;
; Long-window analysis chain (frameSize = 0.060): wave -> frame60 -> winG60 -> fftcG60 -> fftmagG60.
; fftmagG60 is the input of the pitch chain ([scale:cSpecScale]).
[componentInstances:cComponentManager]
instance[frame60].type=cFramer
instance[win60].type=cWindower
instance[fft60].type=cTransformFFT
instance[fftmp60].type=cFFTmagphase
; Frames the `wave` level.
; NOTE(review): frameSize/frameStep are presumably given in seconds - confirm against the cFramer docs.
[frame60:cFramer]
reader.dmLevel=wave
writer.dmLevel=frame60
frameSize = 0.060
frameStep = 0.010
frameCenterSpecial = left
; Gaussian window (winFunc=gauss, sigma=0.4) applied to the frame60 level.
[win60:cWindower]
reader.dmLevel=frame60
writer.dmLevel=winG60
winFunc=gauss
gain=1.0
sigma=0.4
; FFT of the windowed frames; output level is fftcG60.
[fft60:cTransformFFT]
reader.dmLevel=winG60
writer.dmLevel=fftcG60
; for compatibility with 2.2.0 and older versions
zeroPadSymmetric = 0
; Converts the FFT output level fftcG60 into level fftmagG60.
[fftmp60:cFFTmagphase]
reader.dmLevel=fftcG60
writer.dmLevel=fftmagG60
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Short-window analysis chain: wave -> frame25 -> winH25 -> fftcH25 -> fftmagH25.
; fftmagH25 is read by the mel-spectrum and spectral sections; frame25 is also read directly
; by [energy:cEnergy] and [mzcr:cMZcr].
[componentInstances:cComponentManager]
instance[frame25].type=cFramer
instance[pe25].type=cVectorPreemphasis
instance[win25].type=cWindower
instance[fft25].type=cTransformFFT
instance[fftmp25].type=cFFTmagphase
; NOTE(review): the level is named frame25 but frameSize is 0.020 - confirm which value is intended.
[frame25:cFramer]
reader.dmLevel=wave
writer.dmLevel=frame25
frameSize = 0.020
frameStep = 0.010
frameCenterSpecial = left
; Pre-emphasis of frame25, written to frame25pe.
; NOTE(review): no section visible in this file reads frame25pe ([win25:cWindower] reads frame25),
; so the pre-emphasised signal appears to be unused - confirm whether this is intentional.
[pe25:cVectorPreemphasis]
reader.dmLevel=frame25
writer.dmLevel=frame25pe
k=0.97
de=0
; Hamming window applied to the frame25 level (not to frame25pe).
[win25:cWindower]
reader.dmLevel=frame25
writer.dmLevel=winH25
winFunc=hamming
; FFT of the windowed frames; output level is fftcH25.
[fft25:cTransformFFT]
reader.dmLevel=winH25
writer.dmLevel=fftcH25
; for compatibility with 2.2.0 and older versions
zeroPadSymmetric = 0
; Converts the FFT output level fftcH25 into level fftmagH25.
[fftmp25:cFFTmagphase]
reader.dmLevel=fftcH25
writer.dmLevel=fftmagH25
;;;;;;;;;;;;;;;;;;;; HPS pitch
; Pitch candidate extraction and smoothing:
;   fftmagG60 -> hpsG60 (octave-scaled spectrum) -> pitchShsG60 -> pitchG60 (pitchSmooth)
;                                                               -> pitchF   (pitchSmooth2)
[componentInstances:cComponentManager]
instance[scale].type=cSpecScale
instance[shs].type=cPitchShs
instance[pitchSmooth].type=cPitchSmoother
instance[pitchSmooth2].type=cPitchSmoother
; Rescales the linear-frequency magnitude spectrum (sourceScale = lin) to an octave scale
; (scale=octave) using spline interpolation, with smoothing, enhancement and auditory weighting enabled.
[scale:cSpecScale]
reader.dmLevel=fftmagG60
writer.dmLevel=hpsG60
copyInputName = 1
processArrayFields = 0
scale=octave
sourceScale = lin
// logScaleBase = 2
// logSourceScaleBase = 2
// firstNote = 55
interpMethod = spline
minF = 25
maxF = -1
nPointsTarget = 0
specSmooth = 1
specEnhance = 1
auditoryWeighting = 1
; Pitch estimation on the octave-scaled spectrum; looks up its input field via
; inputFieldSearch = Mag_octScale and outputs nCandidates = 4 candidates.
[shs:cPitchShs]
reader.dmLevel=hpsG60
writer.dmLevel=pitchShsG60
copyInputName = 1
processArrayFields = 0
maxPitch = 620
minPitch = 52
nCandidates = 4
scores = 1
voicing = 1
F0C1 = 0
voicingC1 = 0
F0raw = 1
voicingClip = 1
voicingCutoff = 0.700000
inputFieldSearch = Mag_octScale
octaveCorrection = 0
nHarmonics = 15
compressionFactor = 0.850000
; Pitch smoother producing the feature level pitchG60 from the candidates in pitchShsG60.
; Outputs F0final and voicingFinalUnclipped; median filtering and post smoothing are set to 0.
[pitchSmooth:cPitchSmoother]
reader.dmLevel=pitchShsG60
writer.dmLevel=pitchG60
copyInputName = 1
processArrayFields = 0
medianFilter0 = 0
postSmoothing = 0
postSmoothingMethod = simple
; note: octave correction is too aggressive, thus we disable it.
octaveCorrection = 0
F0final = 1
F0finalEnv = 0
no0f0 = 0
voicingFinalClipped = 0
voicingFinalUnclipped = 1
F0raw = 0
voicingC1 = 0
voicingClip = 0
; Second smoother on the same candidates, outputting only F0final to level pitchF.
; pitchF is the F0 input of [pitchJitter:cPitchJitter] (F0reader.dmLevel = pitchF).
[pitchSmooth2:cPitchSmoother]
reader.dmLevel=pitchShsG60
writer.dmLevel=pitchF
F0raw = 0
F0final = 1
F0finalEnv = 0
voicingFinalUnclipped = 0
medianFilter0 = 0
postSmoothingMethod = simple
octaveCorrection = 0
;;;;;;;;;;;;;;;;;;; VQ
; Voice quality: only the jitter/shimmer component is active.
; The acf and hnr components below are disabled (their instance lines and sections are commented out).
[componentInstances:cComponentManager]
;instance[acf].type=cAcf
;instance[hnr].type=cHnr
instance[pitchJitter].type=cPitchJitter
;[acf:cAcf]
;reader.dmLevel=fftmagG60
;writer.dmLevel=acfG60
;[hnr:cHnr]
;reader.dmLevel=pitchShsG60;acfG60
;writer.dmLevel=hnr
;hnrAvg = 1
;hnr1 = 1
;hnr2
;hnr3
;hnr4
;hnr5
; Reads the raw `wave` level plus the F0 contour (field F0final of level pitchF) and writes
; the level jitterShimmer, which [smoNz:cContourSmoother] reads.
; Enabled outputs: jitterLocal, jitterDDP, shimmerLocal and logHNR.
[pitchJitter:cPitchJitter]
reader.dmLevel = wave
writer.dmLevel = jitterShimmer
// nameAppend =
copyInputName = 1
; is pitchF really necessary, or can we use pitchG60 ?
F0reader.dmLevel = pitchF
F0field = F0final
searchRangeRel = 0.250000
jitterLocal = 1
jitterDDP = 1
jitterLocalEnv = 0
jitterDDPEnv = 0
shimmerLocal = 1
shimmerLocalEnv = 0
onlyVoiced = 0
logHNR = 1
;periodLengths = 0
;periodStarts = 0
; NOTE(review): presumably retains an old (incorrect) jitter threshold so that the features stay
; numerically compatible with the original set - confirm against the cPitchJitter docs.
useBrokenJitterThresh = 1
;;;;;;;;;;;;;;;;;;;;; Energy / loudness
; Energy / loudness components. Data flow:
;   frame25   -> energy
;   fftmagH25 -> melspec1 -> audspec      -> audspecSum
;                melspec1 -> audspecRasta -> audspecRastaSum
[componentInstances:cComponentManager]
instance[energy].type=cEnergy
instance[melspec1].type=cMelspec
instance[audspec].type=cPlp
instance[audspecRasta].type=cPlp
instance[audspecSum].type=cVectorOperation
instance[audspecRastaSum].type=cVectorOperation
; RMS energy (rms=1, log=0) computed on the unwindowed frame25 level.
[energy:cEnergy]
reader.dmLevel = frame25
writer.dmLevel = energy
log=0
rms=1
; 26-band mel power spectrum (lofreq 20 to hifreq 8000) shared by both cPlp instances.
[melspec1:cMelspec]
reader.dmLevel=fftmagH25
writer.dmLevel=melspec1
; htk compatible sample value scaling
htkcompatible = 0
nBands = 26
; use power spectrum instead of magnitude spectrum
usePower = 1
lofreq = 20
hifreq = 8000
specScale = mel
; perform auditory weighting of spectrum
; Only the doAud stage is enabled; the log, inverse-log, IDFT, LP and cepstral stages are off,
; and no RASTA filtering is applied (newRASTA=0, RASTA=0).
[audspec:cPlp]
reader.dmLevel=melspec1
writer.dmLevel=audspec
firstCC = 0
lpOrder = 5
cepLifter = 22
compression = 0.33
htkcompatible = 0
doIDFT = 0
doLpToCeps = 0
doLP = 0
doInvLog = 0
doAud = 1
doLog = 0
newRASTA=0
RASTA=0
; perform RASTA style filtering of auditory spectra
; Same settings as [audspec:cPlp] except newRASTA=1 and nameAppend = Rfilt.
[audspecRasta:cPlp]
reader.dmLevel=melspec1
writer.dmLevel=audspecRasta
nameAppend = Rfilt
firstCC = 0
lpOrder = 5
cepLifter = 22
compression = 0.33
htkcompatible = 0
doIDFT = 0
doLpToCeps = 0
doLP = 0
doInvLog = 0
doAud = 1
doLog = 0
newRASTA=1
RASTA=0
; Reduces the audspec level with operation = ll1 and writes it to audspecSum.
; NOTE(review): ll1 presumably yields a single summed value per frame - confirm against the cVectorOperation docs.
[audspecSum:cVectorOperation]
reader.dmLevel = audspec
writer.dmLevel = audspecSum
// nameAppend =
copyInputName = 1
processArrayFields = 0
operation = ll1
nameBase = audspec
; Same reduction applied to the RASTA-filtered level audspecRasta, written to audspecRastaSum.
[audspecRastaSum:cVectorOperation]
reader.dmLevel = audspecRasta
writer.dmLevel = audspecRastaSum
// nameAppend =
copyInputName = 1
processArrayFields = 0
operation = ll1
nameBase = audspecRasta
;;;;;;;;;;;;;;; spectral
; Spectral descriptors computed on fftmagH25 and written to level `spectral`.
[componentInstances:cComponentManager]
instance[spectral].type=cSpectral
[spectral:cSpectral]
reader.dmLevel=fftmagH25
writer.dmLevel=spectral
; Two band definitions (250-650 and 1000-4000).
bands[0]=250-650
bands[1]=1000-4000
; Four roll-off points: 0.25, 0.50, 0.75 and 0.90.
rollOff[0] = 0.25
rollOff[1] = 0.50
rollOff[2] = 0.75
rollOff[3] = 0.90
; Enabled descriptors are set to 1; centroid, maxPos and minPos are disabled.
flux=1
centroid=0
maxPos=0
minPos=0
entropy=1
variance=1
skewness=1
kurtosis=1
slope=1
harmonicity=1
sharpness=1
;;;;;;;;;;;;;;; mfcc
; MFCC front end: a dedicated mel filterbank (melspecMfcc) on fftmagH25, read by [mfcc:cMfcc].
[componentInstances:cComponentManager]
instance[melspecMfcc].type=cMelspec
instance[mfcc].type=cMfcc
[melspecMfcc:cMelspec]
reader.dmLevel=fftmagH25
writer.dmLevel=melspecMfcc
copyInputName = 1
processArrayFields = 1
; htk compatible sample value scaling
htkcompatible = 1
; Number of mel bands. Must be a plain integer; the previous value "26s" carried a stray
; trailing character. 26 matches nBands in [melspec1:cMelspec].
nBands = 26
; use power spectrum instead of magnitude spectrum
usePower = 1
lofreq = 20
hifreq = 8000
specScale = mel
inverse = 0
; MFCCs computed from the melspecMfcc level.
; NOTE(review): the output level is named mfcc1_12, but the configured coefficient range is
; firstMfcc = 1 to lastMfcc = 14. The level name is referenced by [smoB:cContourSmoother],
; so it must be renamed in both places if it is ever changed.
[mfcc:cMfcc]
reader.dmLevel=melspecMfcc
writer.dmLevel=mfcc1_12
copyInputName = 1
processArrayFields = 1
firstMfcc = 1
lastMfcc = 14
cepLifter = 22.0
htkcompatible = 1
;;;;;;;;;;;;;;;; zcr
; Time-domain descriptors on frame25, written to level `zcr`.
; Only zcr is enabled; mcr, amax, maxmin and dc are switched off.
[componentInstances:cComponentManager]
instance[mzcr].type=cMZcr
[mzcr:cMZcr]
reader.dmLevel = frame25
writer.dmLevel = zcr
copyInputName = 1
processArrayFields = 1
zcr = 1
mcr = 0
amax = 0
maxmin = 0
dc = 0
;;;;;;;;;;;;;;;;; rasta-plp ?
;;;;;;;;;;;;;;;;;;;; smoothing
; Contour smoothing (smaWin = 3) of the low-level descriptors, in three groups:
;   smoNz: pitch and jitter/shimmer levels  -> lld_nzsmo
;   smoA : energy/loudness/zcr levels       -> lldA_smo
;   smoB : auditory spectrum/spectral/mfcc  -> lldB_smo
; f0sel extracts the smoothed F0 contour from lld_nzsmo.
[componentInstances:cComponentManager]
instance[smoNz].type=cContourSmoother
instance[smoA].type=cContourSmoother
instance[smoB].type=cContourSmoother
instance[f0sel].type=cDataSelector
; Reads both pitchG60 and jitterShimmer. noZeroSma = 1 is set only for this group.
; NOTE(review): presumably this keeps zero (unvoiced) frames out of the moving average - confirm.
[smoNz:cContourSmoother]
reader.dmLevel = pitchG60;jitterShimmer
writer.dmLevel = lld_nzsmo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
noZeroSma = 1
; Selects the field F0final_sma from lld_nzsmo and writes it to lld_f0_nzsmo,
; which [functionalsF0:cFunctionals] and [def0sel:cDeltaRegression] read.
[f0sel:cDataSelector]
reader.dmLevel = lld_nzsmo
writer.dmLevel = lld_f0_nzsmo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = ff0
selected = F0final_sma
; Smooths the energy-related levels (audspecSum, audspecRastaSum, energy, zcr) into lldA_smo.
[smoA:cContourSmoother]
reader.dmLevel = audspecSum;audspecRastaSum;energy;zcr
writer.dmLevel = lldA_smo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
; Smooths the spectrum-related levels (audspecRasta, spectral, mfcc1_12) into lldB_smo.
[smoB:cContourSmoother]
reader.dmLevel = audspecRasta;spectral;mfcc1_12
writer.dmLevel = lldB_smo
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
nameAppend = sma
copyInputName = 1
noPostEOIprocessing = 0
smaWin = 3
;;;;;;;;; deltas
; Delta regression coefficients, one instance per smoothed LLD level:
;   lld_nzsmo -> lld_nzsmo_de, lldA_smo -> lldA_smo_de,
;   lldB_smo  -> lldB_smo_de,  lld_f0_nzsmo -> lld_f0_nzsmo_de
; All four instances use the component defaults apart from the level settings.
[componentInstances:cComponentManager]
instance[deNz].type=cDeltaRegression
instance[deA].type=cDeltaRegression
instance[deB].type=cDeltaRegression
instance[def0sel].type=cDeltaRegression
[deNz:cDeltaRegression]
reader.dmLevel = lld_nzsmo
writer.dmLevel = lld_nzsmo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
[deA:cDeltaRegression]
reader.dmLevel = lldA_smo
writer.dmLevel = lldA_smo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
[deB:cDeltaRegression]
reader.dmLevel = lldB_smo
writer.dmLevel = lldB_smo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
; NOTE(review): no section visible in this file reads lld_f0_nzsmo_de - confirm whether it is needed.
[def0sel:cDeltaRegression]
reader.dmLevel = lld_f0_nzsmo
writer.dmLevel = lld_f0_nzsmo_de
writer.levelconf.isRb=0
writer.levelconf.growDyn=1
;;;;;;;;; functionals / statistics
; Six cFunctionals instances; their output levels are concatenated by [funcconcat:cVectorConcat].
[componentInstances:cComponentManager]
; group-specific functionals (energy, spectral, F0 segments, pitch in voiced regions)
instance[functionalsA].type=cFunctionals
instance[functionalsB].type=cFunctionals
instance[functionalsF0].type=cFunctionals
instance[functionalsNz].type=cFunctionals
; shared functionals for LLD
instance[functionalsLLD].type=cFunctionals
; shared functionals for Delta LLD
instance[functionalsDelta].type=cFunctionals
; Functionals for the energy-related LLDs: reads lldA_smo and lldA_smo_de, writes functionalsA.
; Enabled groups: Extremes, Percentiles, Moments, Segments, Times, Lpc.
; This section sets neither Extremes.range nor Extremes.norm.
[functionalsA:cFunctionals]
reader.dmLevel = lldA_smo;lldA_smo_de
writer.dmLevel = functionalsA
// nameAppend =
copyInputName = 1
; NOTE(review): frameMode = full with frameSize/frameStep = 0 presumably yields one functionals
; vector for the whole input - confirm against the cFunctionals docs.
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Segments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Segments.maxNumSeg = 100
Segments.segmentationAlgorithm = relTh
Segments.thresholds = 0.25
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Moments.doRatioLimit = 0
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for the spectrum-related LLDs: reads lldB_smo and lldB_smo_de, writes functionalsB.
; Same groups as functionalsA, but with two segment thresholds (0.25 and 0.75)
; and an explicit Segments.rangeRelThreshold.
[functionalsB:cFunctionals]
reader.dmLevel = lldB_smo;lldB_smo_de
writer.dmLevel = functionalsB
// nameAppend =
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Segments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Segments.maxNumSeg = 100
Segments.segmentationAlgorithm = relTh
Segments.thresholds = 0.25 ; 0.75
Segments.rangeRelThreshold = 0.200000
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Moments.doRatioLimit = 0
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for pitch onsets/offsets: reads the selected F0 contour (lld_f0_nzsmo), writes functionalsF0.
; Of the Means group only nnz is enabled; every Times output is disabled (all set to 0),
; so the effective outputs come from Means.nnz and the Segments statistics.
[functionalsF0:cFunctionals]
reader.dmLevel = lld_f0_nzsmo
writer.dmLevel = functionalsF0
//nameAppend = ff0
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means ; Segments ; Times
Means.amean = 0
Means.absmean = 0
Means.qmean = 0
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.nzgmean = 0
Means.nnz = 1
Means.norm = segment
Segments.maxNumSeg = 100
; Segmentation by the nonX algorithm with X = 0.0.
; NOTE(review): presumably segments are runs of non-zero F0 values, i.e. voiced regions - confirm.
Segments.segmentationAlgorithm = nonX
Segments.X = 0.0
Segments.numSegments = 0
Segments.meanSegLen = 1
Segments.maxSegLen = 1
Segments.minSegLen = 1
Segments.segLenStddev = 1
Segments.norm = second
Times.upleveltime25 = 0
Times.downleveltime25 = 0
Times.upleveltime50 = 0
Times.downleveltime50 = 0
Times.upleveltime75 = 0
Times.downleveltime75 = 0
Times.upleveltime90 = 0
Times.downleveltime90 = 0
Times.risetime = 0
Times.falltime = 0
Times.leftctime = 0
Times.rightctime = 0
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = second
nonZeroFuncts = 0
masterTimeNorm = segment
; Functionals for pitch and vq related lld in voiced regions: reads lld_nzsmo and lld_nzsmo_de,
; writes functionalsNz. Unlike the other functionals sections, nonZeroFuncts is 1 here and the
; Segments group is not enabled.
[functionalsNz:cFunctionals]
reader.dmLevel = lld_nzsmo;lld_nzsmo_de
writer.dmLevel = functionalsNz
// nameAppend =
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Extremes ; Percentiles ; Moments ; Times ; Lpc
Extremes.max = 0
Extremes.min = 0
Extremes.maxpos = 1
Extremes.minpos = 1
Extremes.maxameandist = 0
Extremes.minameandist = 0
Moments.doRatioLimit = 0
Moments.variance = 0
Moments.stddev = 1
Moments.skewness = 1
Moments.kurtosis = 1
Moments.amean = 0
Percentiles.quartiles = 1
Percentiles.iqr = 1
Percentiles.percentile[0] = 0.01
Percentiles.percentile[1] = 0.99
Percentiles.pctlrange[0] = 0-1
Percentiles.interp = 1
Times.upleveltime25 = 1
Times.downleveltime25 = 1
Times.upleveltime50 = 1
Times.downleveltime50 = 1
Times.upleveltime75 = 1
Times.downleveltime75 = 1
Times.upleveltime90 = 1
Times.downleveltime90 = 1
Times.risetime = 1
Times.falltime = 1
Times.leftctime = 1
Times.rightctime = 1
Times.duration = 0
Times.buggySecNorm = 0
Times.norm = segment
Lpc.lpGain = 1
Lpc.lpc = 1
Lpc.firstCoeff = 0
Lpc.order = 5
; NOTE(review): presumably restricts the functionals to non-zero input values - confirm.
nonZeroFuncts = 1
masterTimeNorm = segment
; Shared functionals applied to all three smoothed LLD levels (lldA_smo, lldB_smo, lld_nzsmo);
; writes functionalsLLD. Enabled groups: Means, Peaks2, Regression.
[functionalsLLD:cFunctionals]
reader.dmLevel = lldA_smo;lldB_smo;lld_nzsmo
writer.dmLevel = functionalsLLD
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means ; Peaks2 ; Regression
; Means: amean, rqmean and flatness are enabled.
Means.amean = 1
Means.posamean = 0
Means.absmean = 0
Means.qmean = 0
Means.rqmean = 1
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.posrqmean = 0
Means.nzgmean = 0
Means.nnz = 0
Means.flatness = 1
Means.norm = frames
; Regression: linear and quadratic coefficients, their quadratic errors, and the centroid.
Regression.centroidUseAbsValues = 0
Regression.centroidRatioLimit = 0
Regression.doRatioLimit = 0
Regression.linregc1 = 1
Regression.linregc2 = 1
Regression.linregerrA = 0
Regression.linregerrQ = 1
Regression.qregc1 = 1
Regression.qregc2 = 1
Regression.qregc3 = 1
Regression.qregerrA = 0
Regression.qregerrQ = 1
Regression.oldBuggyQerr = 0
Regression.centroid = 1
Regression.normRegCoeff = 0
; Peaks2: peak distance, range, mean and slope statistics; all ptp*/mtm* amplitude outputs are off.
Peaks2.doRatioLimit = 0
Peaks2.numPeaks = 0
Peaks2.meanPeakDist = 1
Peaks2.meanPeakDistDelta = 0
Peaks2.peakDistStddev = 1
Peaks2.peakRangeAbs = 1
Peaks2.peakRangeRel = 1
Peaks2.peakMeanAbs = 1
Peaks2.peakMeanMeanDist = 1
Peaks2.peakMeanRel = 1
Peaks2.ptpAmpMeanAbs = 0
Peaks2.ptpAmpMeanRel = 0
Peaks2.ptpAmpStddevAbs = 0
Peaks2.ptpAmpStddevRel = 0
Peaks2.minRangeAbs = 0
Peaks2.minRangeRel = 1
Peaks2.minMeanAbs = 0
Peaks2.minMeanMeanDist = 0
Peaks2.minMeanRel = 0
Peaks2.mtmAmpMeanAbs = 0
Peaks2.mtmAmpMeanRel = 0
Peaks2.mtmAmpStddevAbs = 0
Peaks2.mtmAmpStddevRel = 0
Peaks2.meanRisingSlope = 1
Peaks2.maxRisingSlope = 0
Peaks2.minRisingSlope = 0
Peaks2.stddevRisingSlope = 1
Peaks2.meanFallingSlope = 1
Peaks2.maxFallingSlope = 0
Peaks2.minFallingSlope = 0
Peaks2.stddevFallingSlope = 1
Peaks2.norm = seconds
Peaks2.relThresh = 0.100000
Peaks2.dynRelThresh = 0
; debug output of peak positions is disabled (the output file setting is commented out)
;Peaks2.posDbgOutp = minmax.txt
Peaks2.posDbgAppend = 0
Peaks2.consoleDbg = 0
; Shared functionals applied to all three delta levels (lldA_smo_de, lldB_smo_de, lld_nzsmo_de);
; writes functionalsDelta. Only the Means group is enabled, with posamean, rqmean and flatness on.
[functionalsDelta:cFunctionals]
reader.dmLevel = lldA_smo_de;lldB_smo_de;lld_nzsmo_de
writer.dmLevel = functionalsDelta
copyInputName = 1
frameMode = full
frameSize = 0
frameStep = 0
frameCenterSpecial = left
functionalsEnabled = Means
Means.amean = 0
Means.posamean = 1
Means.absmean = 0
Means.qmean = 0
Means.rqmean = 1
Means.nzamean = 0
Means.nzabsmean = 0
Means.nzqmean = 0
Means.posrqmean = 0
Means.nzgmean = 0
Means.nnz = 0
Means.flatness = 1
Means.norm = frames
;;;;;;;;; prepare features for standard output module
; Output preparation: concatenates the per-group levels into three levels,
; `lld`, `lld_de` and `func`.
; NOTE(review): these are presumably the levels consumed by the externally included sink - confirm.
[componentInstances:cComponentManager]
instance[lldconcat].type=cVectorConcat
instance[llddeconcat].type=cVectorConcat
instance[funcconcat].type=cVectorConcat
; All smoothed low-level descriptors -> lld.
[lldconcat:cVectorConcat]
reader.dmLevel = lld_nzsmo;lldA_smo;lldB_smo
writer.dmLevel = lld
includeSingleElementFields = 1
; All delta descriptors -> lld_de.
[llddeconcat:cVectorConcat]
reader.dmLevel = lld_nzsmo_de;lldA_smo_de;lldB_smo_de
writer.dmLevel = lld_de
includeSingleElementFields = 1
; All six functionals levels -> func.
[funcconcat:cVectorConcat]
reader.dmLevel = functionalsA;functionalsB;functionalsNz;functionalsF0;functionalsLLD;functionalsDelta
writer.dmLevel = func
includeSingleElementFields = 1
\{\cm[sink{?}:include external sink]}
@ATriantafyllopoulos resolved?