Dazu lade ich den Code zuerst in ein Do-File und kompiliere ihn mit „run".
Meine zu vergleichenden Variablen heißen {neu, alt}. Wie übergebe ich diese an das Programm?
- Code: Alles auswählen
mata:
/******************************************************************************
Terminology Note
key: string to measure each observation against
trymatch: one of many potential matches to be measured against the key
TRIES: Nx1 vector of all "trymatch"s
******************************************************************************/
void matalev1var(string scalar varname, string scalar key,
                 string scalar newvar , string scalar touse) {
	// Measure every value of the string variable `varname` against the single
	// string `key` with matalev() and save the distances in a newly created
	// int variable named `newvar`.
	//   varname : name of the string variable holding the potential matches
	//   key     : string each observation is measured against
	//   newvar  : name of the variable to create for the distances
	//   touse   : selection variable passed to st_sdata()/st_store(), so the
	//             same observations are read and written
	candidates = st_sdata(. , varname , touse)   // one row per selected observation
	n = rows(candidates)
	distances = J(n , 1 , .)                     // filled row by row below

	t = 1
	while (t <= n) {
		distances[t] = matalev(key , candidates[t,1])
		t++
	}

	// create the output variable only after all distances have been computed
	newidx = st_addvar("int", newvar)
	st_store(. , newidx , touse , distances)
}
void matalev2var(string scalar var1 , string scalar var2,
                 string scalar newvar , string scalar touse) {
	// Row-wise comparison of two string variables: for each selected
	// observation, matalev() is applied to the value of `var1` (the key) and
	// the value of `var2` (the potential match); the distances are saved in a
	// newly created int variable named `newvar`.
	//   var1, var2 : names of the two string variables to compare
	//   newvar     : name of the variable to create for the distances
	//   touse      : selection variable passed to st_sdata()/st_store(), so
	//                both inputs and the output cover the same observations
	left  = st_sdata(. , var1 , touse)
	right = st_sdata(. , var2 , touse)
	n = rows(right)
	distances = J(n , 1 , .)                     // filled row by row below

	t = 1
	while (t <= n) {
		distances[t] = matalev(left[t,1] , right[t,1])
		t++
	}

	// create the output variable only after all distances have been computed
	newidx = st_addvar("int", newvar)
	st_store(. , newidx , touse , distances)
}
real scalar matalev(string scalar key, string scalar trymatch) {
	// Levenshtein (edit) distance between `key` and `trymatch`: the minimum
	// number of single-character deletions, insertions and substitutions
	// needed to turn one string into the other.
	//   key      : string to measure against
	//   trymatch : potential match to be measured against the key
	//   returns  : the distance as a non-negative integer
	// Lengths and characters are taken with strlen()/substr(), exactly as in
	// the original code.
	// NOTE(review): in Unicode-aware Stata versions these functions presumably
	// work on bytes, so a multi-byte character (e.g. an umlaut) would count
	// as more than one position -- confirm before relying on it for non-ASCII.
	keylength = strlen(key)
	trylength = strlen(trymatch)

	// Empty-string guard: the distance to an empty string is the length of
	// the other string. Without this guard an empty key breaks the matrix
	// set-up below, because the range 0::keylength / 1::keylength is not an
	// empty vector when keylength is 0.
	if (keylength == 0) return(trylength)
	if (trylength == 0) return(keylength)

	// D[i+1,j+1] holds the distance between the first i characters of key
	// and the first j characters of trymatch
	D = J(keylength + 1 , trylength + 1 , .)
	// starting penalties: first column = cost of deleting i characters
	D[., 1] = (0::keylength)
	// starting penalties: first row = cost of inserting j characters
	D[1, .] = (0..trylength)

	// add penalty for each operation required to reconcile the two strings
	for (i = 1 ; i <= keylength ; i++ ) {
		for (j = 1 ; j <= trylength ; j++ ) {
			if (substr(key, i, 1) == substr(trymatch, j, 1)) {
				// matching characters cost nothing
				D[i+1,j+1] = D[i,j]
			}
			else { // (deletion , insertion , substitution)
				D[i+1,j+1] = min((D[i,j+1]+1 , D[i+1,j]+1 , D[i,j]+1 ))
			}
		}
	}

	// read the result from the bottom-right cell explicitly instead of
	// relying on the values the loop counters have after the loops end
	return(D[keylength + 1 , trylength + 1])
}
end