Conditional entropy optimisation

Haskell commentary on the implementation of Tractable and Practicable Inducers/Haskell Implementation/Conditional entropy optimisation

Sections

Conditional entropy tuple set builder

Conditional entropy fud decomper

Conditional entropy tuple set builder

Define the limited-dimension conditional entropy tuple set list minimiser \[ Z_{P,A,\mathrm{L}} = \mathrm{maximiseLister}(X_{P,A,\mathrm{L}},P_{P,A,\mathrm{L}},\mathrm{top}(\mathrm{omax}),R_{P,A,\mathrm{L}}) \] where (i) the optimiser function is \[ \begin{eqnarray} X_{P,A,\mathrm{L}} &=& \{(K,-(\mathrm{entropy}(A~\%~(K \cup L))-\mathrm{entropy}(A~\%~K))) : K \subseteq V \setminus L\} \end{eqnarray} \] and (ii) the neighbourhood function is \[ \begin{eqnarray} &&P_{P,A,\mathrm{L}}(M) = \{(J,X_{P,A,\mathrm{L}}(J)) : \\ &&\hspace{2em}(K,\cdot) \in M,~w \in V \setminus L \setminus K,~J = K \cup \{w\},~|J| \leq \mathrm{kmax}\} \end{eqnarray} \] and (iii) the initial subset is \[ \begin{eqnarray} R_{P,A,\mathrm{L}} &=& \{(\{w\},X_{P,A,\mathrm{L}}(\{w\})) : w \in V \setminus L\} \end{eqnarray} \] Then the conditional entropy optimised limited-dimension conditional entropy tuple set list, $M$, is (Text) \[ \begin{eqnarray} M &=& \mathrm{topd}(\mathrm{qmax})(\mathrm{elements}(Z_{P,A,\mathrm{L}})) \subset \mathrm{P}(V \setminus L) \end{eqnarray} \] The conditional entropy tuple set builder parametersBuilderConditionalVars is defined in module AlignmentPracticable.

parametersBuilderConditionalVars :: 
  Integer -> Integer -> Integer -> Set.Set Variable -> Histogram -> 
  Maybe (Map.Map (Set.Set Variable) Double)

parametersBuilderConditionalVars kmax omax qmax ll aa
  | kmax < 0 || omax < 0 || qmax < 0 = Nothing
  | otherwise = Just $ bot qmax $ buildc rr rr
  where
    vvk = vars aa `minus` ll
    rr = bot omax $ llmm [(sgl w, ent (aa `red` (ll `add` w)) - ent (aa `red` (sgl w))) | w <- qqll vvk]
    buildc qq nn = if mm /= Map.empty then buildc mm (nn `Map.union` mm) else nn
      where
        pp = llqq [jj | (kk,e) <- mmll qq, e > 0, w <- qqll (vvk `minus` kk), let jj = kk `add` w]
        mm = bot omax $ llmm [(jj, ent (aa `red` (ll `union` jj)) - ent (aa `red` jj)) |
          jj <- qqll pp, card jj <= kmax]
  ...

For example,

let buildcond vvl aa kmax omax qmax  = sort $ map (\(a,b) -> (b,a)) $ Map.toList $ fromJust $ parametersBuilderConditionalVars kmax omax qmax vvl aa

let (kmax,omax,qmax) = (1, 5, 5)

rpln $ buildcond vvl aa kmax omax qmax
"(6.445777995546464e-2,{odor})"
"(0.3593018375305004,{spore-print-color})"
"(0.4034743011923405,{gill-color})"
"(0.47206538234114515,{ring-type})"
"(0.49514434957356657,{stalk-surface-above-ring})"

let (kmax,omax) = (2, 10, 10)

rpln $ buildcond vvl aa kmax omax qmax
"(2.082990753054048e-2,{odor,spore-print-color})"
"(3.6279340021870166e-2,{cap-color,odor})"
"(3.849645066616292e-2,{gill-color,odor})"
"(4.566068343589347e-2,{odor,stalk-shape})"
"(4.619210440231036e-2,{odor,stalk-color-below-ring})"
"(4.6657422349796196e-2,{habitat,odor})"
"(4.909372014653002e-2,{odor,stalk-surface-below-ring})"
"(5.1254420042391224e-2,{odor,population})"
"(5.16165100197532e-2,{cap-shape,odor})"
"(5.3577275552557424e-2,{odor,stalk-color-above-ring})"

let (kmax,omax) = (3, 10, 10)

rpln $ buildcond vvl aa kmax omax qmax
"(6.893838773649907e-3,{habitat,odor,spore-print-color})"
"(8.190808632908553e-3,{gill-size,odor,spore-print-color})"
"(8.190808632908997e-3,{odor,ring-number,spore-print-color})"
"(8.951100722878191e-3,{odor,spore-print-color,stalk-surface-below-ring})"
"(9.454387885879711e-3,{habitat,odor,stalk-root})"
"(9.511843838571288e-3,{cap-color,odor,spore-print-color})"
"(9.58215032701304e-3,{cap-color,odor,stalk-root})"
"(9.812725049147542e-3,{odor,spore-print-color,stalk-color-below-ring})"
"(1.1450498672664011e-2,{bruises,habitat,odor})"
"(1.2142896254066393e-2,{cap-color,odor,population})"

Conditional entropy fud decomper

The limited-dimension conditional entropy fud decompositions tree searcher chooses an immediate super-decomposition from the conditional entropy optimised tuple set lists of the children slices until the leaf slices have zero conditional entropy. The fuds are singletons of the self partition transforms of the conditional entropy tuples. Define the limited-dimension conditional entropy fud decompositions tree searcher \[ Z_{P,A,\mathrm{L,D,F}} = \mathrm{searchTreer}(\mathcal{D}_{\mathrm{F},\infty,U,V},P_{P,A,\mathrm{L,D,F}},R_{P,A,\mathrm{L,D,F}}) \] where the neighbourhood function returns a singleton \[ \begin{eqnarray} &&P_{P,A,\mathrm{L,D,F}}(D) = \{E : \\ &&\hspace{2em}(\cdot,S,G,L) \in \mathrm{maxd}(\mathrm{order}(D_{\mathbf{Q} \times \mathrm{S} \times \mathcal{X}^2},\{(e,S,G,L) : \\ &&\hspace{4em}(L,Y) \in \mathrm{places}(D),\\ &&\hspace{4em}R_L = \bigcup \mathrm{dom}(\mathrm{set}(L)),~H_L = \bigcup \mathrm{ran}(\mathrm{set}(L)),\\ &&\hspace{4em}(\cdot,F) = L_{|L|},~W=\mathrm{der}(F),\\ &&\hspace{4em}S \in W^{\mathrm{CS}} \setminus \mathrm{dom}(\mathrm{dom}(Y)),\\ &&\hspace{4em}B = \mathrm{apply}(V_A,V_A,\mathrm{his}(H_L) \cup \{\{R_L \cup S\}^{\mathrm{U}}\},A),\\ &&\hspace{4em}e = \mathrm{size}(B) * \mathrm{entropy}(B~\%~L),~e>0,\\ &&\hspace{4em}K \in \mathrm{maxd}(\mathrm{elements}(Z_{P,A,\mathrm{L}})),\\ &&\hspace{4em}G = \{K^{\mathrm{CS\{\}T}}\}\})),\\ &&\hspace{2em}M=L \cup \{(|L|+1,(S,G))\},\\ &&\hspace{2em}E = \mathrm{tree}(\mathrm{paths}(D) \setminus \{L\} \cup \{M\})\} \end{eqnarray} \] where \[ \begin{eqnarray} &&R_{P,A,\mathrm{L,D,F}} = \{\{((\emptyset,G),\emptyset)\} : \\ &&\hspace{2em}G \in \mathrm{maxd}(\mathrm{order}(D_{\mathrm{F}},\{G : \\ &&\hspace{4em}K \in \mathrm{maxd}(\mathrm{elements}(Z_{P,A,\mathrm{L}})),\\ &&\hspace{4em}G = \{K^{\mathrm{CS\{\}T}}\}\}))\} \end{eqnarray} \] The fud decomposition is $D$ where $\{D\} = \mathrm{leaves}(\mathrm{tree}(Z_{P,A,\mathrm{L,D,F}}))$, (Text).

The decomper is defined in module AlignmentPracticable,

parametersSystemsDecomperConditional :: 
  Integer -> Integer -> System -> Set.Set Variable -> Histogram -> 
  Maybe (System, DecompFud)

as

parametersSystemsDecomperConditional kmax omax uu ll aa
  ...
  | otherwise = Just $ decomp uu emptyTree 1
  where
    decomp uu zz f s
      | zz == emptyTree && nnr == [] = (uu, decompFudEmpty)
      | zz == emptyTree = decomp uur zzr (f+1)
      | mm == [] = (uu, zzdf (zztrim zz)) 
      | otherwise = decomp uuc zzc (f+1)
      where
        nnr = lenter kmax omax 1 ll aa
        [(kkr,_)] = nnr
        ffr = if nnr /= [] then vvff uu kkr f else fudEmpty
        uur = uu `uunion` fsys ffr
        zzr = tsgl (stateEmpty,ffr)
        mm = [(e,nn,ss,bb) | (nn,yy) <- qqll (treesPlaces zz), 
                 let rrc = llsthis nn, let hhc = llfhis nn, let (_,ff) = last nn, ff /= fudEmpty,
                 ss <- qqll (cart uu (fder ff) `minus` dom (treesRoots yy)),
                 let xx = hhc `union` rrc `add` unit (sgl ss),
                 let bb = apply vv vv xx aa,
                 let e = fromRational (size bb) * ent (bb `red` ll)
                 e > 0]
        (_,nn,ss,bb) = last $ sort mm
        nnc = lenter kmax omax 1 ll bb
        [(kkc,_)] = nnc
        ffc = if nnr /= [] then vvff uu kkc f else fudEmpty
        uuc = uu `uunion` fsys ffc
        zzc = pathsTree $ treesPaths zz `add` (nn ++ [(ss,ffc)])
    lenter kmax omax qmax ll aa = mmll $ fromJust $ parametersBuilderConditionalVars kmax omax qmax ll aa
    ...

For example,

let decompercond vvl aa kmax omax  = fromJust $ parametersSystemsDecomperConditional kmax omax (sys aa) vvl aa

let (kmax,omax) = (1, 2)

let (uu1,df) = decompercond vvl aa kmax omax

rp $ dfund df
"{cap-color,gill-size,habitat,odor,spore-print-color}"

rp $ fder $ dfff df
"{<<1,1>,1>,<<2,1>,1>,<<3,1>,1>,<<4,1>,1>,<<5,1>,1>}"

let dfll = qqll . treesPaths . dfzz 

rpln $ map (map (fund.snd)) $ dfll df
"[{odor},{spore-print-color},{habitat},{cap-color}]"
"[{odor},{spore-print-color},{habitat},{gill-size}]"

rpln $ map (map (fvars.snd)) $ dfll df
"[{odor,<<1,1>,1>},{spore-print-color,<<2,1>,1>},{habitat,<<3,1>,1>},{cap-color,<<4,1>,1>}]"
"[{odor,<<1,1>,1>},{spore-print-color,<<2,1>,1>},{habitat,<<3,1>,1>},{gill-size,<<5,1>,1>}]"

let ff = fromJust $ systemsDecompFudsNullablePracticable uu1 df 1

let uu2 = uu `uunion` (fsys ff)

let hh = aahr uu aa

let hhb = hrfmul uu2 ff hh

let hrlent uu hh ww vvl = ent (hhaa $ hrhh uu $ hh `hrhrred` (ww `union` vvl)) - ent (hhaa $ hrhh uu $ hh `hrhrred` ww)

hrlent uu2 hhb (fder ff) vvl
-4.440892098500626e-16

rpln $ qqll $ ssplit (fvars ff) $ states $ hhaa $ hrhh uu2 $ hhb `hrhrred` (fder ff `union` vvl)
"({(<<1,n>,1>,1),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,2),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,3),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,4),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,5),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,6),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,1),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,2),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,3),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,4),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,5),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,6),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,1),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,1),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,3),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,9),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,10),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,4),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,6),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,7),(<<4,n>,1>,null),(<<5,n>,1>,1)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,7),(<<4,n>,1>,null),(<<5,n>,1>,2)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,9),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,8),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,9),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"

or, re-arranged to show the decomposition,

"({(<<1,n>,1>,1),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,2),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,3),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,4),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,5),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,6),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,8),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,9),(<<2,n>,1>,null),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"

"({(<<1,n>,1>,7),(<<2,n>,1>,1),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,2),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,3),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,4),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,5),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,6),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,9),(<<3,n>,1>,null),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"

"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,1),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,4),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,6),(<<4,n>,1>,null),(<<5,n>,1>,null)},{(edible,edible)})"

"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,1),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,3),(<<5,n>,1>,null)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,9),(<<5,n>,1>,null)},{(edible,poisonous)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,2),(<<4,n>,1>,10),(<<5,n>,1>,null)},{(edible,poisonous)})"

"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,7),(<<4,n>,1>,null),(<<5,n>,1>,1)},{(edible,edible)})"
"({(<<1,n>,1>,7),(<<2,n>,1>,8),(<<3,n>,1>,7),(<<4,n>,1>,null),(<<5,n>,1>,2)},{(edible,poisonous)})"

top