# FIPS IUT and MIP queues

from itertools import takewhile
from operator import itemgetter

from sec_certs.dataset.fips_mip import MIPDataset
from sec_certs.dataset.fips_iut import IUTDataset
from sec_certs.sample.fips_mip import MIPStatus
from sec_certs.model.fips_matching import FIPSProcessMatcher
from sec_certs.dataset.fips import FIPSDataset
from sec_certs.configuration import config
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import matplotlib.ticker as mtick
import warnings

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and newer releases no longer ship the old names, so use whichever variant
# this installation actually provides instead of hard-coding the legacy name.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither style sheet is bundled; fall back to seaborn's own whitegrid theme.
    sns.set_style("whitegrid")
sns.set_palette("deep")
sns.set_context("notebook")  # Set to "paper" for use in paper :)

# Suppress pandas' PerformanceWarning for the remainder of the session.
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
# Latest FIPS dataset; passed to FIPSProcessMatcher.match_snapshot further down.
fips = FIPSDataset.from_web_latest()

# IUT dataset

# Load the IUT dataset and tabulate one row per snapshot with its counts.
iut_dset = IUTDataset.from_web_latest()
iut_global_df = pd.DataFrame(
    iut_dset.snapshots,
    columns=["timestamp", "displayed", "not_displayed", "total"],
)
iut_global_df.info()

# Reshape to long format so every count column is drawn as its own line.
iut_melt = iut_global_df.melt(id_vars="timestamp", var_name="type", value_name="count")
line = sns.lineplot(x="timestamp", y="count", hue="type", data=iut_melt)
line.set_xlabel("Date")
line.set_ylabel("Number of entries")
line.set_title("Entries in IUT list over time")
# Legend is anchored just outside the right edge of the axes.
line.legend(title="Type", loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0.0);

# Peek at the structure of a snapshot and at one of its entries.
s0 = iut_dset.snapshots[0]
print(s0.__dict__.keys())
print(next(iter(s0.entries)))
def iut_key(entry):
    """Return the (module_name, vendor_name, standard) tuple identifying an IUT entry."""
    return tuple(getattr(entry, field) for field in ("module_name", "vendor_name", "standard"))

# For every IUT entry, record the date of the first and of the last snapshot
# that contained it. Snapshots are visited in chronological order, so the first
# assignment wins for "first seen" and the most recent one for "last seen".
# The entry object itself is the dictionary key (iut_key(entry) would be a
# coarser alternative, but the DataFrame below reads attributes off the key).
iut_first_seen = {}
iut_last_seen = {}
# Date of the newest snapshot; stays None when the dataset has no snapshots.
iut_latest_date = None
for snapshot in tqdm(sorted(iut_dset.snapshots, key=lambda x: x.timestamp)):
    snapshot_date = snapshot.timestamp.date()
    iut_latest_date = snapshot_date
    for entry in snapshot.entries:
        iut_first_seen.setdefault(entry, snapshot_date)
        iut_last_seen[entry] = snapshot_date

# One row per distinct entry. "present" is True when the entry was last seen on
# the date of the newest snapshot, i.e. it is still on the list.
iut_local_df = pd.DataFrame(
    [
        (
            entry.module_name,
            entry.vendor_name,
            entry.standard,
            entry.iut_date,
            first_seen,
            iut_last_seen[entry],
            iut_last_seen[entry] == iut_latest_date,
        )
        for entry, first_seen in iut_first_seen.items()
    ],
    columns=("name", "vendor", "standard", "iut_date", "first_seen", "last_seen", "present"),
)
iut_local_df = iut_local_df.astype(
    {
        "standard": "category",
        "iut_date": "datetime64[ns]",
        "first_seen": "datetime64[ns]",
        "last_seen": "datetime64[ns]",
        "present": "bool",
    }
).fillna(value=np.nan)
# Days between the entry's IUT date and its last sighting.
iut_local_df["seen_for_iut"] = (iut_local_df.last_seen - iut_local_df.iut_date).dt.days
# Days between the first and the last sighting in the snapshots.
iut_local_df["seen_for"] = (iut_local_df.last_seen - iut_local_df.first_seen).dt.days

# Print the mean of the "seen_for" / "seen_for_iut" day counts, overall and per
# FIPS standard, first over every entry and then only over entries that are no
# longer present.
iut_stat_rows = (
    ("Average seen for", "seen_for", None),
    ("Average seen for (FIPS 140-2)", "seen_for", "FIPS 140-2"),
    ("Average seen for (FIPS 140-3)", "seen_for", "FIPS 140-3"),
    ("Average seen for[iut]", "seen_for_iut", None),
    ("Average seen for[iut] (FIPS 140-2)", "seen_for_iut", "FIPS 140-2"),
    ("Average seen for[iut] (FIPS 140-3)", "seen_for_iut", "FIPS 140-3"),
)
iut_stat_groups = (
    ("All:", iut_local_df),
    ("Only not present:", iut_local_df.loc[~iut_local_df.present]),
)
for group_heading, group_df in iut_stat_groups:
    print(group_heading)
    for stat_label, stat_column, stat_standard in iut_stat_rows:
        if stat_standard is None:
            selected = group_df
        else:
            selected = group_df.loc[group_df.standard == stat_standard]
        print(stat_label, np.mean(selected[stat_column]))
# Scatter plots against the IUT date: days tracked since that date, then the
# last-seen date itself. Colour encodes the standard, marker style the presence.
for iut_y_column in ("seen_for_iut", "last_seen"):
    scatter = sns.relplot(
        data=iut_local_df,
        kind="scatter",
        x="iut_date",
        y=iut_y_column,
        hue="standard",
        style="present",
        aspect=1,
    )

# Histograms of the tracked durations, restricted to entries no longer present.
iut_not_present = iut_local_df[~iut_local_df.present]
for iut_x_column in ("seen_for_iut", "seen_for"):
    hist = sns.histplot(data=iut_not_present, x=iut_x_column, hue="standard")

# Number of entries per vendor.
iut_local_df.vendor.value_counts()

# IUT - Certificate mapping

# Pick the last element of iut_dset.snapshots and hand it, together with the
# FIPS dataset, to FIPSProcessMatcher.match_snapshot (presumably matching the
# snapshot's entries to certificates; see the matcher for the result shape).
# NOTE(review): the variable is named first_snapshot but indexes [-1]; the
# ordering of iut_dset.snapshots is not visible here (it is explicitly sorted
# by timestamp elsewhere before use) — confirm which snapshot is intended.
first_snapshot = iut_dset.snapshots[-1]
matches = FIPSProcessMatcher.match_snapshot(first_snapshot, fips)

# MIP dataset

# Load the MIP dataset and tabulate one row per snapshot with its counts.
mip_dset = MIPDataset.from_web_latest()
mip_global_df = pd.DataFrame(
    mip_dset.snapshots,
    columns=["timestamp", "displayed", "not_displayed", "total"],
)
mip_global_df.info()

# Reshape to long format so every count column is drawn as its own line.
mip_melt = mip_global_df.melt(id_vars="timestamp", var_name="type", value_name="count")
line = sns.lineplot(x="timestamp", y="count", hue="type", data=mip_melt)
line.set_xlabel("Date")
line.set_ylabel("Number of entries")
line.set_title("Entries in MIP list over time")
# Legend is anchored just outside the right edge of the axes.
line.legend(title="Type", loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0.0);

# Peek at the structure of a snapshot and at one of its entries.
m0 = mip_dset.snapshots[0]
print(m0.__dict__.keys())
print(next(iter(m0.entries)))
def mip_key(entry):
    """Return the (module_name, vendor_name, standard, status) tuple identifying a MIP entry."""
    key_fields = ("module_name", "vendor_name", "standard", "status")
    return tuple(getattr(entry, field) for field in key_fields)

# For every MIP entry, record the date of the first and of the last snapshot
# that contained it. Snapshots are visited in chronological order, so the first
# assignment wins for "first seen" and the most recent one for "last seen".
# The entry object itself is the dictionary key (mip_key(entry) would be a
# coarser alternative, but the DataFrame below reads attributes off the key).
mip_first_seen = {}
mip_last_seen = {}
# Date of the newest snapshot; stays None when the dataset has no snapshots.
mip_latest_date = None
for snapshot in sorted(mip_dset.snapshots, key=lambda x: x.timestamp):
    snapshot_date = snapshot.timestamp.date()
    mip_latest_date = snapshot_date
    for entry in snapshot.entries:
        mip_first_seen.setdefault(entry, snapshot_date)
        mip_last_seen[entry] = snapshot_date

# One row per distinct entry. "present" is True when the entry was last seen on
# the date of the newest snapshot, i.e. it is still on the list.
mip_local_df = pd.DataFrame(
    [
        (
            entry.module_name,
            entry.vendor_name,
            entry.standard,
            entry.status,
            entry.status_since,
            first_seen,
            mip_last_seen[entry],
            mip_last_seen[entry] == mip_latest_date,
        )
        for entry, first_seen in mip_first_seen.items()
    ],
    columns=("name", "vendor", "standard", "status", "status_since", "first_seen", "last_seen", "present"),
)
mip_local_df = mip_local_df.astype(
    {
        "standard": "category",
        "status": "category",
        "status_since": "datetime64[ns]",
        "first_seen": "datetime64[ns]",
        "last_seen": "datetime64[ns]",
        "present": "bool",
    }
).fillna(value=np.nan)
# Days between the start of the entry's status and its last sighting.
mip_local_df["seen_for_status"] = (mip_local_df.last_seen - mip_local_df.status_since).dt.days
# Days between the first and the last sighting in the snapshots.
mip_local_df["seen_for"] = (mip_local_df.last_seen - mip_local_df.first_seen).dt.days
mip_local_df.head()
# One scatter plot per MIP status: first-seen date against the number of days
# the entry was tracked, coloured by standard.
mip_plotted_statuses = (
    MIPStatus.REVIEW_PENDING,
    MIPStatus.IN_REVIEW,
    MIPStatus.COORDINATION,
    MIPStatus.FINALIZATION,
)
for plotted_status in mip_plotted_statuses:
    status_rows = mip_local_df.loc[mip_local_df.status == plotted_status]
    scatter = sns.relplot(data=status_rows, kind="scatter", x="first_seen", y="seen_for", hue="standard")
# For each MIP status, print the mean "seen_for" day count overall and per FIPS
# standard: first over all entries with that status, then only over those that
# are no longer present.
mip_stat_rows = (
    ("Average seen for", None),
    ("Average seen for (FIPS 140-2)", "FIPS 140-2"),
    ("Average seen for (FIPS 140-3)", "FIPS 140-3"),
)
for status in MIPStatus:
    print(status)
    in_status = mip_local_df.loc[mip_local_df.status == status]
    report_groups = (
        ("All:", in_status),
        ("Only not present:", in_status.loc[~in_status.present]),
    )
    for group_heading, group_df in report_groups:
        print(group_heading)
        for stat_label, stat_standard in mip_stat_rows:
            if stat_standard is None:
                selected = group_df
            else:
                selected = group_df.loc[group_df.standard == stat_standard]
            print(stat_label, np.mean(selected.seen_for))
    print()
# Small-multiple histograms of "seen_for", one panel per status and coloured by
# standard, restricted to entries that are no longer present. The smaller font
# scale applies only inside this context.
with sns.plotting_context("notebook", font_scale=0.75):
    mip_not_present = mip_local_df.loc[~mip_local_df.present]
    g = sns.FacetGrid(
        mip_not_present,
        col="status",
        hue="standard",
        col_wrap=2,
        height=2,
        ylim=(0, 300),
    )
    # Both calls return the grid itself, so they can be chained.
    g.map(sns.histplot, "seen_for").set_titles("{col_name}")
    plt.show()

# MIP - Certificate matching

# Pick the last element of mip_dset.snapshots and hand it, together with the
# FIPS dataset, to FIPSProcessMatcher.match_snapshot (presumably matching the
# snapshot's entries to certificates; see the matcher for the result shape).
# Both names are reassigned here, replacing any values they held before.
# NOTE(review): the variable is named first_snapshot but indexes [-1]; the
# ordering of mip_dset.snapshots is not visible here (it is explicitly sorted
# by timestamp elsewhere before use) — confirm which snapshot is intended.
first_snapshot = mip_dset.snapshots[-1]
matches = FIPSProcessMatcher.match_snapshot(first_snapshot, fips)