import random
import string
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
import datetime
# Read the activity logs and the employee directory from the ./example folder.
login_data = pd.read_csv('./example/login_data.csv')
web_data = pd.read_csv('./example/web_data.csv')
file_data = pd.read_csv('./example/file_data.csv')
employee_data = pd.read_csv('./example/employee_data.csv')
# The mail and USB logs are left disabled here:
# email_data = pd.read_csv('./example/mail_data.csv')
# usb_data = pd.read_csv('./example/usb_data.csv')

# Parse each loaded log's 'datetime' column into pandas timestamps so that the
# `.dt` accessor can be used on it.
for _log in (login_data, web_data, file_data):
    _log['datetime'] = pd.to_datetime(_log['datetime'])
# If email_data / usb_data are enabled above, convert their 'datetime' column
# in the same way.
# Preview the employee directory. NOTE(review): `display` is not imported in
# this file - presumably the notebook (IPython) built-in; confirm before
# running this as a plain script.
display(employee_data)
Unnamed: 0 | user | role | email | pc | |
---|---|---|---|---|---|
0 | 0 | usr-uda | Security | usr-uda@lockdown-lockups.com | pc0 |
1 | 1 | usr-hhe | Security | usr-hhe@lockdown-lockups.com | pc1 |
2 | 2 | usr-vxr | Finance | usr-vxr@lockdown-lockups.com | pc2 |
3 | 3 | usr-nba | Finance | usr-nba@lockdown-lockups.com | pc3 |
4 | 4 | usr-hqt | Finance | usr-hqt@lockdown-lockups.com | pc4 |
... | ... | ... | ... | ... | ... |
244 | 244 | usr-jwo | Finance | usr-jwo@lockdown-lockups.com | pc244 |
245 | 245 | usr-hiz | Security | usr-hiz@lockdown-lockups.com | pc245 |
246 | 246 | usr-svz | Services | usr-svz@lockdown-lockups.com | pc246 |
247 | 247 | usr-ndr | HR | usr-ndr@lockdown-lockups.com | pc247 |
248 | 248 | usr-eie | Finance | usr-eie@lockdown-lockups.com | pc248 |
249 rows × 5 columns
# Preview the login/logoff event log. NOTE(review): `display` is not imported
# in this file - presumably the notebook (IPython) built-in.
display(login_data)
Unnamed: 0 | datetime | user | action | pc | |
---|---|---|---|---|---|
0 | 0 | 2020-01-01 00:21:33 | usr-hyo | login | pc205 |
1 | 1 | 2020-01-01 00:21:39 | usr-ipd | login | pc230 |
2 | 2 | 2020-01-01 00:34:25 | usr-nrx | login | pc169 |
3 | 3 | 2020-01-01 00:35:10 | usr-hfz | login | pc111 |
4 | 4 | 2020-01-01 00:39:04 | usr-hhe | login | pc1 |
... | ... | ... | ... | ... | ... |
166825 | 166825 | 2020-11-30 23:42:54 | usr-ays | logoff | pc167 |
166826 | 166826 | 2020-11-30 23:44:18 | usr-alj | logoff | pc168 |
166827 | 166827 | 2020-11-30 23:51:29 | usr-nic | logoff | pc180 |
166828 | 166828 | 2020-11-30 23:56:34 | usr-vul | logoff | pc54 |
166829 | 166829 | 2020-11-30 23:57:32 | usr-lnn | logoff | pc64 |
166830 rows × 5 columns
# Preview the file-access log. NOTE(review): `display` is not imported in this
# file - presumably the notebook (IPython) built-in.
display(file_data)
Unnamed: 0 | datetime | user | filename | |
---|---|---|---|---|
0 | 0 | 2020-01-01 00:42:25.544227 | usr-ipd | /policy |
1 | 1 | 2020-01-01 00:50:48.627385 | usr-hyo | /do_not_delete |
2 | 2 | 2020-01-01 01:01:38.409035 | usr-hyo | /newsletter |
3 | 3 | 2020-01-01 01:14:49.310254 | usr-hyo | /tech |
4 | 4 | 2020-01-01 01:17:22.914953 | usr-hyo | /newsletter |
... | ... | ... | ... | ... |
3799265 | 3799265 | 2020-11-30 23:40:54.695141 | usr-lnn | /newsletter/general |
3799266 | 3799266 | 2020-11-30 23:48:35.828385 | usr-nic | /secret |
3799267 | 3799267 | 2020-11-30 23:49:02.955959 | usr-vul | /company_profile |
3799268 | 3799268 | 2020-11-30 23:49:28.216624 | usr-nic | /private/staffreview |
3799269 | 3799269 | 2020-11-30 23:49:33.944279 | usr-nic | /private/staffreview |
3799270 rows × 4 columns
We know that users are assigned a role. We make the assumption that users in the same job role will share some commonalities in how they work and what resources they use. Therefore, we can compare users against peers in the same job role, as well as against their own former activity.
# Example as given to students for getting role/user hierarchy.
# Builds two dictionaries keyed by role name:
#   user_set[role]        -> list of usernames holding that role
#   user_set_emails[role] -> list of email addresses holding that role
user_set = {}
user_set_emails = {}
all_roles = employee_data['role'].unique()
for role in all_roles:
    # Select this role's rows once and read both columns from that selection.
    in_role = employee_data['role'] == role
    members = employee_data[in_role]
    user_set[role] = list(members['user'].values)
    user_set_emails[role] = list(members['email'].values)
# Bare expression, kept so the mapping is echoed when run as a notebook cell.
user_set_emails
{'Security': ['usr-uda@lockdown-lockups.com', 'usr-hhe@lockdown-lockups.com', 'usr-zay@lockdown-lockups.com', 'usr-mdl@lockdown-lockups.com', 'usr-yjc@lockdown-lockups.com', 'usr-hvk@lockdown-lockups.com', 'usr-ybh@lockdown-lockups.com', 'usr-ryi@lockdown-lockups.com', 'usr-duj@lockdown-lockups.com', 'usr-scc@lockdown-lockups.com', 'usr-cyr@lockdown-lockups.com', 'usr-ngr@lockdown-lockups.com', 'usr-hzp@lockdown-lockups.com', 'usr-rnt@lockdown-lockups.com', 'usr-hfz@lockdown-lockups.com', 'usr-naf@lockdown-lockups.com', 'usr-kzn@lockdown-lockups.com', 'usr-olj@lockdown-lockups.com', 'usr-jmr@lockdown-lockups.com', 'usr-pzz@lockdown-lockups.com', 'usr-hpt@lockdown-lockups.com', 'usr-hui@lockdown-lockups.com', 'usr-nbg@lockdown-lockups.com', 'usr-alj@lockdown-lockups.com', 'usr-nrx@lockdown-lockups.com', 'usr-kvv@lockdown-lockups.com', 'usr-xwd@lockdown-lockups.com', 'usr-wyj@lockdown-lockups.com', 'usr-tzd@lockdown-lockups.com', 'usr-uka@lockdown-lockups.com', 'usr-npr@lockdown-lockups.com', 'usr-ipd@lockdown-lockups.com', 'usr-hiz@lockdown-lockups.com'], 'Finance': ['usr-vxr@lockdown-lockups.com', 'usr-nba@lockdown-lockups.com', 'usr-hqt@lockdown-lockups.com', 'usr-gyk@lockdown-lockups.com', 'usr-tiz@lockdown-lockups.com', 'usr-eqp@lockdown-lockups.com', 'usr-avx@lockdown-lockups.com', 'usr-zjh@lockdown-lockups.com', 'usr-hsh@lockdown-lockups.com', 'usr-gro@lockdown-lockups.com', 'usr-xkb@lockdown-lockups.com', 'usr-qcf@lockdown-lockups.com', 'usr-zuq@lockdown-lockups.com', 'usr-rjv@lockdown-lockups.com', 'usr-nra@lockdown-lockups.com', 'usr-wer@lockdown-lockups.com', 'usr-sgi@lockdown-lockups.com', 'usr-utk@lockdown-lockups.com', 'usr-zge@lockdown-lockups.com', 'usr-inp@lockdown-lockups.com', 'usr-ssv@lockdown-lockups.com', 'usr-lhu@lockdown-lockups.com', 'usr-uby@lockdown-lockups.com', 'usr-nvl@lockdown-lockups.com', 'usr-vmk@lockdown-lockups.com', 'usr-oza@lockdown-lockups.com', 'usr-xgk@lockdown-lockups.com', 'usr-uyp@lockdown-lockups.com', 
'usr-jwo@lockdown-lockups.com', 'usr-eie@lockdown-lockups.com'], 'Legal': ['usr-gwu@lockdown-lockups.com', 'usr-nho@lockdown-lockups.com', 'usr-utl@lockdown-lockups.com', 'usr-wcb@lockdown-lockups.com', 'usr-uct@lockdown-lockups.com', 'usr-miu@lockdown-lockups.com', 'usr-bde@lockdown-lockups.com', 'usr-zvn@lockdown-lockups.com', 'usr-cka@lockdown-lockups.com', 'usr-exs@lockdown-lockups.com', 'usr-gdd@lockdown-lockups.com', 'usr-xbv@lockdown-lockups.com', 'usr-rdb@lockdown-lockups.com', 'usr-xzr@lockdown-lockups.com', 'usr-shs@lockdown-lockups.com', 'usr-tcn@lockdown-lockups.com', 'usr-ime@lockdown-lockups.com', 'usr-cao@lockdown-lockups.com', 'usr-ibc@lockdown-lockups.com', 'usr-hha@lockdown-lockups.com', 'usr-yil@lockdown-lockups.com', 'usr-ibd@lockdown-lockups.com', 'usr-cvk@lockdown-lockups.com', 'usr-vxf@lockdown-lockups.com', 'usr-sgw@lockdown-lockups.com', 'usr-hga@lockdown-lockups.com', 'usr-hep@lockdown-lockups.com', 'usr-sgo@lockdown-lockups.com', 'usr-ylf@lockdown-lockups.com', 'usr-fjd@lockdown-lockups.com', 'usr-wpu@lockdown-lockups.com', 'usr-rkl@lockdown-lockups.com', 'usr-syq@lockdown-lockups.com', 'usr-fja@lockdown-lockups.com', 'usr-mpc@lockdown-lockups.com', 'usr-dyg@lockdown-lockups.com', 'usr-now@lockdown-lockups.com'], 'HR': ['usr-wnk@lockdown-lockups.com', 'usr-rxh@lockdown-lockups.com', 'usr-zkm@lockdown-lockups.com', 'usr-uoe@lockdown-lockups.com', 'usr-iqy@lockdown-lockups.com', 'usr-qtg@lockdown-lockups.com', 'usr-qod@lockdown-lockups.com', 'usr-wub@lockdown-lockups.com', 'usr-ivp@lockdown-lockups.com', 'usr-kzd@lockdown-lockups.com', 'usr-fio@lockdown-lockups.com', 'usr-gmb@lockdown-lockups.com', 'usr-pqq@lockdown-lockups.com', 'usr-chl@lockdown-lockups.com', 'usr-xre@lockdown-lockups.com', 'usr-eui@lockdown-lockups.com', 'usr-lsr@lockdown-lockups.com', 'usr-zbl@lockdown-lockups.com', 'usr-xzh@lockdown-lockups.com', 'usr-bph@lockdown-lockups.com', 'usr-adi@lockdown-lockups.com', 'usr-oov@lockdown-lockups.com', 
'usr-pmh@lockdown-lockups.com', 'usr-dyn@lockdown-lockups.com', 'usr-xpo@lockdown-lockups.com', 'usr-qpu@lockdown-lockups.com', 'usr-cee@lockdown-lockups.com', 'usr-wew@lockdown-lockups.com', 'usr-mmw@lockdown-lockups.com', 'usr-swu@lockdown-lockups.com', 'usr-uow@lockdown-lockups.com', 'usr-kfp@lockdown-lockups.com', 'usr-zik@lockdown-lockups.com', 'usr-qgc@lockdown-lockups.com', 'usr-hmw@lockdown-lockups.com', 'usr-iji@lockdown-lockups.com', 'usr-ryw@lockdown-lockups.com', 'usr-gxp@lockdown-lockups.com', 'usr-dgj@lockdown-lockups.com', 'usr-vbe@lockdown-lockups.com', 'usr-tzt@lockdown-lockups.com', 'usr-ivc@lockdown-lockups.com', 'usr-frc@lockdown-lockups.com', 'usr-ndr@lockdown-lockups.com'], 'Services': ['usr-ord@lockdown-lockups.com', 'usr-viw@lockdown-lockups.com', 'usr-upm@lockdown-lockups.com', 'usr-xsn@lockdown-lockups.com', 'usr-mlh@lockdown-lockups.com', 'usr-mmt@lockdown-lockups.com', 'usr-qir@lockdown-lockups.com', 'usr-oax@lockdown-lockups.com', 'usr-yfr@lockdown-lockups.com', 'usr-adl@lockdown-lockups.com', 'usr-evp@lockdown-lockups.com', 'usr-fiq@lockdown-lockups.com', 'usr-coz@lockdown-lockups.com', 'usr-rac@lockdown-lockups.com', 'usr-ami@lockdown-lockups.com', 'usr-sqk@lockdown-lockups.com', 'usr-jok@lockdown-lockups.com', 'usr-qie@lockdown-lockups.com', 'usr-eid@lockdown-lockups.com', 'usr-mki@lockdown-lockups.com', 'usr-naq@lockdown-lockups.com', 'usr-rvx@lockdown-lockups.com', 'usr-vdx@lockdown-lockups.com', 'usr-gnv@lockdown-lockups.com', 'usr-con@lockdown-lockups.com', 'usr-way@lockdown-lockups.com', 'usr-rtk@lockdown-lockups.com', 'usr-otx@lockdown-lockups.com', 'usr-nro@lockdown-lockups.com', 'usr-ikk@lockdown-lockups.com', 'usr-mys@lockdown-lockups.com', 'usr-xfo@lockdown-lockups.com', 'usr-tra@lockdown-lockups.com', 'usr-une@lockdown-lockups.com', 'usr-miy@lockdown-lockups.com', 'usr-xrb@lockdown-lockups.com', 'usr-jsv@lockdown-lockups.com', 'usr-pdn@lockdown-lockups.com', 'usr-fxq@lockdown-lockups.com', 'usr-itz@lockdown-lockups.com', 
'usr-hci@lockdown-lockups.com', 'usr-qln@lockdown-lockups.com', 'usr-eib@lockdown-lockups.com', 'usr-rjw@lockdown-lockups.com', 'usr-okf@lockdown-lockups.com', 'usr-sjc@lockdown-lockups.com', 'usr-qmo@lockdown-lockups.com', 'usr-tbt@lockdown-lockups.com', 'usr-svz@lockdown-lockups.com'], 'Technical': ['usr-mcr@lockdown-lockups.com', 'usr-lfl@lockdown-lockups.com', 'usr-gsw@lockdown-lockups.com', 'usr-qat@lockdown-lockups.com', 'usr-wgw@lockdown-lockups.com', 'usr-udb@lockdown-lockups.com', 'usr-zoj@lockdown-lockups.com', 'usr-peg@lockdown-lockups.com', 'usr-cvh@lockdown-lockups.com', 'usr-gvw@lockdown-lockups.com', 'usr-orw@lockdown-lockups.com', 'usr-tgw@lockdown-lockups.com', 'usr-iba@lockdown-lockups.com', 'usr-ebj@lockdown-lockups.com', 'usr-hvd@lockdown-lockups.com', 'usr-zwd@lockdown-lockups.com', 'usr-mvr@lockdown-lockups.com', 'usr-lxz@lockdown-lockups.com', 'usr-tqd@lockdown-lockups.com', 'usr-xvg@lockdown-lockups.com', 'usr-qjv@lockdown-lockups.com', 'usr-stu@lockdown-lockups.com', 'usr-yhv@lockdown-lockups.com', 'usr-ocw@lockdown-lockups.com', 'usr-ays@lockdown-lockups.com', 'usr-fbi@lockdown-lockups.com', 'usr-gok@lockdown-lockups.com', 'usr-szb@lockdown-lockups.com', 'usr-xsi@lockdown-lockups.com', 'usr-dry@lockdown-lockups.com', 'usr-jsn@lockdown-lockups.com', 'usr-jbh@lockdown-lockups.com'], 'Director': ['usr-dmi@lockdown-lockups.com', 'usr-hxr@lockdown-lockups.com', 'usr-kdj@lockdown-lockups.com', 'usr-vul@lockdown-lockups.com', 'usr-bkc@lockdown-lockups.com', 'usr-lnn@lockdown-lockups.com', 'usr-hfn@lockdown-lockups.com', 'usr-bsx@lockdown-lockups.com', 'usr-sdc@lockdown-lockups.com', 'usr-vui@lockdown-lockups.com', 'usr-mwj@lockdown-lockups.com', 'usr-jjg@lockdown-lockups.com', 'usr-vcj@lockdown-lockups.com', 'usr-zwq@lockdown-lockups.com', 'usr-rhd@lockdown-lockups.com', 'usr-zov@lockdown-lockups.com', 'usr-nnk@lockdown-lockups.com', 'usr-zth@lockdown-lockups.com', 'usr-nic@lockdown-lockups.com', 'usr-ypo@lockdown-lockups.com', 
'usr-hyo@lockdown-lockups.com', 'usr-twi@lockdown-lockups.com', 'usr-vsl@lockdown-lockups.com', 'usr-asj@lockdown-lockups.com']}
# Derive the hour of day of every login/logoff event from its timestamp.
login_data['hour'] = login_data['datetime'].dt.hour

# Draw all employees onto one large figure: for each employee, one line of
# login hours and one line of logoff hours. Each series keeps its original row
# index, which matplotlib uses as the x-axis.
plt.figure(figsize=(20,10))
is_login = login_data['action'] == 'login'
is_logoff = login_data['action'] == 'logoff'
for employee in employee_data['user'].unique():
    is_employee = login_data['user'] == employee
    plt.plot(login_data.loc[is_employee & is_login, 'hour'])
    plt.plot(login_data.loc[is_employee & is_logoff, 'hour'])
# (Re)derive the hour-of-day column so this cell can run on its own.
login_data['hour'] = login_data['datetime'].dt.hour

# One plot per role: every member's login hours and logoff hours are drawn,
# then the role name is printed and the figure is shown before moving on.
for role in employee_data['role'].unique():
    role_users = employee_data.loc[employee_data['role'] == role, 'user'].unique()
    for employee in role_users:
        events = login_data[login_data['user'] == employee]
        plt.plot(events.loc[events['action'] == 'login', 'hour'])
        plt.plot(events.loc[events['action'] == 'logoff', 'hour'])
    print ("Role: ", role)
    plt.show()
Role: Security
Role: Finance
Role: Legal
Role: HR
Role: Services
Role: Technical
Role: Director
Let's now start investigating other data available to use.
# Scatter plot of which user accessed which file, across all employees.
plt.figure(figsize=(20,10))
# A repeated access lands exactly on top of an earlier point, so only distinct
# (user, filename) pairs are drawn instead of every row of the log.
# drop_duplicates keeps the first occurrence of each pair in the original row
# order, so the order in which users and filenames first appear (and hence the
# categorical axis order) is unchanged.
file_pairs = file_data[['user', 'filename']].drop_duplicates()
plt.scatter(file_pairs['user'], file_pairs['filename'])
plt.show()
# Per-role scatter plot of which user accessed which file.
# Only distinct (user, filename) pairs are visible in a scatter plot, so the
# log is reduced to those pairs once, up front, rather than filtering and
# plotting every row for each role. drop_duplicates keeps first occurrences in
# row order, so the axis ordering of each plot is unchanged.
file_pairs = file_data[['user', 'filename']].drop_duplicates()
for role in employee_data['role'].unique():
    employees = employee_data[employee_data['role'] == role]['user'].values
    print ("Role:", role)
    # Keep only the pairs belonging to members of this role.
    d = file_pairs[file_pairs['user'].isin(employees)]
    plt.scatter(d['user'], d['filename'])
    plt.show()
Role: Security
Role: Finance
Role: Legal
Role: HR
Role: Services
Role: Technical
Role: Director
# Per-role scatter plot of which user visited which website.
# Only distinct (user, website) pairs are visible in a scatter plot, so the
# log is reduced to those pairs once, up front, rather than filtering and
# plotting every row for each role. drop_duplicates keeps first occurrences in
# row order, so the axis ordering of each plot is unchanged.
web_pairs = web_data[['user', 'website']].drop_duplicates()
for role in employee_data['role'].unique():
    employees = employee_data[employee_data['role'] == role]['user'].values
    print ("Role:", role)
    # Keep only the pairs belonging to members of this role.
    d = web_pairs[web_pairs['user'].isin(employees)]
    plt.scatter(d['user'], d['website'], c='orange')
    plt.show()
Role: Security
Role: Finance
Role: Legal
Role: HR
Role: Services
Role: Technical
Role: Director
Splitting these up on a per role basis helps to identify a case where one user in a role has accessed items that no other user in the same role has accessed previously.
Which user in our Director group accessed legaleagle.com and linkedin.com, and also accessed the /docs/clients file?
# Web visits made by Director-role users to legaleagle.com.
is_director = web_data['user'].isin(user_set['Director'])
is_legaleagle = web_data['website'] == 'http://www.legaleagle.com'
d = web_data[is_director & is_legaleagle]
d
Unnamed: 0 | datetime | user | website | |
---|---|---|---|---|
812005 | 812005 | 2020-07-17 01:07:09.540920 | usr-zth | http://www.legaleagle.com |
812006 | 812006 | 2020-07-17 02:11:42.631109 | usr-zth | http://www.legaleagle.com |
812031 | 812031 | 2020-07-17 04:29:03.617451 | usr-zth | http://www.legaleagle.com |
812077 | 812077 | 2020-07-17 05:14:16.857018 | usr-zth | http://www.legaleagle.com |
812272 | 812272 | 2020-07-17 06:52:56.156405 | usr-zth | http://www.legaleagle.com |
# Web visits made by Director-role users to linkedin.com.
is_director = web_data['user'].isin(user_set['Director'])
is_linkedin = web_data['website'] == 'http://www.linkedin.com'
d = web_data[is_director & is_linkedin]
d
Unnamed: 0 | datetime | user | website | |
---|---|---|---|---|
812004 | 812004 | 2020-07-17 00:53:37.055608 | usr-zth | http://www.linkedin.com |
812028 | 812028 | 2020-07-17 04:25:33.392512 | usr-zth | http://www.linkedin.com |
812839 | 812839 | 2020-07-17 09:23:35.234721 | usr-zth | http://www.linkedin.com |
816030 | 816030 | 2020-07-17 21:39:10.690242 | usr-zth | http://www.linkedin.com |
816117 | 816117 | 2020-07-17 22:57:07.885162 | usr-zth | http://www.linkedin.com |
# File accesses made by Director-role users to the /docs/clients file
# (filtered by role membership and by filename).
is_director = file_data['user'].isin(user_set['Director'])
is_clients_file = file_data['filename'] == '/docs/clients'
d = file_data[is_director & is_clients_file]
d
Unnamed: 0 | datetime | user | filename | |
---|---|---|---|---|
2190398 | 2190398 | 2020-07-12 06:53:29.890276 | usr-zth | /docs/clients |
2190627 | 2190627 | 2020-07-12 07:30:25.409428 | usr-zth | /docs/clients |
2191567 | 2191567 | 2020-07-12 09:21:23.039452 | usr-zth | /docs/clients |
2191792 | 2191792 | 2020-07-12 09:36:44.177858 | usr-zth | /docs/clients |
2191809 | 2191809 | 2020-07-12 09:38:04.282103 | usr-zth | /docs/clients |
2191854 | 2191854 | 2020-07-12 09:41:55.085457 | usr-zth | /docs/clients |
2192243 | 2192243 | 2020-07-12 10:02:56.145382 | usr-zth | /docs/clients |
2193443 | 2193443 | 2020-07-12 11:08:36.577518 | usr-zth | /docs/clients |
2193521 | 2193521 | 2020-07-12 11:13:20.456921 | usr-zth | /docs/clients |
2193660 | 2193660 | 2020-07-12 11:20:19.920699 | usr-zth | /docs/clients |
2194191 | 2194191 | 2020-07-12 11:49:42.566164 | usr-zth | /docs/clients |
2194623 | 2194623 | 2020-07-12 12:14:38.361616 | usr-zth | /docs/clients |
2194692 | 2194692 | 2020-07-12 12:18:27.239191 | usr-zth | /docs/clients |
2194811 | 2194811 | 2020-07-12 12:24:52.732327 | usr-zth | /docs/clients |
2194907 | 2194907 | 2020-07-12 12:30:38.901351 | usr-zth | /docs/clients |
2196484 | 2196484 | 2020-07-12 13:55:26.269073 | usr-zth | /docs/clients |
2198224 | 2198224 | 2020-07-12 15:32:52.938053 | usr-zth | /docs/clients |
2198229 | 2198229 | 2020-07-12 15:33:15.637331 | usr-zth | /docs/clients |
2198702 | 2198702 | 2020-07-12 16:07:00.856864 | usr-zth | /docs/clients |
2199010 | 2199010 | 2020-07-12 16:31:29.532586 | usr-zth | /docs/clients |
2199508 | 2199508 | 2020-07-12 17:23:56.485406 | usr-zth | /docs/clients |
2199524 | 2199524 | 2020-07-12 17:26:10.024584 | usr-zth | /docs/clients |
2199760 | 2199760 | 2020-07-12 18:01:02.698330 | usr-zth | /docs/clients |
2199860 | 2199860 | 2020-07-12 18:16:34.908082 | usr-zth | /docs/clients |
2199861 | 2199861 | 2020-07-12 18:16:39.691649 | usr-zth | /docs/clients |
2199881 | 2199881 | 2020-07-12 18:19:05.794704 | usr-zth | /docs/clients |
2200060 | 2200060 | 2020-07-12 18:53:36.295046 | usr-zth | /docs/clients |
2200268 | 2200268 | 2020-07-12 19:41:20.607524 | usr-zth | /docs/clients |
2200278 | 2200278 | 2020-07-12 19:43:52.054121 | usr-zth | /docs/clients |
2200312 | 2200312 | 2020-07-12 19:56:59.358838 | usr-zth | /docs/clients |
We have a potential suspect: usr-zth.
This person is the only Director to access this file, and the only Director to access these two URLs.
Let us try to examine their login data further...
# Login and logoff hours for the suspect user, usr-zth.
# Relies on the 'hour' column having been added to login_data beforehand.
suspect_events = login_data[login_data['user'] == 'usr-zth']
suspect_logins = suspect_events[suspect_events['action'] == 'login']
suspect_logoffs = suspect_events[suspect_events['action'] == 'logoff']

# Range set to the month of suspicious activity.
# NOTE(review): this is a positional window (the user's 181st to 210th
# login/logoff events), not a date filter - confirm it still lines up with the
# intended month if the underlying data changes.
activity_window = slice(180, 210)
plt.plot(suspect_logins['hour'].values[activity_window])
plt.plot(suspect_logoffs['hour'].values[activity_window])
plt.show()
Login doesn't reveal anything... not all insider threat cases are the person who stays late at night!
Here we have used the knowledge of the role group to assess where users are acting similarly or differently to each other. Users in the same role would likely require access to the same information (files, websites, etc.), or have similar email contacts. Therefore, looking for when an individual changes their own behaviour, and whether this behaviour differs from that of the others in the same group, can help to identify cases that may be of interest.
There are some other threat indicators in this dataset - can you find them?
If you want another challenge, try the R2 dataset from the Carnegie Mellon University Insider Threat Test Dataset. The dataset is structured in the same format as this example; however, there are 1000 employees, and the activity is much harder to spot...! :)