This repository has been archived by the owner on Jan 17, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CreateDatasets.py
133 lines (121 loc) · 4.42 KB
/
CreateDatasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import argparse
import json
from random import *
def parse_args(argv=None):
    """Parse the command-line options of the dataset generator.

    ``--type`` and ``--pcap`` select the operating mode and exclude each
    other.  ``--days`` is an ordinary option so that it can be combined with
    ``--type series``, the mode that consumes it.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default) argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``type`` (default
        ``"NULL"``), ``days`` (default ``5``) and ``pcap`` (default
        ``"NULL"``).
    """
    parser = argparse.ArgumentParser(
        description="Create a dataset or an anomalous dataset for testing")

    # Only the two mode selectors conflict with each other.  Keeping --days
    # inside this group made "--type series --days N" a usage error.
    mode_parser = parser.add_mutually_exclusive_group(required=False)
    mode_parser.add_argument(
        "--type", type=str, required=False, default="NULL",
        help="<normal> for a normal day, <anomalous> for an anomalous day, "
             "<series> for --days consecutive normal days")
    mode_parser.add_argument(
        "--pcap", type=str, required=False, default="NULL",
        help="condenses the input pcap into a dataset with 5m intervals")

    parser.add_argument(
        "--days", type=int, required=False, default=5,
        help="number of days")

    return parser.parse_args(argv)
def createDataset():
    """Return one synthetic "normal" day as a list of 288 random integers.

    The day is made of consecutive segments (12 samples per hour); every
    sample of a segment is drawn with ``randint`` from that segment's
    inclusive value range.
    """
    # (number of samples, lowest value, highest value)
    day_profile = (
        (108, 0, 10),     # 00 to 09
        (48, 80, 90),     # 09 to 13 (Work)
        (12, 0, 10),      # 13 to 14
        (48, 70, 80),     # 14 to 18 (Work)
        (36, 0, 10),      # 18 to 21
        (24, 340, 350),   # 21 to 23 (It's time for Netflix)
        (12, 0, 10),      # 23 to 24
    )
    return [
        randint(low, high)
        for samples, low, high in day_profile
        for _ in range(samples)
    ]
def createAnomalousDataset():
    """Return one synthetic anomalous day as a list of 288 random integers.

    Same segment layout as a normal day (12 samples per hour), except that
    the 09 to 13 segment draws its values from 340..350.
    """
    # (number of samples, lowest value, highest value)
    day_profile = (
        (108, 0, 10),     # 00 to 09
        (48, 340, 350),   # 09 to 13 (Work) -- the anomalous segment
        (12, 0, 10),      # 13 to 14
        (48, 70, 80),     # 14 to 18 (Work)
        (36, 0, 10),      # 18 to 21
        (24, 340, 350),   # 21 to 23 (It's time for Netflix)
        (12, 0, 10),      # 23 to 24
    )
    return [
        randint(low, high)
        for samples, low, high in day_profile
        for _ in range(samples)
    ]
def dataToJson(dataset, filename):
    """Write *dataset* to *filename* as JSON indented by four spaces.

    Args:
        dataset: Any JSON-serializable object (the script passes lists of
            numbers).
        filename: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *dataset* is not JSON-serializable.
    """
    # The context manager closes the file even when json.dump raises.
    with open(filename, "w") as outfile:
        json.dump(dataset, outfile, indent=4)
def dataFromJson(filename):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded Python object.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The original never closed the handle; the context manager does.
    with open(filename, "r") as infile:
        return json.load(infile)
# Script entry: pick the operating mode from the command line and write the
# resulting dataset to a JSON file.
args = parse_args()
datasetType = args.type
pcap = args.pcap
numdays = args.days
dataset = []

if datasetType == "series":
    # Concatenate `numdays` independent normal days into one series.
    for _ in range(numdays):
        dataset += createDataset()
    print("Dataset created")
    dataToJson(dataset, "dataset.json")
elif datasetType == "anomalous":
    dataset = createAnomalousDataset()
    print("Anomalous day Dataset created")
    dataToJson(dataset, "anomalousDay.json")
elif datasetType == "normal":
    dataset = createDataset()
    print("Normal day Dataset created")
    dataToJson(dataset, "normalDay.json")
elif pcap != "NULL":
    # Imported lazily so the synthetic modes work without pyshark installed.
    import pyshark
    from datetime import datetime

    interval = 300  # bucket length in seconds
    dates, series = [], []

    cap = pyshark.FileCapture(pcap)
    try:
        for pkt in cap:
            # Read both fields before storing either one, so a packet that
            # lacks one of them cannot leave `dates` and `series` with
            # different lengths.
            try:
                stamp = float(pkt.frame_info.time_epoch)
                size = int(pkt.length) / 1000
            except AttributeError:
                continue
            dates.append(stamp)
            series.append(size)
            print("\r\033[F\033[KReading " + str(size) + " " + datetime.fromtimestamp(stamp).strftime("%Y-%m-%d %H:%M"))
    finally:
        # Release the capture once reading is finished or has failed.
        cap.close()

    if not dates:
        # Without this guard the banner below fails with IndexError.
        raise SystemExit("No packets with a timestamp and length could be read from " + pcap)

    print("\n\tFrom " + datetime.fromtimestamp(dates[0]).strftime("%Y-%m-%d %H:%M") + " to " + datetime.fromtimestamp(dates[-1]).strftime("%Y-%m-%d %H:%M")+"\n\n")

    # Condense the per-packet sizes into one sum per bucket.  A bucket opens
    # at the first packet seen and closes with the first packet that arrives
    # more than `interval` seconds later (that packet is still counted in the
    # closing bucket).
    newseries = []
    start = -1  # index of the packet that opened the current bucket
    total = 0
    for i, (stamp, size) in enumerate(zip(dates, series)):
        total += size
        if start == -1:
            start = i
            continue
        # Compare raw epoch seconds: differences of naive local datetimes
        # are off by the clock shift when a bucket spans a DST change.
        if stamp - dates[start] > interval:
            newseries.append(total)
            # Report the bucket size before `start` is reset.
            print("\r\033[F\033[KCondensating " + str(i - start + 1) + " points: " + str(newseries[-1]) + "\n")
            total = 0
            start = -1
    # NOTE: packets after the last closed bucket are not written out, as in
    # the original script.

    dataToJson(newseries, pcap + ".json")
else:
    print("Dataset not created")