-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata preprocessing
111 lines (80 loc) · 2.91 KB
/
data preprocessing
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Data preprocessing
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
#os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Load the yearly speed CSVs. Read order: 2013, 2014, 2015, 2018, then the
# 2017 file, which is bound to the plain name `data`.
# NOTE: the paths are machine-specific -- change the locations before running.
data_2013, data_2014, data_2015, data_2018, data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/home/hemapriya/hema_data/sw_speed_2013_18/2013_sw_speed.csv",
        "/home/hemapriya/hema_data/sw_speed_2013_18/2014_sw_speed.csv",
        "/home/hemapriya/hema_data/sw_speed_2013_18/sw_speed_2015.csv",
        "/home/hemapriya/hema_data/sw_speed_2013_18/sw_speed_2018.csv",
        "/home/hemapriya/hema_data/sw_speed_2013_18/solar_wind_latest_2017.csv",
    )
)
# Sanity check: every day number 1..365 should have exactly 12 rows in the
# 2018 table. Prints each day whose row count differs, then the number of
# such days.
import pandas as pd

# NOTE: machine-specific path -- change the location before running.
data = pd.read_csv("D:\\HEMA_IIT\\paper_1\\sw_speed_2018_or.csv")

miss_v = []  # day numbers whose row count is not 12
miss_h = []  # row count actually found for each of those days
for i in range(1, 366):  # day numbers 1..365
    # Count matching rows directly from the boolean mask instead of
    # materialising a filtered copy of the frame for every day.
    h = int((data['day'] == i).sum())
    if h != 12:
        print(i)
        miss_v.append(i)
        miss_h.append(h)

miss = len(miss_v)  # total number of incomplete days
print(miss)
# Report rows of the 2017 table whose speed is zero or negative.
# Only reports them; the actual removal is still disabled (see the last line).
import pandas as pd

# NOTE: machine-specific path -- change the location before running.
data = pd.read_csv("D:\\HEMA_IIT\\paper_1\\sw_speed_2017_or.csv")

# Offending rows, and the index labels of those rows.
incorrect1 = data.loc[data['speed'] <= 0]
incorrect = data.index[data['speed'] <= 0]

print(incorrect)
print(incorrect1.shape[0])
# Disabled removal step:
# data_y2=data_y1.drop(np.array(incorrect))
# 2013: keep rows 72..4379 (by position) of the table loaded earlier, then
# pull out column 3 (speed) and column 4 (duration) as NumPy arrays.
data_y_2013 = data_2013.iloc[72:4380]
data_speed_2013 = data_y_2013.iloc[:, 3].values
duration_2013 = data_y_2013.iloc[:, 4].values
print(data_y_2013.shape)
# 2014: load the table, keep rows 49..4379 (by position), then pull out
# column 3 (speed) and column 4 (duration) as NumPy arrays.
# NOTE: machine-specific path -- change the location before running.
data_2014 = pd.read_csv(
    "/home/hemapriya/hema_data/sw_speed_2013_18/2014_sw_speed.csv"
)
data_y_2014 = data_2014.iloc[49:4380]
data_speed_2014 = data_y_2014.iloc[:, 3].values
duration_2014 = data_y_2014.iloc[:, 4].values
# 2015: load the table, keep rows 49..4379 (by position), then pull out
# column 3 (speed) and column 4 (duration) as NumPy arrays.
# NOTE: machine-specific path -- change the location before running.
data_2015 = pd.read_csv(
    "/home/hemapriya/hema_data/sw_speed_2013_18/sw_speed_2015.csv"
)
data_y_2015 = data_2015.iloc[49:4380]
data_speed_2015 = data_y_2015.iloc[:, 3].values
duration_2015 = data_y_2015.iloc[:, 4].values
# 2017: load the table into `data`, keep rows 49..4379 (by position), then
# pull out column 3 (speed) and column 4 (duration) as NumPy arrays.
# The 2017 arrays use the unsuffixed names `data_speed` and `duration`.
# NOTE: machine-specific path -- change the location before running.
data = pd.read_csv(
    "/home/hemapriya/hema_data/sw_speed_2013_18/solar_wind_latest_2017.csv"
)
data_y_2017 = data.iloc[49:4380]
data_speed = data_y_2017.iloc[:, 3].values
duration = data_y_2017.iloc[:, 4].values
# 2018: load the table, keep rows 49..4379 (by position), then pull out
# column 3 (speed) and column 4 (duration) as NumPy arrays.
# NOTE: machine-specific path -- change the location before running.
data_2018 = pd.read_csv(
    "/home/hemapriya/hema_data/sw_speed_2013_18/sw_speed_2018.csv"
)
data_y_2018 = data_2018.iloc[49:4380]
data_speed_2018 = data_y_2018.iloc[:, 3].values
duration_2018 = data_y_2018.iloc[:, 4].values
# Merge the 2013, 2014, 2015 and 2017 slices into one training frame.
# DataFrame.append was removed in pandas 2.0, so the frames are stacked with
# pd.concat instead. Like the old append chain, this keeps each frame's
# original row labels (no index reset).
# NOTE(review): the 2018 slice is not merged here -- presumably it is held
# out for evaluation; confirm.
train_y3 = pd.concat([data_y_2013, data_y_2014, data_y_2015, data_y_2017])

# Column 3 of the merged frame (the speed column) as a NumPy array.
trainy_speed = train_y3.iloc[:, 3].values
# A bare `trainy_speed.shape` only displays in a notebook; print it so the
# script shows it too.
print(trainy_speed.shape)