-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
289 lines (238 loc) · 9.85 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
from collections import defaultdict
import pandas as pd
import numpy as np
import plotly.graph_objs as go
# Module-wide shorthand for pi; the angle helpers in this file reference it as PI.
PI = np.pi
def create_matrix(dataframe, code=0, groups=None):
    """ Generates the analysis matrix for our SES metadata CIRCOS figure.

    Input:
        dataframe - the dataframe generated from the Circos_DATA.csv file.
                    It must contain a 'Code' column. When ``groups`` is None
                    the first two entries of the column sums are skipped
                    (presumably Record ID and Code -- confirm against the
                    CSV layout).
        code      - A code (1-3) that specifies which subset of the
                    manuscripts to analyze. Default is zero.
                    0 = Analyze all manuscripts (every row with Code > 0)
                    1 = Analyze manuscripts that are coded as Social
                        Impacts on Environmental Systems
                    2 = Analyze manuscripts that are coded as Environmental
                        Impacts on Social Systems
                    3 = Analyze manuscripts that are coded as Coupled
                        Systems Dynamics
        groups    - Optional pair of column-name lists
                    ``(groups[0], groups[1])``. When given, each group is
                    sorted by abundance separately and only pairs made of
                    one column from each group are counted; pairs inside a
                    group stay at zero. When None, every pair of data
                    columns is counted.

    Returns:
        A tuple ``(matrix, n_rows)`` where ``matrix`` is a square pandas
        DataFrame of co-occurrence counts (zero diagonal) and ``n_rows`` is
        the number of rows left after filtering on ``code``.
    """
    # Filter the rows based on the value of the code argument.
    if code == 0:
        df = dataframe.loc[dataframe['Code'] > 0]
    else:
        df = dataframe.loc[dataframe['Code'] == code]

    # Reorder the columns by abundance, most frequent first.
    if groups is None:
        # Skip the first two summed columns, then sort the rest together.
        counts = df.sum().iloc[2:].sort_values(ascending=False)
        df = df[counts.index]
    elif groups:
        # Sort each group on its own and keep group 1 ahead of group 2.
        group1 = list(df[groups[0]].sum().sort_values(ascending=False).index)
        group2 = list(df[groups[1]].sum().sort_values(ascending=False).index)
        df = df[group1 + group2]

    # Initialize the matrix with zeros in every location. The current column
    # is inserted first so the key order matches what callers already get.
    columns = list(df.columns)
    circos_matrix = {}
    for datatype in columns:
        ordered = [datatype] + [c for c in columns if c != datatype]
        circos_matrix[datatype] = dict.fromkeys(ordered, 0)

    def count_pairs(pairs):
        # Build the "value == 1" masks once instead of re-filtering the
        # dataframe for every pair, then fill both halves of the matrix.
        present = df == 1
        for first, second in pairs:
            matches = int((present[first] & present[second]).sum())
            circos_matrix[first][second] = matches
            circos_matrix[second][first] = matches

    if groups is None:
        # Every unordered pair of distinct columns; the count is symmetric.
        count_pairs(
            (first, second)
            for i, first in enumerate(columns)
            for second in columns[i + 1:]
        )
    elif groups:
        # Only cross-group pairs are counted.
        count_pairs(
            (first, second)
            for first in groups[0]
            for second in groups[1]
        )

    # Return our result as a pandas dataframe instead of a dict.
    # Catch all the NaN here instead of in the code above.
    return pd.DataFrame(circos_matrix).fillna(0), len(df)
def moduloAB(x, a, b):
    """ Wrap the real number x into the half-open interval [a, b).

    The interval is identified with the unit circle, so callers are
    expected to pass ends with b - a = 2*PI.
    Raises ValueError when a is not strictly below b.
    """
    if a >= b:
        raise ValueError('Incorret interval ends')
    offset = (x - a) % (b - a)
    if offset < 0:
        return offset + b
    return offset + a
def test_2PI(x):
return 0<= x < 2*np.pi
def check_data(data_matrix):
    """ Validate that data_matrix is square and return its side length.

    Raises ValueError when the two dimensions of the shape differ.
    """
    n_rows, n_cols = data_matrix.shape
    if n_rows == n_cols:
        return n_rows
    raise ValueError('Data array must have (n,n) shape')
def get_ideogram_ends(ideogram_len, gap):
    """ Lay the ideograms end to end and return their [start, stop] pairs.

    ideogram_len is the sequence of ideogram lengths; gap is the spacing
    inserted after each ideogram. The first ideogram starts at 0.
    """
    ends = []
    cursor = 0
    for span in ideogram_len:
        stop = cursor + span
        ends.append([cursor, stop])
        # The next ideogram begins one gap past the end of this one.
        cursor = stop + gap
    return ends
def make_ideogram_arc(R, phi, a=50):
    """ Sample points along a circular arc, returned as complex numbers.

    R   - the circle radius
    phi - the list of the two end angle coordinates of the arc
    a   - a parameter that controls the number of points evaluated on the arc

    Returns a numpy array of complex points R*exp(1j*theta).
    """
    # Bring both ends into [0, 2*PI) before measuring the arc.
    if not test_2PI(phi[0]) or not test_2PI(phi[1]):
        phi = [moduloAB(t, 0, 2*PI) for t in phi]
    # Arc length modulo a full turn. The parentheses matter: `x % 2*PI`
    # parses as `(x % 2) * PI`, which is not the angular length.
    length = (phi[1] - phi[0]) % (2*PI)
    nr = 5 if length <= PI/4 else int(a*length/PI)
    if phi[0] >= phi[1]:
        # The arc crosses angle 0; re-express the ends in [-PI, PI) so the
        # linear interpolation below runs the short way around.
        phi = [moduloAB(t, -PI, PI) for t in phi]
    theta = np.linspace(phi[0], phi[1], nr)
    return R*np.exp(1j*theta)
def map_data(data_matrix, row_value, ideogram_length):
    """ Rescale every column of a square data matrix.

    Each column is multiplied by ideogram_length and divided by row_value.
    The result is a new float array with the same shape as data_matrix.
    Raises ValueError (via check_data) when data_matrix is not square.
    """
    size = check_data(data_matrix)
    scaled = np.zeros(data_matrix.shape)
    for col in range(size):
        column = data_matrix[:, col]
        scaled[:, col] = ideogram_length * column / row_value
    return scaled
def make_ribbon_ends(mapped_data, ideo_ends, idx_sort):
    """ Compute the (start, stop) interval of every ribbon on every ideogram.

    For row k the ribbons are stacked from ideo_ends[k][0] in the order
    given by idx_sort[k], each one as wide as the matching mapped_data entry.
    Returns a list of rows, each a list of (start, stop) tuples.
    """
    size = mapped_data.shape[0]
    bounds = np.zeros((size, size + 1))
    for row in range(size):
        running = ideo_ends[row][0]
        bounds[row][0] = running
        for slot in range(size):
            source = idx_sort[row][slot]
            bounds[row][slot + 1] = running + mapped_data[row][source]
            # Continue from the value as stored in the float array.
            running = bounds[row][slot + 1]
    return [list(zip(edges[:-1], edges[1:])) for edges in bounds]
def control_pts(angle, radius):
    """ Return the three control points of a quadratic Bezier curve.

    angle  - a 3-list containing the angular coordinates of the control
             points b0, b1, b2
    radius - the distance from b1 to the origin O(0,0); b0 and b2 lie on
             the unit circle

    Returns a list of three (x, y) tuples. A list is returned rather than a
    one-shot zip iterator so callers can inspect the points more than once.
    Raises ValueError when angle does not have exactly three entries.
    """
    if len(angle) != 3:
        # The name previously raised here was never defined, so this path
        # failed with NameError; ValueError matches the file's other checks.
        raise ValueError('angle must have len =3')
    b_cplx = np.array([np.exp(1j*theta) for theta in angle])
    # Only the middle control point is pulled towards or away from the origin.
    b_cplx[1] = radius*b_cplx[1]
    return list(zip(b_cplx.real, b_cplx.imag))
def ctrl_rib_chords(l, r, radius):
    """ Build the control polygons of the two Bezier sides of a ribbon.

    l (r)  - the list of angular variables of the ribbon arc ends defining
             the ribbon starting (ending) arc; each must have length 2
    radius - a common parameter for both control polygons

    Returns a 2-list, one control polygon per pair (l[j], r[j]); the middle
    control angle is the mean of the two ends.
    Raises ValueError when l or r does not have exactly two entries.
    """
    if len(l) != 2 or len(r) != 2:
        raise ValueError('the arc ends must be elements in a list of len 2')
    polygons = []
    for start, stop in zip(l, r):
        midpoint = (start + stop) / 2
        polygons.append(control_pts([start, midpoint, stop], radius))
    return polygons
def make_q_bezier(b):
    """ Define the Plotly SVG path for a quadratic Bezier curve.

    b - an iterable of the curve's three control points, each indexable as
        (x, y). One-shot iterators such as a zip object are accepted.

    Returns the path string 'M x0,y0 Q x1, y1 x2, y2'.
    Raises ValueError when b does not hold exactly three points.
    """
    # Materialize once and unpack from the copy. Unpacking `b` itself after
    # list(b) fails when b is an iterator, because it is already exhausted.
    control = list(b)
    if len(control) != 3:
        raise ValueError('control poligon must have 3 points')
    A, B, C = control
    return ('M ' + str(A[0]) + ',' + str(A[1]) + ' ' + 'Q '
            + str(B[0]) + ', ' + str(B[1]) + ' '
            + str(C[0]) + ', ' + str(C[1]))
def make_ribbon_arc(theta0, theta1):
    """ Build the SVG 'L x, y ' segments tracing a unit-circle arc.

    theta0, theta1 - the arc's end angles; both must pass test_2PI.

    Returns the concatenated line-to commands from theta0 to theta1.
    Raises ValueError when an angle fails test_2PI, or when theta0 < theta1
    and the angles remapped to [-PI, PI) have the same sign.
    """
    if not (test_2PI(theta0) and test_2PI(theta1)):
        raise ValueError('the angle coordinates for an arc side of a ribbon must be in [0, 2*pi]')
    if theta0 < theta1:
        theta0 = moduloAB(theta0, -PI, PI)
        theta1 = moduloAB(theta1, -PI, PI)
        if theta0*theta1 > 0:
            raise ValueError('incorrect angle coordinates for ribbon')
    # Always sample at least three points.
    n_points = max(int(40*(theta0 - theta1)/PI), 3)
    angles = np.linspace(theta0, theta1, n_points)
    pts = np.exp(1j*angles)  # points on arc in polar complex form
    return ''.join(
        'L ' + str(x) + ', ' + str(y) + ' '
        for x, y in zip(pts.real, pts.imag)
    )
def make_layout(title, plot_size):
    """ Create the square Plotly layout used for the chord diagram.

    title     - the figure title
    plot_size - used for both the width and the height

    Both axes are hidden (no line, grid, tick labels or title) and the
    `shapes` list starts empty; the dicts defining the ribbon and ideogram
    shapes are appended to it later.
    """
    hidden_axis = {
        'showline': False,
        'zeroline': False,
        'showgrid': False,
        'showticklabels': False,
        'title': '',
    }
    return go.Layout(
        title=title,
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
        showlegend=False,
        width=plot_size,
        height=plot_size,
        margin={'t': 25, 'b': 25, 'l': 25, 'r': 25},
        hovermode='closest',
        shapes=[],
    )
def make_ideo_shape(path, line_color, fill_color):
    """ Describe an ideogram as a Plotly path-shape dict.

    path       - the SVG path of the shape
    line_color - the color of the shape boundary
    fill_color - the color assigned to the ideogram
    """
    outline = {'color': line_color, 'width': 0.45}
    return {
        'line': outline,
        'path': path,
        'type': 'path',
        'fillcolor': fill_color,
        'layer': 'below',
    }
def make_ribbon(l, r, line_color, fill_color, radius=0.2):
    """ Describe a ribbon between two arcs as a Plotly path-shape dict.

    l=[l[0], l[1]], r=[r[0], r[1]] represent the opposite arcs in the ribbon
    line_color - the color of the shape boundary
    fill_color - the fill color for the ribbon shape
    radius     - passed to ctrl_rib_chords for both Bezier sides
    """
    # Coerce each control polygon to a list so it can be reversed below.
    first_side, second_side = (
        list(side) for side in ctrl_rib_chords(l, r, radius)
    )
    # Closed outline: Bezier side, arc r, reversed Bezier side, arc l.
    outline = ''.join([
        make_q_bezier(first_side),
        make_ribbon_arc(r[0], r[1]),
        make_q_bezier(second_side[::-1]),
        make_ribbon_arc(l[1], l[0]),
    ])
    return {
        'line': {'color': line_color, 'width': 0.5},
        'path': outline,
        'type': 'path',
        'fillcolor': fill_color,
        'layer': 'below',
    }
def make_self_rel(l, line_color, fill_color, radius):
    """ Describe a self-relation (an ideogram linked to itself) as a
    Plotly path-shape dict.

    l          - the two angular ends [l[0], l[1]] of the arc
    line_color - the color of the shape boundary
    fill_color - the fill color of the shape
    radius     - the radius of Bezier control point b_1
    """
    # Materialize the control points first. A one-shot iterator would be
    # consumed by the length check inside make_q_bezier before it could be
    # unpacked there.
    b = list(control_pts([l[0], (l[0]+l[1])/2, l[1]], radius))
    return dict(
        line=dict(
            color=line_color, width=0.5
        ),
        # Bezier curve from l[0] to l[1], closed by the arc back to l[0].
        path=make_q_bezier(b)+make_ribbon_arc(l[1], l[0]),
        type='path',
        fillcolor=fill_color,
        layer='below'
    )
def invPerm(perm):
    """ Return the inverse of the permutation perm.

    For every position i, the result holds i at index perm[i].
    """
    inverse = [0] * len(perm)
    for position, image in enumerate(perm):
        inverse[image] = position
    return inverse