File size: 7,165 Bytes
12ac14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import data_utils as du
import sql_utils as su
from tqdm.notebook import tqdm_notebook


def create_nanomaterial_entity_tables(connection):

    NanoEntDes_str = "CREATE TABLE IF NOT EXISTS `NanoEntDes` " + \
        "(`ID` INT, `NanoEntity` VARCHAR(255), `Description` TEXT);"

    NanoEntCom_str = "CREATE TABLE IF NOT EXISTS NanoEntCom " + \
        "(ID INT, NanoEntity VARCHAR(255), Composition VARCHAR(100), " + \
        "CompositionType VARCHAR(100), MolecularWeight VARCHAR(150), " + \
        "PubChemID VARCHAR(255));"

    table_creation_querys = [
        NanoEntDes_str,
        NanoEntCom_str
        ]

    for query in table_creation_querys:
        results = su.simple_querry(
            connection,
            query
            )
        if len(results) == 0:
            print("Table Exists")
        else:
            print(results)
    su.show_tables(connection)


def nanomaterialentity_to_sql(
        composition_dt,
        connection
):

    nanomaterialentity_dt = {}

    du.parse_dictionary(
        composition_dt,
        "nanomaterialentity",
        nanomaterialentity_dt
        )

    total_ids = len(nanomaterialentity_dt)

    with tqdm_notebook(
        total=total_ids,
        desc='Processing',
        unit='ID'
    ) as progress_bar:

        for ID in nanomaterialentity_dt:
            sample_info = nanomaterialentity_dt[ID]
            for NanoEntity in sample_info:
                NanoEntity_info = sample_info[NanoEntity]
                for Entry in NanoEntity_info:
                    # Create a cursor object
                    cursor = connection.cursor()

                    # Write to NanoEntDes
                    NanoEntDes_insert = (ID, NanoEntity, Entry['Description'])

                    # Execute a SELECT statement to check
                    # if the entry already exists

                    search_query = "SELECT COUNT(*) FROM NanoEntDes" + \
                        " WHERE ID = ? AND NanoEntity = ?" + \
                        " AND Description = ?"

                    cursor.execute(search_query, NanoEntDes_insert)
                    count = cursor.fetchone()[0]

                    # Check the count to determine if the entry exists
                    if count == 0:
                        # Entry does not exist, proceed with insertion
                        insert_query = "INSERT INTO NanoEntDes " + \
                                        "(ID, NanoEntity, Description) " + \
                                        "VALUES (?, ?, ?)"
                        cursor.execute(insert_query, NanoEntDes_insert)
                        connection.commit()
                    # else:
                    #     # Entry already exists, skip
                    #     print("Entry already exists, skipping...")

                    # Commit the changes
                    connection.commit()

                    for composition in Entry['ComposingElements']:
                        if 'DisplayName' in composition and not composition[
                            'DisplayName'
                        ]:

                            composition_type = "NULL"
                            composition_name = "NULL"
                            composition_MolecularWeight = "NULL"

                        else:
                            # Extract composition_type
                            displayname = composition['DisplayName']
                            index_open = displayname.find("(")
                            composition_type = displayname[
                                :index_open
                                ].strip() if index_open != -1 else "NULL"

                            # Extract composition_name and composition_quantity
                            index_name = displayname.find(
                                "name: "
                            ) + len("name: ")
                            index_amount = displayname.find(", amount: ")

                            if index_name != -1:
                                if index_amount != -1:
                                    composition_name = displayname[
                                        index_name:index_amount
                                        ].strip()
                                    composition_MolecularWeight = displayname[
                                        index_amount + len(", amount: "):-1
                                        ].strip()
                                else:
                                    composition_name = displayname[
                                        index_name:-1
                                        ].strip()
                                    composition_MolecularWeight = "NULL"
                            else:
                                composition_name = "NULL"
                                composition_MolecularWeight = "NULL"

                        if 'PubChemId' in composition and not composition[
                            'PubChemId'
                        ]:
                            PubChemID = "Null"
                        else:
                            PubChemID = composition['PubChemId']

                    # Write to NanoEntCom
                        NanoEntCom_insert = (
                            ID,
                            NanoEntity,
                            composition_name,
                            composition_type,
                            composition_MolecularWeight,
                            PubChemID
                            )

                        # Execute a SELECT statement to check
                        # if the entry already exists
                        search_query = "SELECT COUNT(*) " + \
                            "FROM NanoEntCom WHERE " + \
                            "ID = ? AND NanoEntity = ? " + \
                            "AND Composition = ? " + \
                            "AND CompositionType = ? " + \
                            "AND MolecularWeight = ? " + \
                            "AND PubChemID = ?"

                        cursor.execute(search_query, NanoEntCom_insert)
                        count = cursor.fetchone()[0]

                        # Check the count to determine if the entry exists
                        if count == 0:
                            # Entry does not exist, proceed with insertion
                            insert_query = "INSERT INTO NanoEntCom " + \
                                            "(ID, NanoEntity, " + \
                                            "Composition, " + \
                                            "CompositionType, " + \
                                            "MolecularWeight, PubChemID) " + \
                                            "VALUES (?, ?, ?, ?, ?, ?)"
                            cursor.execute(insert_query, NanoEntCom_insert)
                            connection.commit()
                        # else:
                        #     # Entry already exists, skip
                        #     print("Entry already exists, skipping...")
            progress_bar.update(1)
    cursor.close()