Initialize Senzing

Version 2.x Version 3.x

This notebook initializes the Senzing Engine configuration in the database for running Senzing notebooks.

import json
import os
import sys
import time

System path

# Senzing directory locations; each environment variable, when set,
# takes precedence over the built-in default.
data_dir = os.getenv("SENZING_DATA_DIR", "/opt/senzing/data")
etc_dir = os.getenv("SENZING_ETC_DIR", "/etc/opt/senzing")
g2_dir = os.getenv("SENZING_G2_DIR", "/opt/senzing/g2")

# Add the "python" directory under g2_dir to the module search path so
# that later imports can be resolved from it.
python_path = g2_dir + "/python"
sys.path.append(python_path)

Create Senzing configuration

Create `senzing_config_json`, a JSON string. Although there are default values, they can be overridden by operating system environment variables. The JSON string is stored for use by other notebooks.

config_path = etc_dir
support_path = os.environ.get("SENZING_DATA_VERSION_DIR", data_dir)
resource_path = "{0}/resources".format(g2_dir)

sql_connection = os.environ.get(
    "SENZING_SQL_CONNECTION", "sqlite3://na:na@/var/opt/senzing/sqlite/G2C.db")

senzing_config_dictionary = {
    "PIPELINE": {
        "CONFIGPATH": config_path,
        "SUPPORTPATH": support_path,
        "RESOURCEPATH": resource_path
    },
    "SQL": {
        "CONNECTION": sql_connection,
    }
}

senzing_config_json = json.dumps(senzing_config_dictionary)
%store senzing_config_json
Stored 'senzing_config_json' (str)

Initialize variables

# Arguments passed to the Senzing init() calls below: the module name
# and the verbose-logging flag.
module_name, verbose_logging = "pyG2", False

Initialize a G2ConfigMgr instance

from senzing import G2ConfigMgr, G2Exception

# Instantiate the configuration manager and initialize it with the module
# name, the JSON configuration string, and the verbose-logging flag.
g2_configuration_manager = G2ConfigMgr()
try:
    g2_configuration_manager.init(module_name, senzing_config_json, verbose_logging)
except G2Exception.G2ModuleGenericException as err:
    # Report the failure via the manager's last-exception text; execution continues.
    print(g2_configuration_manager.getLastException())

Ensure a default configuration exists

If a Senzing default configuration does not exist in the database, create a G2Config instance to be used in creating a default configuration.

config_id_bytearray = bytearray()
try:
    g2_configuration_manager.getDefaultConfigID(config_id_bytearray)
    if config_id_bytearray:
        print("Default config already set")
        %store config_id_bytearray

    else:
        print("No default configuration set, creating one in the Senzing repository")
        # Create a G2Config instance.

        from G2Config import G2Config
        g2_config = G2Config()
        try:
            g2_config.initV2(module_name, senzing_config_json, verbose_logging)

            # Create configuration from template file.

            config_handle = g2_config.create()

            # Save Senzing configuration to string.

            response_bytearray = bytearray()
            g2_config.save(config_handle, response_bytearray)
            senzing_model_config_str = response_bytearray.decode()

        except G2Exception.G2ModuleGenericException as err:
            print(g2_config.getLastException())

        # Externalize Senzing configuration to the database.

        config_comment = "senzing-init.ipynb added at {0}".format(time.time())
        config_id_bytearray = bytearray()
        try:
            g2_configuration_manager.addConfig(
                senzing_model_config_str,
                config_comment,
                config_id_bytearray)

            # Set new configuration as the default.

            g2_configuration_manager.setDefaultConfigID(config_id_bytearray)
        except G2Exception.G2ModuleGenericException as err:
            print(g2_configuration_manager.getLastException())

except G2Exception.G2ModuleGenericException as err:
    print(g2_configuration_manager.getLastException())
Default config already set
Stored 'config_id_bytearray' (bytearray)