6.0.0-git
2024-10-15
Last Modified 2015-11-20 by Guest

How to convert Serialized Data to UTF-8

The following script was used at the University of Tübingen, Germany, to convert the serialized data from ISO-8859-15 to UTF-8,
in the process of migrating from Horde 3 to Horde 5. As it was not intended to be used at other sites, it is tailored to
the environment of the University of Tübingen which runs PostgreSQL. You should adjust the script to your own needs.

  • THIS SCRIPT COMES WITH NO WARRANTY
  • BACKUP YOUR DATABASE

This script expects that the database is converted to utf-8 already, and will fix the serialization of non-ASCII data

#!/usr/bin/php
# Copyright: 2015  Michael Menge michael.menge(at)uni-tuebingen(dot)de
# http://www.gnu.org/licenses/gpl
<?php

define('LOG_LEVEL', LOG_INFO);

$errors = array(LOG_ERR=>0, LOG_WARNING=>0, LOG_NOTICE=>0, LOG_INFO=>0, LOG_DEBUG=>0);

function printLog ($level, $string) {
    global $errors;
    $errors[$level]++;
    if ($level <= LOG_LEVEL) {
        echo $string;
    }
}

$ar=array("horde_alarms"=>array("cols" => array("alarm_internal","alarm_methods","alarm_params")),
"turba_objects"=>array("key" => "object_id", "cols" => array("object_members")),
"turba_shares"=>array("key" => "share_id", "cols" => array("attribute_params")),
"kronolith_events"=>array("key" => "event_id", "cols" => array("event_attendees")),
"ingo_rules"=>array("key" => "rule_id", "cols" => array("rule_conditions")),
"horde_histories"=>array("key"=>"history_id", "cols" => array("history_extra")),
"horde_prefs"=>array("cols" => array("pref_value")));

printLog(LOG_INFO, mb_internal_encoding()."\n");
mb_internal_encoding("UTF-8");
printLog(LOG_INFO, mb_internal_encoding()."\n");
$starttime = time();
$horde_db="host=HOSTNAME dbname=horde user=horde password=XXXXX";

$dbconn = pg_connect($horde_db)
          or die('Verbindungsaufbau fehlgeschlagen: ' . pg_last_error());

foreach ($ar as $table=>$tabledata) {
    $tablekey="";
    if (!isset($tabledata["key"])) {
        $query="CREATE SEQUENCE zdv_encoding_".$table."_seq; ";
        $query.="ALTER TABLE ".$table." ADD COLUMN zdv_encoding_id INTEGER NOT NULL DEFAULT nextval('zdv_encoding_".$table."_seq'); ";
        $query.="CREATE UNIQUE INDEX zdv_encoding_id_idx ON ".$table." (zdv_encoding_id);";
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before ALTER TABLE $table ADD index COLUMN\n");
        $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after ALTER TABLE $table ADD index COLUMN\n");
        $tablekey="zdv_encoding_id";
    } else {
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": key vorhanden\n");
        $tablekey=$tabledata["key"];
    }
    if (isset($tabledata["cols"])) {
        $cols=$tabledata["cols"];
        foreach ($cols as $col) {
            $convert_error=0;
            $convert_failed=0;
            $convert_converted=0;
            $convert_already=0;
            $convert_ascii=0;
            $my_stmname="my_update_".$table."_".$col;
            pg_prepare($my_stmname, "UPDATE ".$table." SET ".$col."=$1 WHERE ".$tablekey."=$2");
            $query = "SELECT ".$tablekey.", ".$col." FROM ".$table." where ".$col." like 'a:%:{%';";
            $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
            printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before UPDATE $table:$col\n");
            while ($line = pg_fetch_array($result, null, PGSQL_ASSOC)) {
                $value=$line[$col];
                $id=$line[$tablekey];
                if (!mb_check_encoding($value,"ASCII")) {
                    $changed_value = mb_convert_encoding ( $value , "ISO-8859-15");
                    $unserial_value=@unserialize($changed_value);
                    if ($unserial_value != FALSE) {
                        mb_convert_variables("UTF-8", "ISO-8859-15", $unserial_value);
                        $converted_value=serialize($unserial_value);
                        printLog(LOG_INFO, "Convert $table:$col ID=$id\n");
                        printLog(LOG_DEBUG, "FROM=$value TO=$converted_value\n");
                        $up_result = pg_execute($dbconn, $my_stmname, array($converted_value, $id));
                        if (!$up_result) {
                            printLog(LOG_ERR, "ERROR WITH QUERY: $query\n");
                            $convert_error++;
                        } else {
                            $convert_converted++;
                        }
                    } else {
                        $unserial_value=@unserialize($value);
                        if ($unserial_value == FALSE) {
                            printLog(LOG_ERR, "FAILED: Unserialize ISO and UTF-8 $table:$col VALUE=$value\n");
                            $convert_failed++;
                        } else {
                            printLog(LOG_INFO, "ALREADY Converted: $table:$col  ID=$id\n");
                            printLog(LOG_DEBUG, "VALUE=$value\n");
                            $convert_already++;
                        }
                    }
                } else {
                    printLog(LOG_DEBUG, "ASCII only $table:$col ID=$id\n");
                    $convert_ascii++;
                }
            }
            printLog(LOG_NOTICE, "--------\nConverted   : $convert_converted\nASCII         : $convert_ascii\nAlready con     : $convert_already\nFailed : $convert_failed\nFehler        : $convert_error\n");
            printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after UPDATE $table:$col\n");
        }
    } else {
        printLog(LOG_WARNING, "No Cols for Table $table found\n");
    }
    if (!isset($tabledata["key"])) {
        $query="DROP INDEX zdv_encoding_id_idx; ";
        $query.="ALTER TABLE ".$table." DROP COLUMN zdv_encoding_id; ";
        $query.="DROP SEQUENCE zdv_encoding_".$table."_seq; ";
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": before ALTER TABLE $table DROP COLUMN\n");
        $result = pg_query($query) or die('Abfrage fehlgeschlagen: ' . pg_last_error());
        printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after ALTER TABLE $table DROP COLUMN\n");
    }
}
printLog(LOG_NOTICE, "Sec: ".(time()-$starttime).": after Convert\n");
printLog(LOG_NOTICE, "Errors: ".$errors[LOG_ERR]."\nWarning: ".$errors[LOG_WARNING]."\n");

?>