Svm.reg.plsql

From Studia Informatyczne


SET serveroutput ON
SET pages 10000
SET echo ON


-- usuniecie tabeli i perspektywy do normalizacji
BEGIN 
  EXECUTE IMMEDIATE 'DROP TABLE normalization';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

BEGIN 
  EXECUTE IMMEDIATE 'DROP VIEW v_prepared';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

--
-- normalizacja danych (w tym artybutu decyzyjnego AGE)
--
BEGIN
  -- utworzenie tabeli do przechowywania parametrow normalizacji
  DBMS_DATA_MINING_TRANSFORM.CREATE_NORM_LIN (
    norm_table_name => 'normalization');        

  -- normalizacja za pomoca metody min-max
  DBMS_DATA_MINING_TRANSFORM.INSERT_NORM_LIN_MINMAX (
    norm_table_name => 'normalization',
    data_table_name => 'mining_data_build_v',
    exclude_list    => DBMS_DATA_MINING_TRANSFORM.COLUMN_LIST (
                         'affinity_card',
                         'cust_id',
                         'bookkeeping_application',
                         'bulk_pack_diskettes',
                         'flat_panel_monitor',
                         'home_theater_package',
                         'os_doc_set_kanji',
                         'printer_supplies',
                         'y_box_games')
  );

  -- utworzenie perspektywy pokazujacej znormalizowane dane
  DBMS_DATA_MINING_TRANSFORM.XFORM_NORM_LIN (
    norm_table_name => 'normalization',
    data_table_name => 'mining_data_build_v',
    xform_view_name => 'v_prepared');
END;
/

-- usuniecie starej tabeli z ustawieniami algorytmu
BEGIN 
  EXECUTE IMMEDIATE 'DROP TABLE settings';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

-- utworzenie i wypelnienie tabeli z ustawieniami algorytmu
CREATE TABLE settings (
  setting_name  VARCHAR2(30),
  setting_value VARCHAR2(30));
 
BEGIN       
  INSERT INTO settings (setting_name, setting_value) VALUES
    (dbms_data_mining.svms_kernel_function, dbms_data_mining.svms_gaussian);
  -- (dbms_data_mining.svms_kernel_function,dbms_data_mining.svms_linear);
  INSERT INTO settings (setting_name, setting_value) VALUES
    (dbms_data_mining.svms_conv_tolerance,0.01);
  INSERT INTO settings (setting_name, setting_value) VALUES
    (dbms_data_mining.svms_epsilon,0.1);
  INSERT INTO settings (setting_name, setting_value) VALUES
    (dbms_data_mining.svms_kernel_cache_size,50000000);
  COMMIT;
END;
/

-- usuniecie starego modelu z repozytorium
BEGIN
  DBMS_DATA_MINING.DROP_MODEL('SVMR_Model');
EXCEPTION WHEN OTHERS THEN
  NULL;
END;
/

-- stworzenie nowego modelu regresji
BEGIN
  DBMS_DATA_MINING.CREATE_MODEL(
    model_name          => 'SVMR_Model',
    mining_function     => dbms_data_mining.regression,
    data_table_name     => 'v_prepared',
    case_id_column_name => 'cust_id',
    target_column_name  => 'age',
    settings_table_name => 'settings');
END;
/

-- 
-- wyswietlenie ustawien modelu
-- 
column setting_name format a30
column setting_value format a30
SELECT setting_name, setting_value
  FROM TABLE(DBMS_DATA_MINING.GET_MODEL_SETTINGS('SVMR_Model'))
ORDER BY setting_name;

--
-- wyswietlenie sygnatury modelu
-- 
column attribute_type format a20
SELECT attribute_name, attribute_type
  FROM TABLE(DBMS_DATA_MINING.GET_MODEL_SIGNATURE('SVMR_Model'))
ORDER BY attribute_name;


-----------------------------------------------------------------------
--                               Testowanie modelu
-----------------------------------------------------------------------

-- usuniecie perspektyw zawierajacych dane testowe
BEGIN 
  EXECUTE IMMEDIATE 'DROP VIEW test_prepared';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

BEGIN 
  EXECUTE IMMEDIATE 'DROP VIEW test_scaled';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

-- utworzenie perspektywy pokazujacej dane testowe po normalizacji
BEGIN
  DBMS_DATA_MINING_TRANSFORM.XFORM_NORM_LIN (
    norm_table_name => 'normalization',
    data_table_name => 'mining_data_test_v',
    xform_view_name => 'test_prepared');
END;
/

--
-- atrybut AGE zostal znormalizowany podczas tworzenia modelu, wiec konieczne
-- jest odkodowanie wartosci przez polaczenie z tabela zawierajaca ustawienia normalizacji
--
CREATE VIEW test_scaled AS
SELECT A.cust_id,
       ((PREDICTION(SVMR_Model USING *) * N.scale) + N.shift) pred 
FROM test_prepared A, normalization N
WHERE N.col = 'AGE';

-- 
-- Wyliczenie miar jakosci predykcji
--

--
-- 1. Root Mean Square Error - Sqrt(Mean((x - x')^2))
--
column rmse format 9999.99
SELECT SQRT(AVG((A.pred - B.age) * (A.pred - B.age))) rmse
FROM test_scaled A, mining_data_test_v B
WHERE A.cust_id = B.cust_id;

--
-- 2. Mean Absolute Error - Mean(|(x - x')|)
--
column mae format 9999.99
SELECT AVG(ABS(a.pred - B.age)) mae
FROM test_scaled A, mining_data_test_v B
WHERE A.cust_id = B.cust_id;

-- 
-- 3. Roznice (jesli duza wariancja to nalezy zmienic ustawienia algorytmu)
--  
SELECT TO_CHAR(ROUND(pred, 4)) prediction, residual
FROM (SELECT A.pred, (A.pred - B.age) residual
      FROM test_scaled A, mining_data_test_v B
      WHERE A.cust_id = B.cust_id
      ORDER BY A.pred ASC)
WHERE ROWNUM < 21;

-----------------------------------------------------------------------
--                               Zastosowanie modelu
-----------------------------------------------------------------------

-- usuniecie obiektow pozostalych po poprzednich uruchomieniach programu
BEGIN 
  EXECUTE IMMEDIATE 'DROP VIEW apply_prepared';
EXCEPTION 
  WHEN OTHERS THEN NULL; 
END;
/

-- utworzenie perspektywy pokazujacej dane do zastosowania po normalizacji
BEGIN
  DBMS_DATA_MINING_TRANSFORM.XFORM_NORM_LIN (
    norm_table_name => 'normalization',
    data_table_name => 'mining_data_apply_v',
    xform_view_name => 'apply_prepared');
END;
/

-- 
-- 1. Wyswietl predykcje sredniego wieku klientow z podzialem na plec
--    (wszystkie predykcje musza byc odkodowane na oryginalna domene)
--
SELECT A.cust_gender, COUNT(*) AS cnt,
       ROUND( AVG((PREDICTION(SVMR_Model USING A.*) * N.scale) + N.shift),4) AS avg_age
FROM apply_prepared A, normalization N
WHERE N.col = 'AGE'
GROUP BY cust_gender
ORDER BY cust_gender;


-- 
-- 2. Utworz histogram skladajacy sie z 10 przedzialow, umiesc w histogramie
--    rozklad wieku klientow z Wloch, dla kazdego klienta wyswietl odpowiadajacy jej/jemu
--    numer przedzialu
--
column pred_age format 999.99
WITH
cust_italy AS (
  SELECT *
  FROM apply_prepared
  WHERE country_name = 'Italy'
  )
SELECT cust_id, ((PREDICTION(SVMR_Model USING A.*) * N.scale) + N.shift) pred_age,
       WIDTH_BUCKET( 
			   ((PREDICTION(SVMR_Model USING A.*) * N.scale) + N.shift), 10, 100, 10) "Age Group"
FROM cust_italy A, normalization N
WHERE N.col = 'AGE'
ORDER BY pred_age;