Tuesday, 10 March 2015

Posted by jinson on 16:58 No comments

1. Extract users' speech conversations with Cortana to see what the user said and how Cortana responded, along with the device and OS used.

Following is the sample script.
// Date window handed to both the module initializer and the view.
#DECLARE StartDate string = "2015-01-20";
#DECLARE EndDate string = "2015-01-20";

// Step 1: load the CU View dependency module, initialize its resources,
// then open the log page view over the Cortana dataset.
MODULE @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CUViewSlapiDependency.module" AS CUModule;
CUModule.InitializeCUViewResource(
    Start = @StartDate,
    End = @EndDate,
    Dataset = "Cortana"
);

CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"
    PARAMS (Start = @StartDate, End = @EndDate, Dataset = "Cortana");

// Step 2: project the columns of interest for non-test, en-US speech queries.
// CortanaResponseText is read from the "DisplayText" property of the first
// element whose NODE_NAME is "Results" (with "" passed as the fallback value).
SpeechTurns =
    SELECT
        Market,
        ClientId,
        Request_DeviceInfo.Model AS DeviceModel,
        Request_OSInfo.ProductName AS OSName,
        CU_ConversationId,
        CU_TurnId,
        CU_Request_RequestTime,
        Request_Url,
        CU_SRBestDisplayText,
        CU_LGResponseGeneratorDS
            .FindElementsWithProperty("NODE_NAME", "Results")[0]
            .GetDataPropertyOrDefault("DisplayText", "") AS CortanaResponseText
    FROM CUView
    WHERE NOT Request_IsTestingTraffic
          AND Market == "en-US"
          AND CU_LUInputQueryType == "SpeechQuery";

// Step 3: keep only turns where both the recognized text and the response text
// are non-empty, and take the first 1000 rows in client / conversation / time order.
SampleTurns =
    SELECT TOP 1000
        Market,
        ClientId,
        DeviceModel,
        OSName,
        CU_ConversationId,
        CU_TurnId,
        CU_Request_RequestTime,
        Request_Url,
        CU_SRBestDisplayText,
        CortanaResponseText
    FROM SpeechTurns
    WHERE NOT string.IsNullOrEmpty(CortanaResponseText)
          AND NOT string.IsNullOrEmpty(CU_SRBestDisplayText)
    ORDER BY ClientId, CU_ConversationId, CU_Request_RequestTime;

// Step 4: write the sampled turns to a structured stream on Cosmos.
OUTPUT SampleTurns TO SSTREAM @"/my/CUViewCortanaInputandResponseResult.20140920.ss";

2. Extract the top domains for Xbox One impressions in the en-US market.

LUDomain comes from the LUPostProcess DS. Fetching data from the DataSourceList UDT is based on the key-value pair (KVP) concept.
If the key and value look like "T" : "D.LU.PostProcess", we can use FindElementsWithProperty("T", "D.LU.PostProcess") to fetch the data. Sometimes the data is just a node such as "LUResult", as in the script below; in that case we can use FindElementsWithProperty("NODE_NAME", "LUResult") to fetch the node.
Keys and values are case-sensitive.
// Date window handed to both the module initializer and the view;
// the output path is derived from the same two dates.
#DECLARE StartDate string = "2015-01-20";
#DECLARE EndDate string = "2015-01-20";
#DECLARE OutputStream string = string.Format("/my/XboxOneDomainByImpressions{0}_{1}.txt", @StartDate, @EndDate);

// Load the CU View dependency module and initialize it for the XboxOne dataset.
MODULE @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CUViewSlapiDependency.module" AS CUModule;
CUModule.InitializeCUViewResource(Start = @StartDate, End = @EndDate, Dataset = "XboxOne" );

CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"    
    PARAMS
    (            
        Start = @StartDate,
        End = @EndDate,        
        Dataset = "XboxOne"        
    );

// Pull the impression id and the LU domain for non-test en-US traffic.
// The domain is read from the "Domain" property of the first "SemanticFrames"
// node found under the "LUResult" node ("" is passed as the fallback value).
data = 
    SELECT Request_ImpressionGuid,
           CU_LUPostProcessDS.FindElementsWithProperty("NODE_NAME", "LUResult")
          .FindElementsWithProperty("NODE_NAME", "SemanticFrames")[0]
          .GetDataPropertyOrDefault("Domain", "") AS LUDomain              
    FROM CUView
    WHERE NOT Request_IsTestingTraffic
    AND Market == "en-US";

// Count rows per domain. The grouping key is stated explicitly so the
// aggregate does not depend on implicit grouping of the non-aggregated column.
results =
    SELECT LUDomain,
           COUNT() AS ImpressionCount
    FROM data
    GROUP BY LUDomain
    ORDER BY ImpressionCount DESC;

// Name the rowset being written so the output does not silently change
// if another rowset is added after `results`.
OUTPUT results TO @OutputStream;

3. Get Utterance distribution on SRLocale in MLS traffic.

Following is the sample script.
// Date window handed to both the module initializer and the view;
// the output path is derived from the same two dates.
#DECLARE StartDate string = "2015-01-20";
#DECLARE EndDate string = "2015-01-20";
#DECLARE OutputStream string = string.Format("/my/MLSUtteranceCount{0}_{1}.txt", @StartDate, @EndDate);

// Load the CU View dependency module and initialize it for the MLS dataset.
MODULE @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CUViewSlapiDependency.module" AS CUModule;
CUModule.InitializeCUViewResource(Start = @StartDate, End = @EndDate, Dataset = "MLS");

CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"    
    PARAMS
    (            
        Start = @StartDate,
        End = @EndDate,        
        Dataset = "MLS"        
    );

// Keep only rows that carry a non-empty utterance id.
data = 
    SELECT CU_SRLocale,
           CU_UtteranceId           
    FROM CUView
    WHERE NOT string.IsNullOrEmpty(CU_UtteranceId);

// Count rows per SR locale. The grouping key is stated explicitly so the
// aggregate does not depend on implicit grouping of the non-aggregated column.
// NOTE(review): COUNT() counts rows, not distinct CU_UtteranceId values — confirm
// that is the intended definition of "utterance count".
results =
    SELECT CU_SRLocale,           
           COUNT() AS UtteranceCount
    FROM data
    GROUP BY CU_SRLocale
    ORDER BY UtteranceCount DESC;

// Name the rowset being written so the output does not silently change
// if another rowset is added after `results`.
OUTPUT results TO @OutputStream;

Using the custom data
If you want to use custom input data, it looks as below:
Cortana scenario:
// Open the CU log page view over custom input streams instead of a date range.
// CustomInputStream carries two comma-separated entries here: a CU= path and an SML= path.
// The dataset parameter is spelled `Dataset`, matching every other view invocation.
CUView =    
      VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"
      PARAMS 
      (
            Dataset = "Cortana",
            CustomInputStream = @"CU=D:\Downloads\CUViewTest.CUmRoot.2014-07-25.1.txt,SML=D:\Downloads\CUViewTest.SMLSS.2014-07-25.1.ss"
      );
CU only scenario:
// CU-only variant: the view is opened with a single CU= entry in
// CustomInputStream and no Dataset parameter.
CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"
    PARAMS (CustomInputStream = @"CU=D:\Downloads\CUViewTest.CUmRoot.2014-07-25.1.txt");

4. If you need to extract only Cortana data from IPEMergedLog and performance is a serious concern.

Please use Settings="IncludeCuOnlyData=true" parameter to optimize the extraction logic.

// Date window handed to both the module initializer and the view;
// the output path is derived from the same two dates.
#DECLARE StartDate string = "2015-01-20";
#DECLARE EndDate string = "2015-01-20";
#DECLARE OutputStream string = string.Format("/my/CortanaDomainIntentImpressions{0}_{1}.ss", @StartDate, @EndDate);

// Load the CU View dependency module and initialize it for the Cortana dataset.
MODULE @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CUViewSlapiDependency.module" AS CUModule;
CUModule.InitializeCUViewResource(Start = @StartDate, End = @EndDate, Dataset = "Cortana");

// Settings = "IncludeCuOnlyData=true" is the view option this sample demonstrates.
CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"    
    PARAMS
    (            
        Start = @StartDate,
        End = @EndDate,        
        Dataset = "Cortana",
        Settings = "IncludeCuOnlyData=true"
    );

// Keep only rows that have both a dialog-engine domain and intent.
data =
    SELECT CU_DialogEngineDomain,
           CU_DialogEngineIntent,          
           Request_ImpressionGuid
    FROM CUView
    WHERE NOT string.IsNullOrEmpty(CU_DialogEngineDomain)
          AND NOT string.IsNullOrEmpty(CU_DialogEngineIntent);

// Count rows per (domain, intent) pair and keep the 10 largest. The grouping
// keys are stated explicitly so the aggregate does not depend on implicit
// grouping of the non-aggregated columns.
result =
    SELECT TOP 10
           CU_DialogEngineDomain,
           CU_DialogEngineIntent,
           COUNT() AS ImpressionCount
    FROM data
    GROUP BY CU_DialogEngineDomain, CU_DialogEngineIntent
    ORDER BY ImpressionCount DESC;

// Name the rowset being written so the output does not silently change
// if another rowset is added after `result`.
OUTPUT result TO SSTREAM @OutputStream;

5. If you need to migrate your previous script to CU View with less effort.

You can use Addon="CuScalarColumns" to enable the CuScalarColumns addon, which reduces the changes needed during the migration.

// Date window handed to both the module initializer and the view;
// the output path is derived from the same two dates.
#DECLARE StartDate string = "2015-01-20";
#DECLARE EndDate string = "2015-01-20";
#DECLARE OutputStream string = string.Format("/my/XboxOneDomainByImpressions{0}_{1}.txt", @StartDate, @EndDate);

// Load the CU View dependency module and initialize it for the XboxOne dataset.
// The CuScalarColumns addon is requested in both the initializer and the view.
MODULE @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CUViewSlapiDependency.module" AS CUModule;
CUModule.InitializeCUViewResource(Start = @StartDate, End = @EndDate, Dataset = "XboxOne", Addon="CuScalarColumns");

CUView =
    VIEW @"/shares/searchDM/distrib/released/ConversationUnderstandingView/CULogPageView.view"    
    PARAMS
    (            
        Start = @StartDate,
        End = @EndDate,        
        Dataset = "XboxOne",
        Addon="CuScalarColumns"  
    );

// Select impressions whose URL contains "ws/cu", in the en-US market,
// with traffic type "Untagged", using the m-prefixed scalar columns.
data = 
    SELECT mImpressionGuid,
           mLUDomain              
    FROM CUView
    WHERE mFullUrl.Contains("ws/cu") 
    AND mRequestMarket == "en-US"
    AND mTrafficType == "Untagged";

// Count rows per domain. The grouping key is stated explicitly so the
// aggregate does not depend on implicit grouping of the non-aggregated column.
results =
    SELECT mLUDomain,
           COUNT() AS ImpressionCount
    FROM data
    GROUP BY mLUDomain
    ORDER BY ImpressionCount DESC;

// Name the rowset being written so the output does not silently change
// if another rowset is added after `results`.
OUTPUT results TO @OutputStream;

0 comments:

Post a Comment