From fa1a30cb5ea8a5253f3fa1cf4d02bc082d9a301e Mon Sep 17 00:00:00 2001 From: Shreyas Zare Date: Sat, 25 Sep 2021 13:34:05 +0530 Subject: [PATCH] AdvanceBlocking: updated implementation to support adblock lists. Added allow txt blocking report option. Code refactoring done. --- Apps/AdvanceBlockingApp/App.cs | 1166 +++++++++++++++++--------------- 1 file changed, 620 insertions(+), 546 deletions(-) diff --git a/Apps/AdvanceBlockingApp/App.cs b/Apps/AdvanceBlockingApp/App.cs index 8db1a371..382a5ba9 100644 --- a/Apps/AdvanceBlockingApp/App.cs +++ b/Apps/AdvanceBlockingApp/App.cs @@ -45,6 +45,7 @@ namespace AdvanceBlocking string _localCacheFolder; bool _enableBlocking; + bool _allowTxtBlockingReport; bool _blockAsNxDomain; int _blockListUrlUpdateIntervalHours; @@ -75,7 +76,7 @@ namespace AdvanceBlocking } #endregion - + #region private private async void BlockListUrlUpdateTimerCallbackAsync(object state) @@ -84,7 +85,7 @@ namespace AdvanceBlocking { if (DateTime.UtcNow > _blockListUrlLastUpdatedOn.AddHours(_blockListUrlUpdateIntervalHours)) { - if (await UpdateBlockListsAsync()) + if (await UpdateAllListsAsync()) { //block lists were updated //save last updated on time @@ -98,25 +99,49 @@ namespace AdvanceBlocking } } - private string GetBlockListFilePath(Uri blockListUrl) + private void FindAndSetBlockListUrlLastUpdatedOn() { - using (HashAlgorithm hash = SHA256.Create()) + try { - return Path.Combine(_localCacheFolder, BitConverter.ToString(hash.ComputeHash(Encoding.UTF8.GetBytes(blockListUrl.AbsoluteUri))).Replace("-", "").ToLower()); + string[] files = Directory.GetFiles(_localCacheFolder); + DateTime latest = DateTime.MinValue; + + foreach (string file in files) + { + DateTime lastModified = File.GetLastWriteTimeUtc(file); + + if (lastModified > latest) + latest = lastModified; + } + + _blockListUrlLastUpdatedOn = latest; + } + catch (Exception ex) + { + _dnsServer.WriteLog(ex); } } - private async Task UpdateBlockListsAsync() + private string 
GetListFilePath(Uri listUrl) + { + using (HashAlgorithm hash = SHA256.Create()) + { + return Path.Combine(_localCacheFolder, BitConverter.ToString(hash.ComputeHash(Encoding.UTF8.GetBytes(listUrl.AbsoluteUri))).Replace("-", "").ToLower()); + } + } + + private async Task UpdateAllListsAsync() { List downloadedAllowListUrls = new List(); List downloadedBlockListUrls = new List(); List downloadedRegexAllowListUrls = new List(); List downloadedRegexBlockListUrls = new List(); + List downloadedAdblockListUrls = new List(); bool notModified = false; - async Task DownloadListUrlAsync(Uri listUrl, bool isAllowList, bool isRegexList) + async Task DownloadListUrlAsync(Uri listUrl, bool isAllowList, bool isRegexList, bool isAdblockList) { - string listFilePath = GetBlockListFilePath(listUrl); + string listFilePath = GetListFilePath(listUrl); string listDownloadFilePath = listFilePath + ".downloading"; try @@ -154,42 +179,52 @@ namespace AdvanceBlocking if (httpResponse.Content.Headers.LastModified != null) File.SetLastWriteTimeUtc(listFilePath, httpResponse.Content.Headers.LastModified.Value.UtcDateTime); - if (isAllowList) + if (isAdblockList) { - if (isRegexList) + lock (downloadedAdblockListUrls) { - lock (downloadedRegexAllowListUrls) - { - downloadedRegexAllowListUrls.Add(listUrl); - } - } - else - { - lock (downloadedAllowListUrls) - { - downloadedAllowListUrls.Add(listUrl); - } + downloadedAdblockListUrls.Add(listUrl); } } else { - if (isRegexList) + if (isAllowList) { - lock (downloadedRegexBlockListUrls) + if (isRegexList) { - downloadedRegexBlockListUrls.Add(listUrl); + lock (downloadedRegexAllowListUrls) + { + downloadedRegexAllowListUrls.Add(listUrl); + } + } + else + { + lock (downloadedAllowListUrls) + { + downloadedAllowListUrls.Add(listUrl); + } } } else { - lock (downloadedBlockListUrls) + if (isRegexList) { - downloadedBlockListUrls.Add(listUrl); + lock (downloadedRegexBlockListUrls) + { + downloadedRegexBlockListUrls.Add(listUrl); + } + } + else + { + lock 
(downloadedBlockListUrls) + { + downloadedBlockListUrls.Add(listUrl); + } } } } - _dnsServer.WriteLog("Advance Blocking app successfully downloaded " + (isRegexList ? "regex " : "") + (isAllowList ? "allow" : "block") + " list (" + WebUtilities.GetFormattedSize(new FileInfo(listFilePath).Length) + "): " + listUrl.AbsoluteUri); + _dnsServer.WriteLog("Advance Blocking app successfully downloaded " + (isAdblockList ? "adblock" : (isRegexList ? "regex " : "") + (isAllowList ? "allow" : "block")) + " list (" + WebUtilities.GetFormattedSize(new FileInfo(listFilePath).Length) + "): " + listUrl.AbsoluteUri); } break; @@ -197,7 +232,7 @@ namespace AdvanceBlocking { notModified = true; - _dnsServer.WriteLog("Advance Blocking app successfully checked for a new update of the " + (isRegexList ? "regex " : "") + (isAllowList ? "allow" : "block") + " list: " + listUrl.AbsoluteUri); + _dnsServer.WriteLog("Advance Blocking app successfully checked for a new update of the " + (isAdblockList ? "adblock" : (isRegexList ? "regex " : "") + (isAllowList ? "allow" : "block")) + " list: " + listUrl.AbsoluteUri); } break; @@ -208,7 +243,7 @@ namespace AdvanceBlocking } catch (Exception ex) { - _dnsServer.WriteLog("Advance Blocking app failed to download " + (isRegexList ? "regex " : "") + (isAllowList ? "allow" : "block") + " list and will use previously downloaded file (if available): " + listUrl.AbsoluteUri + "\r\n" + ex.ToString()); + _dnsServer.WriteLog("Advance Blocking app failed to download " + (isAdblockList ? "adblock" : (isRegexList ? "regex " : "") + (isAllowList ? 
"allow" : "block")) + " list and will use previously downloaded file (if available): " + listUrl.AbsoluteUri + "\r\n" + ex.ToString()); } } @@ -217,28 +252,30 @@ namespace AdvanceBlocking IReadOnlyList uniqueBlockListUrls = GetUniqueBlockListUrls(); IReadOnlyList uniqueRegexAllowListUrls = GetUniqueRegexAllowListUrls(); IReadOnlyList uniqueRegexBlockListUrls = GetUniqueRegexBlockListUrls(); + IReadOnlyList uniqueAdblockListUrls = GetUniqueAdblockListUrls(); foreach (Uri allowListUrl in uniqueAllowListUrls) - tasks.Add(DownloadListUrlAsync(allowListUrl, true, false)); + tasks.Add(DownloadListUrlAsync(allowListUrl, true, false, false)); foreach (Uri blockListUrl in uniqueBlockListUrls) - tasks.Add(DownloadListUrlAsync(blockListUrl, false, false)); + tasks.Add(DownloadListUrlAsync(blockListUrl, false, false, false)); foreach (Uri regexAllowListUrl in uniqueRegexAllowListUrls) - tasks.Add(DownloadListUrlAsync(regexAllowListUrl, true, true)); + tasks.Add(DownloadListUrlAsync(regexAllowListUrl, true, true, false)); foreach (Uri regexBlockListUrl in uniqueRegexBlockListUrls) - tasks.Add(DownloadListUrlAsync(regexBlockListUrl, false, true)); + tasks.Add(DownloadListUrlAsync(regexBlockListUrl, false, true, false)); + + foreach (Uri adblockListUrl in uniqueAdblockListUrls) + tasks.Add(DownloadListUrlAsync(adblockListUrl, false, false, true)); await Task.WhenAll(tasks); - if ((downloadedAllowListUrls.Count > 0) || (downloadedBlockListUrls.Count > 0)) - LoadBlockListZones(downloadedAllowListUrls, downloadedBlockListUrls); + bool downloaded = (downloadedAllowListUrls.Count > 0) || (downloadedBlockListUrls.Count > 0) || (downloadedRegexAllowListUrls.Count > 0) || (downloadedRegexBlockListUrls.Count > 0) || (downloadedAdblockListUrls.Count > 0); + if (downloaded) + LoadZones(downloadedAllowListUrls, downloadedBlockListUrls, downloadedRegexAllowListUrls, downloadedRegexBlockListUrls, downloadedAdblockListUrls); - if ((downloadedRegexAllowListUrls.Count > 0) || 
(downloadedRegexBlockListUrls.Count > 0)) - LoadRegexBlockListZones(downloadedRegexAllowListUrls, downloadedRegexBlockListUrls); - - return (downloadedAllowListUrls.Count > 0) || (downloadedBlockListUrls.Count > 0) || (downloadedRegexAllowListUrls.Count > 0) || (downloadedRegexBlockListUrls.Count > 0) || notModified; + return downloaded || notModified; } private static string PopWord(ref string line) @@ -273,7 +310,7 @@ namespace AdvanceBlocking { _dnsServer.WriteLog("Advance Blocking app is reading " + (isAllowList ? "allow" : "block") + " list from: " + listUrl.AbsoluteUri); - using (FileStream fS = new FileStream(GetBlockListFilePath(listUrl), FileMode.Open, FileAccess.Read)) + using (FileStream fS = new FileStream(GetListFilePath(listUrl), FileMode.Open, FileAccess.Read)) { //parse hosts file and populate block zone StreamReader sR = new StreamReader(fS, true); @@ -359,7 +396,7 @@ namespace AdvanceBlocking { _dnsServer.WriteLog("Advance Blocking app is reading regex " + (isAllowList ? 
"allow" : "block") + " list from: " + listUrl.AbsoluteUri); - using (FileStream fS = new FileStream(GetBlockListFilePath(listUrl), FileMode.Open, FileAccess.Read)) + using (FileStream fS = new FileStream(GetListFilePath(listUrl), FileMode.Open, FileAccess.Read)) { //parse hosts file and populate block zone StreamReader sR = new StreamReader(fS, true); @@ -393,6 +430,84 @@ namespace AdvanceBlocking return regices; } + private void ReadAdblockListFile(Uri listUrl, out Queue allowedDomains, out Queue blockedDomains) + { + allowedDomains = new Queue(); + blockedDomains = new Queue(); + + try + { + _dnsServer.WriteLog("Advance Blocking app is reading adblock list from: " + listUrl.AbsoluteUri); + + using (FileStream fS = new FileStream(GetListFilePath(listUrl), FileMode.Open, FileAccess.Read)) + { + //parse hosts file and populate block zone + StreamReader sR = new StreamReader(fS, true); + string line; + + while (true) + { + line = sR.ReadLine(); + if (line == null) + break; //eof + + line = line.TrimStart(' ', '\t'); + + if (line.Length == 0) + continue; //skip empty line + + if (line.StartsWith("!")) + continue; //skip comment line + + if (line.StartsWith("||")) + { + int i = line.IndexOf('^'); + if (i > -1) + { + string domain = line.Substring(2, i - 2); + string options = line.Substring(i + 1); + + if (((options.Length == 0) || (options.StartsWith("$") && (options.Contains("doc") || options.Contains("all")))) && DnsClient.IsDomainNameValid(domain)) + blockedDomains.Enqueue(domain); + } + else + { + string domain = line.Substring(2); + + if (DnsClient.IsDomainNameValid(domain)) + blockedDomains.Enqueue(domain); + } + } + else if (line.StartsWith("@@||")) + { + int i = line.IndexOf('^'); + if (i > -1) + { + string domain = line.Substring(4, i - 4); + string options = line.Substring(i + 1); + + if (((options.Length == 0) || (options.StartsWith("$") && (options.Contains("doc") || options.Contains("all")))) && DnsClient.IsDomainNameValid(domain)) + 
allowedDomains.Enqueue(domain); + } + else + { + string domain = line.Substring(4); + + if (DnsClient.IsDomainNameValid(domain)) + allowedDomains.Enqueue(domain); + } + } + } + } + + _dnsServer.WriteLog("Advance Blocking app read adblock list file (" + (allowedDomains.Count + blockedDomains.Count) + " domains) from: " + listUrl.AbsoluteUri); + } + catch (Exception ex) + { + _dnsServer.WriteLog("Advance Blocking app failed to read adblock list from: " + listUrl.AbsoluteUri + "\r\n" + ex.ToString()); + } + } + private IReadOnlyList GetUniqueAllowListUrls() { List allowListUrls = new List(); @@ -457,222 +572,197 @@ namespace AdvanceBlocking return regexBlockListUrls; } - private static IReadOnlyList GetUniqueAllowListUrls(IReadOnlyList groups) + private IReadOnlyList GetUniqueAdblockListUrls() { - List allowListUrls = new List(); - - foreach (Group group in groups) - { - foreach (Uri blockListUrl in group.AllowListUrls) - { - if (!allowListUrls.Contains(blockListUrl)) - allowListUrls.Add(blockListUrl); - } - } - - return allowListUrls; - } - - private static IReadOnlyList GetUniqueBlockListUrls(IReadOnlyList groups) - { - List blockListUrls = new List(); - - foreach (Group group in groups) - { - foreach (Uri blockListUrl in group.BlockListUrls) - { - if (!blockListUrls.Contains(blockListUrl)) - blockListUrls.Add(blockListUrl); - } - } - - return blockListUrls; - } - - private static IReadOnlyList GetUniqueRegexAllowListUrls(IReadOnlyList groups) - { - List regexAllowListUrls = new List(); - - foreach (Group group in groups) - { - foreach (Uri regexAllowListUrl in group.RegexAllowListUrls) - { - if (!regexAllowListUrls.Contains(regexAllowListUrl)) - regexAllowListUrls.Add(regexAllowListUrl); - } - } - - return regexAllowListUrls; - } - - private static IReadOnlyList GetUniqueRegexBlockListUrls(IReadOnlyList groups) - { - List regexBlockListUrls = new List(); - - foreach (Group group in groups) - { - foreach (Uri regexBlockListUrl in group.RegexBlockListUrls) - { - if 
(!regexBlockListUrls.Contains(regexBlockListUrl)) - regexBlockListUrls.Add(regexBlockListUrl); - } - } - - return regexBlockListUrls; - } - - private IReadOnlyList GetUpdatedGroups(List updatedAllowListUrls, List updatedBlockListUrls) - { - List updatedGroups = new List(); + List adblockListUrls = new List(); foreach (KeyValuePair group in _groups) { - bool found = false; - - foreach (Uri allowListUrl in group.Value.AllowListUrls) + foreach (Uri adblockListUrl in group.Value.AdblockListUrls) { - if (updatedAllowListUrls.Contains(allowListUrl)) - { - updatedGroups.Add(group.Value); - found = true; - break; - } - } - - if (found) - continue; - - foreach (Uri blockListUrl in group.Value.BlockListUrls) - { - if (updatedBlockListUrls.Contains(blockListUrl)) - { - updatedGroups.Add(group.Value); - break; - } + if (!adblockListUrls.Contains(adblockListUrl)) + adblockListUrls.Add(adblockListUrl); } } - return updatedGroups; + return adblockListUrls; } - private IReadOnlyList GetRegexUpdatedGroups(List updatedRegexAllowListUrls, List updatedRegexBlockListUrls) + private static IReadOnlyList GetAllUniqueListUrls(IReadOnlyDictionary groups) { - List updatedGroups = new List(); + List listUrls = new List(); + + foreach (KeyValuePair group in groups) + { + foreach (Uri allowListUrl in group.Key.AllowListUrls) + { + if (!listUrls.Contains(allowListUrl)) + listUrls.Add(allowListUrl); + } + + foreach (Uri blockListUrl in group.Key.BlockListUrls) + { + if (!listUrls.Contains(blockListUrl)) + listUrls.Add(blockListUrl); + } + + foreach (Uri regexAllowListUrl in group.Key.RegexAllowListUrls) + { + if (!listUrls.Contains(regexAllowListUrl)) + listUrls.Add(regexAllowListUrl); + } + + foreach (Uri regexBlockListUrl in group.Key.RegexBlockListUrls) + { + if (!listUrls.Contains(regexBlockListUrl)) + listUrls.Add(regexBlockListUrl); + } + + foreach (Uri adblockListUrl in group.Key.AdblockListUrls) + { + if (!listUrls.Contains(adblockListUrl)) + listUrls.Add(adblockListUrl); + } + } + + 
return listUrls; + } + + private void LoadZones(List updatedAllowListUrls, List updatedBlockListUrls, List updatedRegexAllowListUrls, List updatedRegexBlockListUrls, List updatedAdblockListUrls) + { + Dictionary> allowCache = new Dictionary>(); + Dictionary> blockCache = new Dictionary>(); foreach (KeyValuePair group in _groups) { - bool found = false; + bool loadAllowList = ListContainsAnyItem(group.Value.AllowListUrls, updatedAllowListUrls); + bool loadBlockList = ListContainsAnyItem(group.Value.BlockListUrls, updatedBlockListUrls); + bool loadRegexAllowList = ListContainsAnyItem(group.Value.RegexAllowListUrls, updatedRegexAllowListUrls); + bool loadRegexBlockList = ListContainsAnyItem(group.Value.RegexBlockListUrls, updatedRegexBlockListUrls); + bool loadAdblockList = ListContainsAnyItem(group.Value.AdblockListUrls, updatedAdblockListUrls); - foreach (Uri regexAllowListUrl in group.Value.RegexAllowListUrls) - { - if (updatedRegexAllowListUrls.Contains(regexAllowListUrl)) - { - updatedGroups.Add(group.Value); - found = true; - break; - } - } + LoadListZones(allowCache, blockCache, group.Value, loadAllowList, loadBlockList, loadRegexAllowList, loadRegexBlockList, loadAdblockList); + } + } - if (found) - continue; - - foreach (Uri regexBlockListUrl in group.Value.RegexBlockListUrls) - { - if (updatedRegexBlockListUrls.Contains(regexBlockListUrl)) - { - updatedGroups.Add(group.Value); - break; - } - } + private void LoadListZones(Dictionary> allowCache, Dictionary> blockCache, Group group, bool loadAllowList, bool loadBlockList, bool loadRegexAllowList, bool loadRegexBlockList, bool loadAdblockList) + { + if (loadAdblockList) + { + loadAllowList = true; + loadBlockList = true; } - return updatedGroups; - } + Dictionary> allAllowListQueues = new Dictionary>(); + Dictionary> allBlockListQueues = new Dictionary>(); + Dictionary> allRegexAllowListQueues = new Dictionary>(); + Dictionary> allRegexBlockListQueues = new Dictionary>(); - private void LoadBlockListZones(List 
updatedAllowListUrls, List updatedBlockListUrls) - { - LoadBlockListZones(GetUpdatedGroups(updatedAllowListUrls, updatedBlockListUrls)); - } - - private void LoadRegexBlockListZones(List updatedRegexAllowListUrls, List updatedRegexBlockListUrls) - { - LoadRegexBlockListZones(GetRegexUpdatedGroups(updatedRegexAllowListUrls, updatedRegexBlockListUrls)); - } - - private void LoadBlockListZones(IReadOnlyList updatedGroups) - { - //read all allow lists in a queue - IReadOnlyList uniqueAllowListUrls = GetUniqueAllowListUrls(updatedGroups); - Dictionary> allAllowListQueues = new Dictionary>(uniqueAllowListUrls.Count); - - foreach (Uri allowListUrl in uniqueAllowListUrls) + if (loadAllowList) { - if (!allAllowListQueues.ContainsKey(allowListUrl)) + //read all allow lists in a queue + foreach (Uri allowListUrl in group.AllowListUrls) { - Queue allowListQueue = ReadListFile(allowListUrl, true); + if (allAllowListQueues.ContainsKey(allowListUrl)) + continue; + + if (!allowCache.TryGetValue(allowListUrl, out Queue allowListQueue)) + { + allowListQueue = ReadListFile(allowListUrl, true); + allowCache.Add(allowListUrl, allowListQueue); + } + allAllowListQueues.Add(allowListUrl, allowListQueue); } } - //read all block lists in a queue - IReadOnlyList uniqueBlockListUrls = GetUniqueBlockListUrls(updatedGroups); - Dictionary> allBlockListQueues = new Dictionary>(uniqueBlockListUrls.Count); - - foreach (Uri blockListUrl in uniqueBlockListUrls) + if (loadBlockList) { - if (!allBlockListQueues.ContainsKey(blockListUrl)) + //read all block lists in a queue + foreach (Uri blockListUrl in group.BlockListUrls) { - Queue blockListQueue = ReadListFile(blockListUrl, false); + if (allBlockListQueues.ContainsKey(blockListUrl)) + continue; + + if (!blockCache.TryGetValue(blockListUrl, out Queue blockListQueue)) + { + blockListQueue = ReadListFile(blockListUrl, false); + blockCache.Add(blockListUrl, blockListQueue); + } + allBlockListQueues.Add(blockListUrl, blockListQueue); } } - //load block 
list zone per group - foreach (Group group in updatedGroups) - group.LoadBlockListZone(allAllowListQueues, allBlockListQueues); - - _dnsServer.WriteLog("Advance Blocking app loaded all block list zones successfully."); - - //force GC collection to remove old zone data from memory quickly - GC.Collect(); - } - - private void LoadRegexBlockListZones(IReadOnlyList updatedGroups) - { - //read all allow lists in a queue - IReadOnlyList uniqueRegexAllowListUrls = GetUniqueRegexAllowListUrls(updatedGroups); - Dictionary> allRegexAllowListQueues = new Dictionary>(uniqueRegexAllowListUrls.Count); - - foreach (Uri regexAllowListUrl in uniqueRegexAllowListUrls) + if (loadAdblockList) { - if (!allRegexAllowListQueues.ContainsKey(regexAllowListUrl)) + //read all adblock lists in queue + foreach (Uri adblockListUrl in group.AdblockListUrls) { - Queue regexAllowListQueue = ReadRegexListFile(regexAllowListUrl, true); + if (!allowCache.TryGetValue(adblockListUrl, out Queue allowListQueue) & !blockCache.TryGetValue(adblockListUrl, out Queue blockListQueue)) + { + ReadAdblockListFile(adblockListUrl, out allowListQueue, out blockListQueue); + + allowCache.Add(adblockListUrl, allowListQueue); + blockCache.Add(adblockListUrl, blockListQueue); + } + + allAllowListQueues.Add(adblockListUrl, allowListQueue); + allBlockListQueues.Add(adblockListUrl, blockListQueue); + } + } + + if (loadRegexAllowList) + { + //read all allow lists in a queue + foreach (Uri regexAllowListUrl in group.RegexAllowListUrls) + { + if (allRegexAllowListQueues.ContainsKey(regexAllowListUrl)) + continue; + + if (!allowCache.TryGetValue(regexAllowListUrl, out Queue regexAllowListQueue)) + { + regexAllowListQueue = ReadRegexListFile(regexAllowListUrl, true); + allowCache.Add(regexAllowListUrl, regexAllowListQueue); + } + allRegexAllowListQueues.Add(regexAllowListUrl, regexAllowListQueue); } } - //read all regex block lists in a queue - IReadOnlyList uniqueRegexBlockListUrls = GetUniqueRegexBlockListUrls(updatedGroups); 
- Dictionary> allRegexBlockListQueues = new Dictionary>(uniqueRegexBlockListUrls.Count); - - foreach (Uri regexBlockListUrl in uniqueRegexBlockListUrls) + if (loadRegexBlockList) { - if (!allRegexBlockListQueues.ContainsKey(regexBlockListUrl)) + //read all regex block lists in a queue + foreach (Uri regexBlockListUrl in group.RegexBlockListUrls) { - Queue regexBlockListQueue = ReadRegexListFile(regexBlockListUrl, false); + if (allRegexBlockListQueues.ContainsKey(regexBlockListUrl)) + continue; + + if (!blockCache.TryGetValue(regexBlockListUrl, out Queue regexBlockListQueue)) + { + regexBlockListQueue = ReadRegexListFile(regexBlockListUrl, false); + blockCache.Add(regexBlockListUrl, regexBlockListQueue); + } + allRegexBlockListQueues.Add(regexBlockListUrl, regexBlockListQueue); } } - //load regex block list zone per group - foreach (Group group in updatedGroups) - group.LoadRegexBlockListZone(allRegexAllowListQueues, allRegexBlockListQueues); + //load block list zone + if (loadAllowList) + group.LoadAllowListZone(allAllowListQueues); - _dnsServer.WriteLog("Advance Blocking app loaded all regex block list zones successfully."); + if (loadBlockList) + group.LoadBlockListZone(allBlockListQueues); - //force GC collection to remove old zone data from memory quickly - GC.Collect(); + //load regex block list zone + if (loadRegexAllowList) + group.LoadRegexAllowListZone(allRegexAllowListQueues); + + if (loadRegexBlockList) + group.LoadRegexBlockListZone(allRegexBlockListQueues); + + _dnsServer.WriteLog("Advance Blocking app loaded all zones successfully for group: " + group.Name); } private static bool ListsEquals(IReadOnlyList list1, IReadOnlyList list2) @@ -689,6 +779,17 @@ namespace AdvanceBlocking return true; } + private static bool ListContainsAnyItem(IReadOnlyList list, IReadOnlyList items) + { + foreach (T item in list) + { + if (items.Contains(item)) + return true; + } + + return false; + } + private static string GetParentZone(string domain) { int i = 
domain.IndexOf('.'); @@ -713,6 +814,7 @@ namespace AdvanceBlocking dynamic jsonConfig = JsonConvert.DeserializeObject(config); _enableBlocking = jsonConfig.enableBlocking.Value; + _allowTxtBlockingReport = jsonConfig.allowTxtBlockingReport.Value; _blockAsNxDomain = jsonConfig.blockAsNxDomain.Value; _blockListUrlUpdateIntervalHours = Convert.ToInt32(jsonConfig.blockListUrlUpdateIntervalHours.Value); @@ -760,22 +862,43 @@ namespace AdvanceBlocking _networkGroupMap = networkGroupMap; } + bool cachedListFileMissing = false; + { - List updatedGroups = new List(); - List updatedRegexGroups = new List(); + const int LOAD_ALLOW_LIST_ZONE = 1; + const int LOAD_BLOCK_LIST_ZONE = 2; + const int LOAD_REGEX_ALLOW_LIST_ZONE = 4; + const int LOAD_REGEX_BLOCK_LIST_ZONE = 8; + const int LOAD_ADBLOCK_LIST_ZONE = 16; + + Dictionary updatedGroups = new Dictionary(); Dictionary groups = new Dictionary(); foreach (dynamic jsonGroup in jsonConfig.groups) { - Group group = new Group(jsonGroup); + Group group = new Group(this, jsonGroup); if ((_groups is not null) && _groups.TryGetValue(group.Name, out Group existingGroup)) { - if (!ListsEquals(group.AllowListUrls, existingGroup.AllowListUrls) || !ListsEquals(group.BlockListUrls, existingGroup.BlockListUrls)) - updatedGroups.Add(existingGroup); + int loadFlags = 0; - if (!ListsEquals(group.RegexAllowListUrls, existingGroup.RegexAllowListUrls) || !ListsEquals(group.RegexBlockListUrls, existingGroup.RegexBlockListUrls)) - updatedRegexGroups.Add(existingGroup); + if (!ListsEquals(group.AllowListUrls, existingGroup.AllowListUrls)) + loadFlags |= LOAD_ALLOW_LIST_ZONE; + + if (!ListsEquals(group.BlockListUrls, existingGroup.BlockListUrls)) + loadFlags |= LOAD_BLOCK_LIST_ZONE; + + if (!ListsEquals(group.RegexAllowListUrls, existingGroup.RegexAllowListUrls)) + loadFlags |= LOAD_REGEX_ALLOW_LIST_ZONE; + + if (!ListsEquals(group.RegexBlockListUrls, existingGroup.RegexBlockListUrls)) + loadFlags |= LOAD_REGEX_BLOCK_LIST_ZONE; + + if 
(!ListsEquals(group.AdblockListUrls, existingGroup.AdblockListUrls)) + loadFlags |= LOAD_ADBLOCK_LIST_ZONE; + + if (loadFlags > 0) + updatedGroups.Add(existingGroup, loadFlags); existingGroup.Enabled = group.Enabled; @@ -789,12 +912,13 @@ namespace AdvanceBlocking existingGroup.RegexAllowListUrls = group.RegexAllowListUrls; existingGroup.RegexBlockListUrls = group.RegexBlockListUrls; + existingGroup.AdblockListUrls = group.AdblockListUrls; + groups.TryAdd(existingGroup.Name, existingGroup); } else { - updatedGroups.Add(group); - updatedRegexGroups.Add(group); + updatedGroups.Add(group, LOAD_ALLOW_LIST_ZONE | LOAD_BLOCK_LIST_ZONE | LOAD_REGEX_ALLOW_LIST_ZONE | LOAD_REGEX_BLOCK_LIST_ZONE | LOAD_ADBLOCK_LIST_ZONE); groups.TryAdd(group.Name, group); } } @@ -803,26 +927,54 @@ namespace AdvanceBlocking if (updatedGroups.Count > 0) { - Task.Run(delegate () + foreach (Uri listUrl in GetAllUniqueListUrls(updatedGroups)) { - LoadBlockListZones(updatedGroups); - }); - } + if (!File.Exists(GetListFilePath(listUrl))) + { + cachedListFileMissing = true; + break; + } + } - if (updatedRegexGroups.Count > 0) - { - Task.Run(delegate () + if (!cachedListFileMissing) { - LoadRegexBlockListZones(updatedRegexGroups); - }); + Task.Run(delegate () + { + Dictionary> allowCache = new Dictionary>(); + Dictionary> blockCache = new Dictionary>(); + + foreach (KeyValuePair group in updatedGroups) + { + bool loadAllowList = (group.Value & LOAD_ALLOW_LIST_ZONE) > 0; + bool loadBlockList = (group.Value & LOAD_BLOCK_LIST_ZONE) > 0; + bool loadRegexAllowList = (group.Value & LOAD_REGEX_ALLOW_LIST_ZONE) > 0; + bool loadRegexBlockList = (group.Value & LOAD_REGEX_BLOCK_LIST_ZONE) > 0; + bool loadAdblockList = (group.Value & LOAD_ADBLOCK_LIST_ZONE) > 0; + + LoadListZones(allowCache, blockCache, group.Key, loadAllowList, loadBlockList, loadRegexAllowList, loadRegexBlockList, loadAdblockList); + } + }); + } } } if (_blockListUrlUpdateTimer is null) { + if (!cachedListFileMissing) + 
FindAndSetBlockListUrlLastUpdatedOn(); + _blockListUrlUpdateTimer = new Timer(BlockListUrlUpdateTimerCallbackAsync, null, Timeout.Infinite, Timeout.Infinite); _blockListUrlUpdateTimer.Change(BLOCK_LIST_UPDATE_TIMER_INITIAL_INTERVAL, BLOCK_LIST_UPDATE_TIMER_PERIODIC_INTERVAL); } + else + { + if (cachedListFileMissing) + { + //force update + _blockListUrlLastUpdatedOn = DateTime.MinValue; + _blockListUrlUpdateTimer.Change(BLOCK_LIST_UPDATE_TIMER_INITIAL_INTERVAL, BLOCK_LIST_UPDATE_TIMER_PERIODIC_INTERVAL); + } + } return Task.CompletedTask; } @@ -853,7 +1005,7 @@ namespace AdvanceBlocking if (blockListUrls is null) return Task.FromResult(null); - if (question.Type == DnsResourceRecordType.TXT) + if (_allowTxtBlockingReport && (question.Type == DnsResourceRecordType.TXT)) { //return meta data DnsResourceRecord[] answer; @@ -964,6 +1116,8 @@ namespace AdvanceBlocking { #region variables + readonly App _app; + readonly string _name; bool _enabled; @@ -977,6 +1131,9 @@ namespace AdvanceBlocking IReadOnlyList _regexAllowListUrls; IReadOnlyList _regexBlockListUrls; + IReadOnlyList _adblockListUrls; + + IReadOnlyDictionary> _allowListZone = new Dictionary>(0); IReadOnlyDictionary> _blockListZone = new Dictionary>(0); IReadOnlyList _regexAllowListZone = Array.Empty(); @@ -986,382 +1143,293 @@ namespace AdvanceBlocking #region constructor - public Group(dynamic jsonGroup) + public Group(App app, dynamic jsonGroup) { + _app = app; + _name = jsonGroup.name.Value; _enabled = jsonGroup.enabled.Value; - { - Dictionary allowed = new Dictionary(1); + _allowed = ReadJsonDomainArray(jsonGroup.allowed); + _blocked = ReadJsonDomainArray(jsonGroup.blocked); + _allowListUrls = ReadJsonUrlArray(jsonGroup.allowListUrls); + _blockListUrls = ReadJsonUrlArray(jsonGroup.blockListUrls); - foreach (dynamic jsonDomain in jsonGroup.allowed) - allowed.TryAdd(jsonDomain.Value, null); + _allowedRegex = ReadJsonRegexArray(jsonGroup.allowedRegex); + _blockedRegex = 
ReadJsonRegexArray(jsonGroup.blockedRegex); + _regexAllowListUrls = ReadJsonUrlArray(jsonGroup.regexAllowListUrls); + _regexBlockListUrls = ReadJsonUrlArray(jsonGroup.regexBlockListUrls); - _allowed = allowed; - } - - { - Dictionary blocked = new Dictionary(1); - - foreach (dynamic jsonDomain in jsonGroup.blocked) - blocked.TryAdd(jsonDomain.Value, null); - - _blocked = blocked; - } - - { - List allowListUrls = new List(2); - - foreach (dynamic jsonUrl in jsonGroup.allowListUrls) - { - Uri url = new Uri(jsonUrl.Value); - - if (!allowListUrls.Contains(url)) - allowListUrls.Add(url); - } - - _allowListUrls = allowListUrls; - } - - { - List blockListUrls = new List(2); - - foreach (dynamic jsonUrl in jsonGroup.blockListUrls) - { - Uri url = new Uri(jsonUrl.Value); - - if (!blockListUrls.Contains(url)) - blockListUrls.Add(url); - } - - _blockListUrls = blockListUrls; - } - - { - List allowedRegex = new List(); - - foreach (dynamic jsonRegex in jsonGroup.allowedRegex) - { - string regexPattern = jsonRegex.Value; - - allowedRegex.Add(new Regex(regexPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled)); - } - - _allowedRegex = allowedRegex; - } - - { - List blockedRegex = new List(); - - foreach (dynamic jsonRegex in jsonGroup.blockedRegex) - { - string regexPattern = jsonRegex.Value; - - blockedRegex.Add(new Regex(regexPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled)); - } - - _blockedRegex = blockedRegex; - } - - { - List regexAllowListUrls = new List(); - - foreach (dynamic jsonUrl in jsonGroup.regexAllowListUrls) - { - string strUrl = jsonUrl.Value; - - regexAllowListUrls.Add(new Uri(strUrl)); - } - - _regexAllowListUrls = regexAllowListUrls; - } - - { - List regexBlockListUrls = new List(); - - foreach (dynamic jsonUrl in jsonGroup.regexBlockListUrls) - { - string strUrl = jsonUrl.Value; - - regexBlockListUrls.Add(new Uri(strUrl)); - } - - _regexBlockListUrls = regexBlockListUrls; - } + 
_adblockListUrls = ReadJsonUrlArray(jsonGroup.adblockListUrls); } #endregion #region private - private static bool IsZoneAllowed(IReadOnlyDictionary allowedDomains, string domain) + private static IReadOnlyDictionary ReadJsonDomainArray(dynamic jsonDomainArray) + { + Dictionary domains = new Dictionary(jsonDomainArray.Count); + + foreach (dynamic jsonDomain in jsonDomainArray) + domains.TryAdd(jsonDomain.Value, null); + + return domains; + } + + private static IReadOnlyList ReadJsonRegexArray(dynamic jsonRegexArray) + { + List regices = new List(jsonRegexArray.Count); + + foreach (dynamic jsonRegex in jsonRegexArray) + { + string regexPattern = jsonRegex.Value; + + regices.Add(new Regex(regexPattern, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled)); + } + + return regices; + } + + private static IReadOnlyList ReadJsonUrlArray(dynamic jsonUrlArray) + { + List urls = new List(jsonUrlArray.Count); + + foreach (dynamic jsonUrl in jsonUrlArray) + { + string strUrl = jsonUrl.Value; + + urls.Add(new Uri(strUrl)); + } + + return urls; + } + + private static bool IsZoneFound(IReadOnlyDictionary domains, string domain, out string foundZone, out T foundValue) where T : class { do { - if (allowedDomains.TryGetValue(domain, out _)) + if (domains.TryGetValue(domain, out T value)) + { + foundZone = domain; + foundValue = value; return true; + } domain = GetParentZone(domain); } while (domain is not null); + foundZone = null; + foundValue = null; return false; } + private static bool IsMatchFound(IReadOnlyList regices, string domain, out string matchingPattern) + { + foreach (Regex regex in regices) + { + if (regex.IsMatch(domain)) + { + //found pattern + matchingPattern = regex.ToString(); + return true; + } + } + + matchingPattern = null; + return false; + } + + private static bool IsMatchFound(IReadOnlyList regices, string domain, out string matchingPattern, out IReadOnlyList blockListUrls) + { + foreach (RegexItem regex in regices) + { + if 
(regex.Regex.IsMatch(domain)) + { + //found pattern + matchingPattern = regex.ToString(); + blockListUrls = regex.BlockListUrls; + return true; + } + } + + matchingPattern = null; + blockListUrls = null; + return false; + } + + private static IReadOnlyDictionary> LoadListZone(IReadOnlyList listUrls, Dictionary> allListQueues) + { + //select lists + Dictionary> listQueues = new Dictionary>(listUrls.Count); + int totalDomains = 0; + + foreach (Uri listUrl in listUrls) + { + if (allListQueues.TryGetValue(listUrl, out Queue listQueue)) + { + totalDomains += listQueue.Count; + listQueues.Add(listUrl, listQueue); + } + } + + //load list zone + Dictionary> listZone = new Dictionary>(totalDomains); + + foreach (KeyValuePair> listQueue in listQueues) + { + Queue queue = listQueue.Value; + + while (queue.Count > 0) + { + string domain = queue.Dequeue(); + + if (!listZone.TryGetValue(domain, out List sourceListUrls)) + { + sourceListUrls = new List(2); + listZone.Add(domain, sourceListUrls); + } + + sourceListUrls.Add(listQueue.Key); + } + } + + return listZone; + } + + private IReadOnlyList LoadRegexListZone(IReadOnlyList regexListUrls, Dictionary> allRegexListQueues) + { + //select regex lists + Dictionary> regexListQueues = new Dictionary>(regexListUrls.Count); + int totalRegexPatterns = 0; + + foreach (Uri regexListUrl in regexListUrls) + { + if (allRegexListQueues.TryGetValue(regexListUrl, out Queue regexListQueue)) + { + totalRegexPatterns += regexListQueue.Count; + regexListQueues.Add(regexListUrl, regexListQueue); + } + } + + //load regex list patterns from queue + Dictionary allRegexPatterns = new Dictionary(totalRegexPatterns); + + foreach (KeyValuePair> regexListQueue in regexListQueues) + { + Queue queue = regexListQueue.Value; + + while (queue.Count > 0) + { + string regex = queue.Dequeue(); + + if (!allRegexPatterns.TryGetValue(regex, out _)) + allRegexPatterns.Add(regex, null); + } + } + + //load regex list zone + List regexListZone = new 
List(totalRegexPatterns); + + foreach (KeyValuePair regexPattern in allRegexPatterns) + { + try + { + Regex regex = new Regex(regexPattern.Key, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + + regexListZone.Add(new RegexItem(regex, null)); + } + catch (RegexParseException ex) + { + _app._dnsServer.WriteLog(ex); + } + } + + return regexListZone; + } + #endregion #region public - public void LoadBlockListZone(Dictionary> allAllowListQueues, Dictionary> allBlockListQueues) + public void LoadAllowListZone(Dictionary> allAllowListQueues) { - //read all allowed domains in dictionary - Dictionary allowedDomains = new Dictionary(); + List listUrls = new List(); - foreach (Uri allowListUrl in _allowListUrls) - { - if (allAllowListQueues.TryGetValue(allowListUrl, out Queue queue)) - { - while (queue.Count > 0) - { - string domain = queue.Dequeue(); + listUrls.AddRange(_allowListUrls); + listUrls.AddRange(_adblockListUrls); - allowedDomains.TryAdd(domain, null); - } - } - } - - //select block lists - Dictionary> blockListQueues = new Dictionary>(_blockListUrls.Count); - int totalDomains = 0; - - foreach (Uri blockListUrl in _blockListUrls) - { - if (allBlockListQueues.TryGetValue(blockListUrl, out Queue blockListQueue)) - { - totalDomains += blockListQueue.Count; - blockListQueues.Add(blockListUrl, blockListQueue); - } - } - - //load block list zone - Dictionary> blockListZone = new Dictionary>(totalDomains); - - foreach (KeyValuePair> blockListQueue in blockListQueues) - { - Queue queue = blockListQueue.Value; - - while (queue.Count > 0) - { - string domain = queue.Dequeue(); - - if (IsZoneAllowed(allowedDomains, domain)) - continue; //domain is in allowed list so skip adding it to block list zone - - if (!blockListZone.TryGetValue(domain, out List blockListUrls)) - { - blockListUrls = new List(2); - blockListZone.Add(domain, blockListUrls); - } - - blockListUrls.Add(blockListQueue.Key); - } - } - - _blockListZone = blockListZone; + 
_allowListZone = LoadListZone(listUrls, allAllowListQueues); } - public void LoadRegexBlockListZone(Dictionary> allRegexAllowListQueues, Dictionary> allRegexBlockListQueues) + public void LoadBlockListZone(Dictionary> allBlockListQueues) { - { - //select regex allow lists - Dictionary> regexAllowListQueues = new Dictionary>(_regexAllowListUrls.Count); - int totalRegexPatterns = 0; + List listUrls = new List(); - foreach (Uri regexAllowListUrl in _regexAllowListUrls) - { - if (allRegexAllowListQueues.TryGetValue(regexAllowListUrl, out Queue regexAllowListQueue)) - { - totalRegexPatterns += regexAllowListQueue.Count; - regexAllowListQueues.Add(regexAllowListUrl, regexAllowListQueue); - } - } + listUrls.AddRange(_blockListUrls); + listUrls.AddRange(_adblockListUrls); - //load regex allow list patterns from queue - Dictionary allRegexPatterns = new Dictionary(totalRegexPatterns); + _blockListZone = LoadListZone(listUrls, allBlockListQueues); + } - foreach (KeyValuePair> regexAllowListQueue in regexAllowListQueues) - { - Queue queue = regexAllowListQueue.Value; + public void LoadRegexAllowListZone(Dictionary> allRegexAllowListQueues) + { + _regexAllowListZone = LoadRegexListZone(_regexAllowListUrls, allRegexAllowListQueues); + } - while (queue.Count > 0) - { - string regex = queue.Dequeue(); - - if (!allRegexPatterns.TryGetValue(regex, out _)) - allRegexPatterns.Add(regex, null); - } - } - - //load regex allow list zone - List regexAllowListZone = new List(totalRegexPatterns); - - foreach (KeyValuePair regexPattern in allRegexPatterns) - { - Regex regex = new Regex(regexPattern.Key, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); - - regexAllowListZone.Add(new RegexItem(regex, null)); - } - - _regexAllowListZone = regexAllowListZone; - } - - { - //select regex block lists - Dictionary> regexBlockListQueues = new Dictionary>(_regexBlockListUrls.Count); - int totalRegexPatterns = 0; - - foreach (Uri regexBlockListUrl in _regexBlockListUrls) - { 
- if (allRegexBlockListQueues.TryGetValue(regexBlockListUrl, out Queue regexBlockListQueue)) - { - totalRegexPatterns += regexBlockListQueue.Count; - regexBlockListQueues.Add(regexBlockListUrl, regexBlockListQueue); - } - } - - //load regex block list patterns from queue - Dictionary> allRegexPatterns = new Dictionary>(totalRegexPatterns); - - foreach (KeyValuePair> regexBlockListQueue in regexBlockListQueues) - { - Queue queue = regexBlockListQueue.Value; - - while (queue.Count > 0) - { - string regexPattern = queue.Dequeue(); - - if (!allRegexPatterns.TryGetValue(regexPattern, out List regexBlockLists)) - { - regexBlockLists = new List(2); - allRegexPatterns.Add(regexPattern, regexBlockLists); - } - - regexBlockLists.Add(regexBlockListQueue.Key); - } - } - - //load regex block list zone - List regexBlockListZone = new List(totalRegexPatterns); - - foreach (KeyValuePair> regexPattern in allRegexPatterns) - { - Regex regex = new Regex(regexPattern.Key, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); - - regexBlockListZone.Add(new RegexItem(regex, regexPattern.Value)); - } - - _regexBlockListZone = regexBlockListZone; - } + public void LoadRegexBlockListZone(Dictionary> allRegexBlockListQueues) + { + _regexBlockListZone = LoadRegexListZone(_regexBlockListUrls, allRegexBlockListQueues); } public IReadOnlyList IsZoneBlocked(string domain, out string blockedDomain, out string blockedRegex) { domain = domain.ToLower(); - //allowed - string domain1 = domain; - do + //allowed, allow list zone, allowedRegex, regex allow list zone + if (IsZoneFound(_allowed, domain, out _, out _) || IsZoneFound(_allowListZone, domain, out _, out _) || IsMatchFound(_allowedRegex, domain, out _) || IsMatchFound(_regexAllowListZone, domain, out _, out _)) { - if (_allowed.TryGetValue(domain1, out _)) - { - //found zone allowed - blockedDomain = null; - blockedRegex = null; - return null; - } - - domain1 = GetParentZone(domain1); - } - while (domain1 is not null); - 
- //allowedRegex - foreach (Regex regex in _allowedRegex) - { - if (regex.IsMatch(domain)) - { - //found pattern allowed - blockedDomain = null; - blockedRegex = null; - return null; - } - } - - //regex allow list zone - foreach (RegexItem regexItem in _regexAllowListZone) - { - if (regexItem.Regex.IsMatch(domain)) - { - //found pattern allowed - blockedDomain = null; - blockedRegex = null; - return null; - } + //found zone allowed + blockedDomain = null; + blockedRegex = null; + return null; } //blocked - string domain2 = domain; - do + if (IsZoneFound(_blocked, domain, out string foundZone1, out _)) { - if (_blocked.TryGetValue(domain2, out _)) - { - //found zone blocked - blockedDomain = domain2; - blockedRegex = null; - return Array.Empty(); - } - - domain2 = GetParentZone(domain2); + //found zone blocked + blockedDomain = foundZone1; + blockedRegex = null; + return Array.Empty(); } - while (domain2 is not null); //block list zone - string domain3 = domain; - do + if (IsZoneFound(_blockListZone, domain, out string foundZone2, out List blockListUrls1)) { - if (_blockListZone.TryGetValue(domain3, out List blockListUrls)) - { - //found zone blocked - blockedDomain = domain3; - blockedRegex = null; - return blockListUrls; - } - - domain3 = GetParentZone(domain3); + //found zone blocked + blockedDomain = foundZone2; + blockedRegex = null; + return blockListUrls1; } - while (domain3 is not null); //blockedRegex - foreach (Regex regex in _blockedRegex) + if (IsMatchFound(_blockedRegex, domain, out string blockedPattern1)) { - if (regex.IsMatch(domain)) - { - //found pattern blocked - blockedDomain = null; - blockedRegex = regex.ToString(); - return Array.Empty(); - } + //found pattern blocked + blockedDomain = null; + blockedRegex = blockedPattern1; + return Array.Empty(); } //regex block list zone - foreach (RegexItem regexItem in _regexBlockListZone) + if (IsMatchFound(_regexBlockListZone, domain, out string blockedPattern2, out IReadOnlyList blockListUrls2)) { - if 
(regexItem.Regex.IsMatch(domain)) - { - //found pattern blocked - blockedDomain = null; - blockedRegex = regexItem.Regex.ToString(); - return regexItem.BlockListUrls; - } + //found pattern blocked + blockedDomain = null; + blockedRegex = blockedPattern2; + return blockListUrls2; } blockedDomain = null; @@ -1430,6 +1498,12 @@ namespace AdvanceBlocking set { _regexAllowListUrls = value; } } + public IReadOnlyList AdblockListUrls + { + get { return _adblockListUrls; } + set { _adblockListUrls = value; } + } + #endregion }