-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test(api): benchmark levenshtein distance over placenames
refs #371
- Loading branch information
Showing
3 changed files
with
354 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
## Ignore Visual Studio temporary files, build results, and | ||
## files generated by popular Visual Studio add-ons. | ||
|
||
# User-specific files | ||
*.suo | ||
*.user | ||
*.userosscache | ||
*.sln.docstates | ||
|
||
# User-specific files (MonoDevelop/Xamarin Studio) | ||
*.userprefs | ||
|
||
# Build results | ||
[Dd]ebug/ | ||
[Dd]ebugPublic/ | ||
[Rr]elease/ | ||
[Rr]eleases/ | ||
x64/ | ||
x86/ | ||
bld/ | ||
[Bb]in/ | ||
[Oo]bj/ | ||
[Ll]og/ | ||
|
||
# Visual Studio 2015 cache/options directory | ||
.vs/ | ||
|
||
# Uncomment if you have tasks that create the project's static files in wwwroot | ||
#wwwroot/ | ||
|
||
# MSTest test Results | ||
[Tt]est[Rr]esult*/ | ||
[Bb]uild[Ll]og.* | ||
|
||
# NUNIT | ||
*.VisualState.xml | ||
TestResult.xml | ||
|
||
# Build Results of an ATL Project | ||
[Dd]ebugPS/ | ||
[Rr]eleasePS/ | ||
dlldata.c | ||
|
||
# DNX | ||
project.lock.json | ||
project.fragment.lock.json | ||
artifacts/ | ||
|
||
*_i.c | ||
*_p.c | ||
*_i.h | ||
*.ilk | ||
*.meta | ||
*.obj | ||
*.pch | ||
*.pdb | ||
*.pgc | ||
*.pgd | ||
*.rsp | ||
*.sbr | ||
*.tlb | ||
*.tli | ||
*.tlh | ||
*.tmp | ||
*.tmp_proj | ||
*.log | ||
*.vspscc | ||
*.vssscc | ||
.builds | ||
*.pidb | ||
*.svclog | ||
*.scc | ||
|
||
# Chutzpah Test files | ||
_Chutzpah* | ||
|
||
# Visual C++ cache files | ||
ipch/ | ||
*.aps | ||
*.ncb | ||
*.opendb | ||
*.opensdf | ||
*.sdf | ||
*.cachefile | ||
*.VC.db | ||
*.VC.VC.opendb | ||
|
||
# Visual Studio profiler | ||
*.psess | ||
*.vsp | ||
*.vspx | ||
*.sap | ||
|
||
# TFS 2012 Local Workspace | ||
$tf/ | ||
|
||
# Guidance Automation Toolkit | ||
*.gpState | ||
|
||
# ReSharper is a .NET coding add-in | ||
_ReSharper*/ | ||
*.[Rr]e[Ss]harper | ||
*.DotSettings.user | ||
|
||
# JustCode is a .NET coding add-in | ||
.JustCode | ||
|
||
# TeamCity is a build add-in | ||
_TeamCity* | ||
|
||
# DotCover is a Code Coverage Tool | ||
*.dotCover | ||
|
||
# NCrunch | ||
_NCrunch_* | ||
.*crunch*.local.xml | ||
nCrunchTemp_* | ||
|
||
# MightyMoose | ||
*.mm.* | ||
AutoTest.Net/ | ||
|
||
# Web workbench (sass) | ||
.sass-cache/ | ||
|
||
# Benchmarking | ||
BenchmarkDotNet.Artifacts/ | ||
|
||
# Installshield output folder | ||
[Ee]xpress/ | ||
|
||
# DocProject is a documentation generator add-in | ||
DocProject/buildhelp/ | ||
DocProject/Help/*.HxT | ||
DocProject/Help/*.HxC | ||
DocProject/Help/*.hhc | ||
DocProject/Help/*.hhk | ||
DocProject/Help/*.hhp | ||
DocProject/Help/Html2 | ||
DocProject/Help/html | ||
|
||
# Click-Once directory | ||
publish/ | ||
|
||
# Publish Web Output | ||
*.[Pp]ublish.xml | ||
*.azurePubxml | ||
# TODO: Comment the next line if you want to check in your web deploy settings | ||
# but database connection strings (with potential passwords) will be unencrypted | ||
#*.pubxml | ||
*.publishproj | ||
|
||
# Microsoft Azure Web App publish settings. Comment the next line if you want to | ||
# check in your Azure Web App publish settings, but sensitive information contained | ||
# in these scripts will be unencrypted | ||
PublishScripts/ | ||
|
||
# NuGet Packages | ||
*.nupkg | ||
# The packages folder can be ignored because of Package Restore | ||
**/packages/* | ||
# except build/, which is used as an MSBuild target. | ||
!**/packages/build/ | ||
# Uncomment if necessary however generally it will be regenerated when needed | ||
#!**/packages/repositories.config | ||
# NuGet v3's project.json files produce more ignorable files | ||
*.nuget.props | ||
*.nuget.targets | ||
|
||
# Microsoft Azure Build Output | ||
csx/ | ||
*.build.csdef | ||
|
||
# Microsoft Azure Emulator | ||
ecf/ | ||
rcf/ | ||
|
||
# Windows Store app package directories and files | ||
AppPackages/ | ||
BundleArtifacts/ | ||
Package.StoreAssociation.xml | ||
_pkginfo.txt | ||
|
||
# Visual Studio cache files | ||
# files ending in .cache can be ignored | ||
*.[Cc]ache | ||
# but keep track of directories ending in .cache | ||
!*.[Cc]ache/ | ||
|
||
# Others | ||
ClientBin/ | ||
~$* | ||
*~ | ||
*.dbmdl | ||
*.dbproj.schemaview | ||
*.jfm | ||
*.pfx | ||
*.publishsettings | ||
node_modules/ | ||
orleans.codegen.cs | ||
|
||
# Since there are multiple workflows, uncomment next line to ignore bower_components | ||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) | ||
#bower_components/ | ||
|
||
# RIA/Silverlight projects | ||
Generated_Code/ | ||
|
||
# Backup & report files from converting an old project file | ||
# to a newer Visual Studio version. Backup files are not needed, | ||
# because we have git ;-) | ||
_UpgradeReport_Files/ | ||
Backup*/ | ||
UpgradeLog*.XML | ||
UpgradeLog*.htm | ||
|
||
# SQL Server files | ||
*.mdf | ||
*.ldf | ||
|
||
# Business Intelligence projects | ||
*.rdl.data | ||
*.bim.layout | ||
*.bim_*.settings | ||
|
||
# Microsoft Fakes | ||
FakesAssemblies/ | ||
|
||
# GhostDoc plugin setting file | ||
*.GhostDoc.xml | ||
|
||
# Node.js Tools for Visual Studio | ||
.ntvs_analysis.dat | ||
|
||
# Visual Studio 6 build log | ||
*.plg | ||
|
||
# Visual Studio 6 workspace options file | ||
*.opt | ||
|
||
# Visual Studio LightSwitch build output | ||
**/*.HTMLClient/GeneratedArtifacts | ||
**/*.DesktopClient/GeneratedArtifacts | ||
**/*.DesktopClient/ModelManifest.xml | ||
**/*.Server/GeneratedArtifacts | ||
**/*.Server/ModelManifest.xml | ||
_Pvt_Extensions | ||
|
||
# Paket dependency manager | ||
.paket/paket.exe | ||
paket-files/ | ||
|
||
# FAKE - F# Make | ||
.fake/ | ||
|
||
# JetBrains Rider | ||
.idea/ | ||
*.sln.iml | ||
|
||
# CodeRush | ||
.cr/ | ||
|
||
# Python Tools for Visual Studio (PTVS) | ||
__pycache__/ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" /> | ||
<PackageReference Include="Fastenshtein" Version="1.0.10" /> | ||
<PackageReference Include="Google.Cloud.BigQuery.V2" Version="3.10.0" /> | ||
</ItemGroup> | ||
|
||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
using BenchmarkDotNet.Attributes; | ||
using BenchmarkDotNet.Running; | ||
using Fastenshtein; | ||
using Google.Cloud.BigQuery.V2; | ||
|
||
namespace benchmark.levenshtein; | ||
|
||
/// <summary>
/// A bounded, ordered collection that retains at most <c>maxSize</c> of the
/// lowest-ranked items it has been offered, as ordered by the supplied comparer.
/// </summary>
/// <remarks>
/// The backing store is a <see cref="SortedSet{T}"/>, so two items the comparer
/// reports as equal are treated as duplicates and only the first one is kept.
/// A non-positive <c>maxSize</c> yields a list that never retains anything.
/// </remarks>
/// <typeparam name="T">The type of item being ranked.</typeparam>
/// <param name="maxSize">The maximum number of items to retain.</param>
/// <param name="comparer">Defines the ranking; lower-comparing items are kept in preference to higher ones.</param>
public class LowestDistanceList<T>(int maxSize, IComparer<T> comparer) {
    private readonly SortedSet<T> _sortedSet = new(comparer);
    private readonly int _maxSize = maxSize;

    /// <summary>
    /// Offers an item to the list. The item is kept only if the list has spare
    /// capacity or the item ranks strictly below the current highest item, in
    /// which case that highest item is evicted.
    /// </summary>
    /// <param name="item">The candidate item.</param>
    public void Add(T item) {
        if (_maxSize <= 0) {
            return;
        }

        // Fast path: a full list cannot be improved by an item that does not beat its maximum.
        if (_sortedSet.Count >= _maxSize && _sortedSet.Comparer.Compare(item, _sortedSet.Max) >= 0) {
            return;
        }

        // Add before evicting: if the set rejects the item as a duplicate, nothing
        // may be removed, otherwise the list would shrink without gaining an item.
        if (!_sortedSet.Add(item)) {
            return;
        }

        if (_sortedSet.Count > _maxSize) {
            _sortedSet.Remove(_sortedSet.Max!);
        }
    }

    /// <summary>Gets the retained items in ascending comparer order.</summary>
    public IEnumerable<T> Items => _sortedSet;
}
|
||
/// <summary>
/// Orders <see cref="Map"/> instances by ascending <see cref="Map.Difference"/>,
/// breaking ties by ordinal comparison of <see cref="Map.Zone"/>.
/// </summary>
/// <remarks>
/// The tie-break matters when this comparer backs a sorted set: without it, two
/// different zones at the same distance compare as equal and the set keeps only
/// one of them. Null references sort after every non-null value.
/// </remarks>
public class DistanceComparer : IComparer<Map> {
    /// <summary>Compares two maps by distance, then by zone name.</summary>
    /// <param name="x">The first map, or null.</param>
    /// <param name="y">The second map, or null.</param>
    /// <returns>
    /// A negative value when <paramref name="x"/> ranks before <paramref name="y"/>,
    /// zero when they rank equally, and a positive value otherwise.
    /// </returns>
    public int Compare(Map? x, Map? y) {
        if (ReferenceEquals(x, y)) {
            return 0;
        }

        if (x is null) {
            return 1;
        }

        if (y is null) {
            return -1;
        }

        var byDistance = x.Difference.CompareTo(y.Difference);

        return byDistance != 0 ? byDistance : string.CompareOrdinal(x.Zone, y.Zone);
    }
}

/// <summary>Pairs a zone name with its edit distance from the search term.</summary>
/// <param name="Difference">The edit distance between the search term and <paramref name="Zone"/>.</param>
/// <param name="Zone">The zone (place name) the distance was computed for.</param>
public record Map(int Difference, string Zone);
|
||
/// <summary>
/// BenchmarkDotNet benchmark that measures computing the Levenshtein distance
/// from a fixed search term to every place-name zone loaded from BigQuery while
/// keeping only the closest matches.
/// </summary>
public class LevenshteinOverPlaceNames {
    /// <summary>How many of the closest zones each benchmark invocation returns.</summary>
    private const int ClosestMatchesToKeep = 4;

    private readonly List<string> _zones = [];

    // NOTE(review): the search term looks like a deliberately transposed
    // "TAYLORSVILLE" used to exercise fuzzy matching - confirm before "fixing" it.
    private readonly Levenshtein _lev = new("TAYOLRSVILLE");

    /// <summary>
    /// Loads the zone names of type "place" from BigQuery once, before any
    /// benchmark invocation runs, so the query cost is excluded from the measurement.
    /// </summary>
    [GlobalSetup]
    public void Setup() {
        // Keep setup idempotent in case it is invoked more than once on the same instance.
        _zones.Clear();

        // The client is only needed while loading, so release it when setup finishes.
        using var client = BigQueryClient.Create("ut-dts-agrc-web-api-dev");
        var table = client.GetTable("address_grid_mapping_cache", "address_system_mapping");

        var results = client.ExecuteQuery(
            $"SELECT Zone FROM {table} WHERE Type=@type ORDER BY Zone", [new("type", BigQueryDbType.String, "place")]);

        foreach (var row in results) {
            var zone = row["Zone"]?.ToString();

            if (!string.IsNullOrEmpty(zone)) {
                _zones.Add(zone);
            }
        }

        Console.WriteLine($"Loaded {_zones.Count} items");
    }

    /// <summary>
    /// Computes the distance from the search term to every loaded zone and
    /// returns the closest matches in ascending distance order.
    /// </summary>
    /// <returns>Up to <see cref="ClosestMatchesToKeep"/> zone/distance pairs.</returns>
    [Benchmark]
    public Map[] CalculateAllDistances() {
        // Build a fresh list per invocation: a shared instance would carry the
        // previous run's results over, so later runs would measure different work.
        var closest = new LowestDistanceList<Map>(ClosestMatchesToKeep, new DistanceComparer());

        foreach (var zone in _zones) {
            closest.Add(new(_lev.DistanceFrom(zone), zone));
        }

        return closest.Items.ToArray();
    }
}
|
||
/// <summary>Console entry point for the benchmark project.</summary>
public class Program {
    /// <summary>
    /// Runs the <see cref="LevenshteinOverPlaceNames"/> benchmarks; the summary
    /// returned by the runner is not used.
    /// </summary>
    /// <param name="args">Command-line arguments (currently unused).</param>
    public static void Main(string[] args) => BenchmarkRunner.Run<LevenshteinOverPlaceNames>();
}