首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何将文档名称与两个数据集进行比较和匹配

如何将文档名称与两个数据集进行比较和匹配
EN

Stack Overflow用户
提问于 2015-03-20 03:49:21
回答 1查看 235关注 0票数 1

我正在尝试批量上传PDF文件。这些文件的名称结构如下:

代码语言:javascript
复制
mm/yyyy AAA BBBB.pdf
  • AAA是三个字母的PipeNameAbreviation
  • BBBB是3个或3个以上的字母/数字LocationAbbreviation

在 Plunker 示例中,您会看到用户先在视图中选择“公司”和“日期”,然后再选择PDF文件。我需要做的是将文件名中的PipeNameAbr与数据库中的PipeNameAbr进行比较,然后把正确的PipeId和PipeName赋给表单数据(formData)。管道缩写的字母数是固定的,为3个。对Location我也需要做同样的处理,但表单数据需要的是LocationId和LocationAbr。区别在于位置缩写的字母/数字长度不固定。所以我需要与最后4个Abbreviation进行比较和匹配。

Plunker 示例

代码语言:javascript
复制
 // Pattern for the uploaded file names: digits, one non-digit separator, digits,
 // then two whitespace-free tokens (capture groups 1 and 2), a dot and the rest.
 var regex = /^\d+\D\d+\s*?(\S*)\s*(\S*)\..*$/i;

/**
 * Posts every file in $scope.files whose name satisfies `regex` to
 * /api/apiBatchPipeLine through $upload.upload.
 *
 * The pipe/location fields are read straight from $scope (PipeId, PipeName,
 * LocationId, LocationAb); the tokens captured from the file name are computed
 * but not used to fill those fields.
 */
$scope.upload = function () {
    var selected = $scope.files;
    if (!selected || !selected.length) {
        return;
    }

    // Logs the upload percentage together with the name of the file in flight.
    function logProgress(evt) {
        var progressPercentage = parseInt(100.0 * evt.loaded / evt.total);
        console.log('progress: ' + progressPercentage + '% ' + evt.config.file.name);
    }

    // Logs the server response once a file has been uploaded.
    function logSuccess(data, status, headers, config) {
        console.log('file ' + config.file.name + 'uploaded. Response: ' +
                    JSON.stringify(data));
    }

    // Logs whatever the failed request reported.
    function logFailure(err, result) {
        console.log(err, result);
    }

    for (var idx = 0; idx < selected.length; idx++) {
        var current = selected[idx];

        // Files whose names do not fit the pattern are left out of the batch.
        if (!regex.test(current.name)) {
            continue;
        }

        // Groups 1 and 2 hold the two name tokens; nothing below reads them.
        var nameParts = current.name.match(regex);

        $upload.upload({
            url: '/api/apiBatchPipeLine',
            fields: {
                'typeId': 1,
                'companyId': $scope.companyId.CompanyId,
                'documentDate': $scope.model.documentDate,
                'pipeId': $scope.PipeId,
                'pipeName': $scope.PipeName,
                'locationId': $scope.LocationId,
                'locationAb': $scope.LocationAb
            },
            file: current
        }).progress(logProgress).success(logSuccess).error(logFailure);
    }
};

JSON

代码语言:javascript
复制
// Pipe records as returned by the database; PipeAb is the abbreviation that
// appears in the uploaded file names. One record per line so that no string
// literal is ever wrapped across lines (a raw line break inside a quoted
// string is a syntax error).
$scope.pipes = [
    {"$id":"1","PipeId":1,"PipeAb":"ANR","PipeName":"Transcanada ANR Pipeline","Documents":null},
    {"$id":"2","PipeId":2,"PipeAb":"CGT","PipeName":"Columbia Gulf Transmission","Documents":null},
    {"$id":"3","PipeId":3,"PipeAb":"CHA","PipeName":"Enterprise Channel Pipeline","Documents":null},
    {"$id":"4","PipeId":4,"PipeAb":"FGT","PipeName":"Energy Transfer Florida Gas Transmission","Documents":null},
    {"$id":"5","PipeId":5,"PipeAb":"GAB","PipeName":"Enbridge Garden Banks Pipeline","Documents":null},
    {"$id":"6","PipeId":6,"PipeAb":"GLO ","PipeName":"American Midstream Gloria  Pipeline","Documents":null},
    {"$id":"7","PipeId":7,"PipeAb":"GSO","PipeName":"Boardwalk Gulf South Pipeline","Documents":null},
    {"$id":"8","PipeId":8,"PipeAb":"HIO","PipeName":"Enterprise High Island Offshore System","Documents":null},
    {"$id":"9","PipeId":9,"PipeAb":"HIP","PipeName":"American Midstream High Point Pipeline","Documents":null},
    {"$id":"10","PipeId":10,"PipeAb":"HPL","PipeName":"Energy Transfer Houston Pipeline","Documents":null},
    {"$id":"11","PipeId":11,"PipeAb":"TEJ","PipeName":"Kinder Morgan Tejas Pipeline","Documents":null},
    {"$id":"12","PipeId":12,"PipeAb":"KIN","PipeName":"Kinetica Partners Pipeline","Documents":null},
    {"$id":"13","PipeId":13,"PipeAb":"NGP","PipeName":"Kinder Morgan Natural Gas Pipeline","Documents":null},
    {"$id":"14","PipeId":14,"PipeAb":"SER","PipeName":"Energy Transfer Sea Robin Pipeline Company ","Documents":null},
    {"$id":"15","PipeId":15,"PipeAb":"SNT","PipeName":"Kinder Morgan Southern Natural Gas Company","Documents":null},
    {"$id":"16","PipeId":16,"PipeAb":"SRY","PipeName":"MCPO Stingray Pipeline Company","Documents":null},
    {"$id":"17","PipeId":17,"PipeAb":"TGT","PipeName":"Kinder Morgan Tennessee Gas Pipeline","Documents":null},
    {"$id":"18","PipeId":18,"PipeAb":"TET","PipeName":"Spectra Energy Texas Eastern Pipeline","Documents":null},
    {"$id":"19","PipeId":19,"PipeAb":"TXG","PipeName":"Boardwalk Texas Gas Transmission","Documents":null},
    {"$id":"20","PipeId":20,"PipeAb":"TSC","PipeName":"Williams Transcontinental Gas Pipeline","Documents":null},
    {"$id":"21","PipeId":21,"PipeAb":"TRK","PipeName":"Energy Transfer Trunkline Gas Company","Documents":null},
    {"$id":"22","PipeId":22,"PipeAb":"VGS","PipeName":"Targa Venice Gathering System","Documents":null}
];

JSON

代码语言:javascript
复制
 // Location records; LocationAb is the abbreviation that appears as the last
 // token of the uploaded file names.
 $scope.locations = [
     {"$id":"1","LocationId":1,"LocationAb":"BS32","LocationName":"BravoSam 32","Documents":null},
     {"$id":"2","LocationId":2,"LocationAb":"MP46","LocationName":"MikePier 46","Documents":null},
     {"$id":"3","LocationId":3,"LocationAb":"MP140","LocationName":"MikePier 140","Documents":null},
     {"$id":"4","LocationId":4,"LocationAb":"VR16","LocationName":"VectorRoland 16","Documents":null},
     {"$id":"5","LocationId":5,"LocationAb":"AP96","LocationName":"AlphaPass 96","Documents":null},
     {"$id":"6","LocationId":6,"LocationAb":"DZ26","LocationName":"DeltaZulu 26","Documents":null},
     {"$id":"7","LocationId":7,"LocationAb":"SV963","LocationName":"SandmanViper 963","Documents":null}
 ];

我附上了一张屏幕截图,展示我目前看到的内容以及我现有的实现。

用于测试的PDF名为

02-2015 HIP 02 32.pdf 02-2015 HIP MP46.pdf 02-2015 HIP MP140.pdf 02-2015 TSC VR16.pdf

我有一个正则表达式,它可以去掉pdf名称中的日期,并把管道缩写和位置缩写分离出来。我认为我需要遍历文件数组,但应该用什么样的循环呢?我应该用switch语句来做比较吗?我用数据库中的json创建了一个 Plunker 示例。

更新:下面是可以正常工作的 Angular 版本

代码语言:javascript
复制
/**
 * Uploads every file in $scope.files to /api/apiBatchPipeLine, filling the
 * pipe and location fields from the abbreviations found in the file name.
 *
 * The name is expected to look like "<digits><sep><digits> <PipeAb> <LocationAb>.<ext>".
 * The two abbreviations are resolved against $scope.pipes (by PipeAb) and
 * $scope.locations (by LocationAb). A file whose name does not fit the pattern,
 * or whose abbreviations are unknown, is reported and skipped so that one bad
 * name does not abort the rest of the batch.
 *
 * Side effects kept from the previous version: $scope.pipeLookup,
 * $scope.locationLookup, $scope.pipe and $scope.loc are assigned.
 */
$scope.upload = function () {
    var files = $scope.files;
    if (!files || !files.length) {
        return;
    }

    var fileNamePattern = /^\d+\D\d+\s*?(\S*)\s*(\S*?)\./i;

    // Builds an abbreviation -> record table. Keys are trimmed because the
    // captured tokens never contain whitespace, so a key such as "GLO " could
    // otherwise never be found.
    function indexBy(records, keyName) {
        var table = {};
        (records || []).forEach(function (record) {
            var key = record[keyName];
            if (typeof key === 'string') {
                table[key.trim()] = record;
            }
        });
        return table;
    }

    // Own-property lookup so tokens such as "constructor" do not resolve to
    // members inherited from Object.prototype.
    function findOwn(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    }

    // The tables do not depend on the file, so build them once per batch.
    $scope.pipeLookup = indexBy($scope.pipes, 'PipeAb');
    $scope.locationLookup = indexBy($scope.locations, 'LocationAb');

    for (var i = 0; i < files.length; i++) {
        var file = files[i];

        // match() yields null when the name does not fit the pattern.
        var matches = file.name.match(fileNamePattern);
        var pipe = matches ? findOwn($scope.pipeLookup, matches[1]) : undefined;
        var loc = matches ? findOwn($scope.locationLookup, matches[2]) : undefined;

        if (!pipe || !loc) {
            notificationFactory.error();
            console.log('skipped ' + file.name + ': no matching pipe/location abbreviation');
            continue;
        }

        $scope.pipe = pipe;
        $scope.loc = loc;

        $upload.upload({
            url: '/api/apiBatchPipeLine',
            fields: {
                'typeId': 1,
                'companyId': $scope.companyId.CompanyId,
                'documentDate': $scope.model.documentDate,
                'pipeId': pipe.PipeId,
                'pipeName': pipe.PipeName,
                'companyName': $scope.CompanyName,
                'locationId': loc.LocationId,
                'locationAb': loc.LocationAb
            },
            file: file
        }).progress(function (evt) {
            // Math.floor instead of parseInt: parseInt stringifies its argument
            // first, so a very small ratio in exponent notation would be misread.
            var progressPercentage = Math.floor(100.0 * evt.loaded / evt.total);
            console.log('progress: ' + progressPercentage + '% ' + evt.config.file.name);
        }).success(function (data, status, headers, config) {
            notificationFactory.success();
        }).error(function (err, result) {
            notificationFactory.error();
            console.log(err, result);
        });
    }
};
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2015-03-23 16:21:22

如果需要匹配的缩写始终是唯一的,并且考虑到您需要进行相当多次的查找,那么可以选择只遍历JSON一次,并以缩写作为键(key)来存储管道/位置:

代码语言:javascript
复制
// Abbreviation -> record tables, filled in a single pass over each list.
var pipeLookup = {};
var locationLookup = {};

$scope.pipes.forEach(function (pipeEntry) {
    pipeLookup[pipeEntry.PipeAb] = pipeEntry;
});
$scope.locations.forEach(function (locationEntry) {
    locationLookup[locationEntry.LocationAb] = locationEntry;
});

这里的意思是:对于$scope.pipes中的每一个条目,都在pipeLookup中建立一个对应条目,以该管道的PipeAb作为键(key),以整个管道对象作为值(value)。在此之后,这些Lookup变量本质上就是哈希表(hashmap),因此您可以很容易地利用正则表达式匹配得到的捕获组找到正确的管道,如下所示:

代码语言:javascript
复制
// In the upload loop `file` is an entry of $scope.files, so the pattern is
// applied to its name. With plain file-name strings, call file.match(...) instead.
var matches = file.name.match(/^\d+\D\d+\s*?(\S*)\s*(\S*?)\./i);
// match() returns null for a name that does not fit; pipe/loc are then undefined.
var pipe = matches ? pipeLookup[matches[1]] : undefined;
var loc = matches ? locationLookup[matches[2]] : undefined;

然后,您可以使用 pipe.PipeId、pipe.PipeName、loc.LocationId 以及任何其他属性来给 fields 属性赋值。(是否要把 pipeLookup、locationLookup 这些变量设为 $scope 的属性由您决定,我对AngularJS不太熟悉。)

下面的代码片段是一个简化版本,它为每个文件名找到对应的正确Pipe/Location属性。(为了简单起见,其中没有使用AngularJS。)

代码语言:javascript
复制
// Demo data. One record per line so that no string literal is wrapped across
// lines (a raw line break inside a quoted string is a syntax error).
var pipes = [
  {"$id":"1","PipeId":1,"PipeAb":"ANR","PipeName":"Transcanada ANR Pipeline","Documents":null},
  {"$id":"2","PipeId":2,"PipeAb":"CGT","PipeName":"Columbia Gulf Transmission","Documents":null},
  {"$id":"3","PipeId":3,"PipeAb":"CHA","PipeName":"Enterprise Channel Pipeline","Documents":null},
  {"$id":"4","PipeId":4,"PipeAb":"FGT","PipeName":"Energy Transfer Florida Gas Transmission","Documents":null},
  {"$id":"5","PipeId":5,"PipeAb":"GAB","PipeName":"Enbridge Garden Banks Pipeline","Documents":null},
  {"$id":"6","PipeId":6,"PipeAb":"GLO ","PipeName":"American Midstream Gloria  Pipeline","Documents":null},
  {"$id":"7","PipeId":7,"PipeAb":"GSO","PipeName":"Boardwalk Gulf South Pipeline","Documents":null},
  {"$id":"8","PipeId":8,"PipeAb":"HIO","PipeName":"Enterprise High Island Offshore System","Documents":null},
  {"$id":"9","PipeId":9,"PipeAb":"HIP","PipeName":"American Midstream High Point Pipeline","Documents":null},
  {"$id":"10","PipeId":10,"PipeAb":"HPL","PipeName":"Energy Transfer Houston Pipeline","Documents":null},
  {"$id":"11","PipeId":11,"PipeAb":"TEJ","PipeName":"Kinder Morgan Tejas Pipeline","Documents":null},
  {"$id":"12","PipeId":12,"PipeAb":"KIN","PipeName":"Kinetica Partners Pipeline","Documents":null},
  {"$id":"13","PipeId":13,"PipeAb":"NGP","PipeName":"Kinder Morgan Natural Gas Pipeline","Documents":null},
  {"$id":"14","PipeId":14,"PipeAb":"SER","PipeName":"Energy Transfer Sea Robin Pipeline Company ","Documents":null},
  {"$id":"15","PipeId":15,"PipeAb":"SNT","PipeName":"Kinder Morgan Southern Natural Gas Company","Documents":null},
  {"$id":"16","PipeId":16,"PipeAb":"SRY","PipeName":"MCPO Stingray Pipeline Company","Documents":null},
  {"$id":"17","PipeId":17,"PipeAb":"TGT","PipeName":"Kinder Morgan Tennessee Gas Pipeline","Documents":null},
  {"$id":"18","PipeId":18,"PipeAb":"TET","PipeName":"Spectra Energy Texas Eastern Pipeline","Documents":null},
  {"$id":"19","PipeId":19,"PipeAb":"TXG","PipeName":"Boardwalk Texas Gas Transmission","Documents":null},
  {"$id":"20","PipeId":20,"PipeAb":"TSC","PipeName":"Williams Transcontinental Gas Pipeline","Documents":null},
  {"$id":"21","PipeId":21,"PipeAb":"TRK","PipeName":"Energy Transfer Trunkline Gas Company","Documents":null},
  {"$id":"22","PipeId":22,"PipeAb":"VGS","PipeName":"Targa Venice Gathering System","Documents":null}
];
var locations = [
  {"$id":"1","LocationId":1,"LocationAb":"BS32","LocationName":"BravoSam 32","Documents":null},
  {"$id":"2","LocationId":2,"LocationAb":"MP46","LocationName":"MikePier 46","Documents":null},
  {"$id":"3","LocationId":3,"LocationAb":"MP140","LocationName":"MikePier 140","Documents":null},
  {"$id":"4","LocationId":4,"LocationAb":"VR16","LocationName":"VectorRoland 16","Documents":null},
  {"$id":"5","LocationId":5,"LocationAb":"AP96","LocationName":"AlphaPass 96","Documents":null},
  {"$id":"6","LocationId":6,"LocationAb":"DZ26","LocationName":"DeltaZulu 26","Documents":null},
  {"$id":"7","LocationId":7,"LocationAb":"SV963","LocationName":"SandmanViper 963","Documents":null}
];

// File names to resolve; each follows "<date> <PipeAb> <LocationAb>.pdf".
var files = ['02-2015 HIP BS32.pdf', '02-2015 HIP MP46.pdf', '02-2015 HIP MP140.pdf', '02-2015 TSC VR16.pdf'];

// Abbreviation -> record tables, built once before any file is processed.
var pipeLookup = {};
var locationLookup = {};

// Keys are trimmed: the captured tokens never contain whitespace, so an
// abbreviation stored as "GLO " could otherwise never be found.
pipes.forEach(function(pip) {
  pipeLookup[pip['PipeAb'].trim()] = pip;
});
locations.forEach(function(loc) {
  locationLookup[loc['LocationAb'].trim()] = loc;
});

// Writes one tab-separated line per file into the element with id "out".
document.getElementById('out').value = files.map(function(file) {
  // match() returns null when the name does not fit the pattern.
  var matches = file.match(/^\d+\D\d+\s*?(\S*)\s*(\S*?)\./i);
  var pipe = matches ? pipeLookup[matches[1]] : undefined;
  var loc = matches ? locationLookup[matches[2]] : undefined;
  if (!pipe || !loc) {
    // Report the unresolved file instead of throwing and losing every line.
    return file + '\t(no matching pipe/location)';
  }
  return file + '\tPipeId: ' + pipe['PipeId'] +
            '\tLocationId: ' + loc['LocationId'] + 
            '\tPipeName: ' + pipe['PipeName'];
}).join('\n');
代码语言:javascript
复制
<textarea id="out" rows="10" style="width:100%"></textarea>

另一种方法是针对每个文件都遍历一次 $scope.pipes 和 $scope.locations。但这样会使代码可读性降低,性能也会下降。

票数 3
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/29159095

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档